In [4]:
import pandas as pd
import numpy as np

def compute_ST_indicator(data, relation):
    """Derive a target value from the best observations in ``data``.

    The year holding the single best value is located first (the minimum
    ``Value`` for a 'negative' relation, the maximum for a 'positive' one).
    The target is then the mean of the five best values recorded in that
    year (fewer if the year has fewer non-missing values).

    Parameters
    ----------
    data : pandas.DataFrame
        Must expose ``Year`` and ``Value`` columns.
    relation : str
        Either 'negative' (smallest values are treated as best) or
        'positive' (largest values are treated as best).

    Returns
    -------
    The mean of the selected values. For 'negative' the result is floored
    at 0; for 'positive' it is returned as is.

    Raises
    ------
    ValueError
        If ``relation`` is not 'negative' or 'positive', or if ``data`` has
        no non-missing ``Value`` to work from.
    """
    # Previously an unknown relation fell through both `if`s and surfaced as
    # an UnboundLocalError; fail early with an explicit message instead.
    if relation not in ('negative', 'positive'):
        raise ValueError(
            f"relation must be 'negative' or 'positive', got {relation!r}"
        )

    lowest_is_best = relation == 'negative'
    best_value = data.Value.min() if lowest_is_best else data.Value.max()

    # min()/max() yield NaN for empty or all-NaN input, and NaN never compares
    # equal, so no row matches in that case.
    best_rows = data[data.Value == best_value]
    if best_rows.empty:
        raise ValueError("data has no non-missing 'Value' to derive a target from")
    year_of_best = best_rows['Year'].values[0]

    df = data[data.Year == year_of_best].dropna(subset=['Value'])
    best_five = df.sort_values(by='Value', ascending=lowest_is_best).head(5).Value

    if lowest_is_best:
        # Only the 'negative' branch clamps the target at zero.
        return max(0, best_five.mean())
    return best_five.mean()

In [5]:
# Indicator code -> CSV file name, one line per indicator family.
files = {
    'AB1': 'AB1_SDG.csv', 'AB2': 'AB2_SDG.csv', 'AB3': 'AB3_origin.M.csv',
    'BE1': 'BE1.csv', 'BE2': 'BE2_WB.csv', 'BE3': 'BE3_SDG.csv',
    'CV1': 'CV1_SDG.csv', 'CV2': 'CV2_origin.M.csv', 'CV3': 'CV3_WB.csv',
    'EE1': 'EE1_SDG.csv', 'EE2': 'EE2_SDG.csv', 'EE3': 'EE3_origin.M.csv',
    'EQ1': 'EQ1_WB.csv', 'EQ2': 'EQ2_origin.M.csv', 'EQ3': 'EQ3_origin.M.csv',
    'EW1': 'EW1_SDG.csv', 'EW2': 'EW2_SDG.csv', 'EW3': 'EW3_SDG.csv',
    'GB1': 'GB1_SDG.csv', 'GB2': 'GB2_SDG.csv', 'GB3': 'GB3_origin.M.csv',
    'GE1': 'GE1.csv', 'GE2': 'GE2.csv', 'GE3': 'GE3.csv',
    'GJ1': 'GJ1_origin.M.csv', 'GJ2': 'GJ2_SDG.csv',
    'GN1': 'GN1_origin.M.csv', 'GN2': 'GN2_SDG.csv',
    'GT1': 'GT1_origin.M.csv', 'GT2': 'GT2_origin.M.csv',
    'GV1': 'GV1_WB.csv', 'GV2': 'GV2_SDG.csv',
    'ME1': 'ME1_origin.M.csv', 'ME2': 'ME2_origin.M.csv', 'ME3': 'ME3_origin.M.csv',
    'SE1': 'SE1.csv', 'SE2': 'SE2_SDG.csv', 'SE3': 'SE3_SDG.csv',
    'SL1': 'SL1_origin.M.csv', 'SL2': 'SL2_origin.M.csv', 'SL3': 'SL3_origin.M.csv',
    'SP1': 'SP1_SDG.csv', 'SP2': 'SP2_SDG.csv', 'SP3': 'SP3_SDG.csv',
}


# Load the targets table; the first CSV column becomes the row index.
ST = pd.read_csv('data/sustainable_targets/ST_new.csv', index_col=0)

In [6]:
# Display the loaded targets table.
ST

Unnamed: 0,Number of targets,Relation,Target 1,Target 2
EE1,1,negative,0.928,
EE2,1,positive,,51.4
EE3,1,positive,,100.0
EW1,1,positive,,265.757935
EW2,2,negative,25.0,75.0
EW3,1,positive,,10.42671
SL1,1,negative,5.0,
SL2,1,positive,,11.9
SL3,1,negative,0.012,
ME1,1,negative,0.169685,


In [7]:
# Build one [indicator, relation, target] row for every entry in `files`.
res = []
for indicator, file in files.items():
    data = pd.read_csv(f'data/indicator/{indicator}/processed/{file}')
    relation = ST.loc[indicator, 'Relation']
    # Only observations after 2005 feed the target computation.
    after_2005 = data.Year > 2005
    target = compute_ST_indicator(data[after_2005], relation)
    res.append([indicator, relation, target])

In [8]:
# Show the collected rows as a table.
pd.DataFrame(res, columns=['Indicator', 'Relation', 'Target'])

Unnamed: 0,Indicator,Relation,Target
0,AB1,positive,100.0
1,AB2,negative,0.248472
2,AB3,positive,86.950497
3,BE1,positive,99.048117
4,BE2,positive,93.426404
5,BE3,positive,428.688
6,CV1,positive,0.99186
7,CV2,positive,100.0
8,CV3,positive,85.173508
9,EE1,negative,1.092


In [None]:
# Load the ME3 file on its own.
# NOTE(review): this reads from `preprocessed/`, while the batch loop earlier in
# the notebook reads from `processed/` — confirm the directory is intended.
data = pd.read_csv('data/indicator/ME3/preprocessed/ME3_origin.M.csv')

In [None]:
# Target for the current `data` frame, years after 2005, 'negative' relation.
compute_ST_indicator(data[(data.Year > 2005)], 'negative')

In [None]:
#data = pd.read_csv('data/full_data/data.csv')

In [None]:
#ST = pd.read_csv('data/sustainable_targets/ST_2020.csv', index_col=0)

In [None]:
# Target for ME1, years after 2005, 'negative' relation.
# NOTE(review): this filters on an `Indicator` column, so `data` presumably has to
# be the combined dataset (the commented-out `data/full_data/data.csv` load)
# rather than a single-indicator file — confirm which frame is in scope.
compute_ST_indicator(data[(data.Year > 2005) & (data.Indicator == 'ME1')], 'negative')

In [None]:
# Inspect the targets row for BE3.
ST.loc['BE3']

In [None]:
# Recompute the targets for a chosen subset of indicators and store them in a
# copy of the targets table, leaving `ST` itself untouched.
ST_new = ST.copy()
ST_to_compute = ['EE1', 'GE1', 'GE2', 'GE3', 'BE3', 'GV1', 'GT1', 'GJ1', 'GN1', 'SE1']

for indicator in ST_to_compute:
    in_scope = (data.Year > 2005) & (data.Indicator == indicator)
    df = data[in_scope]
    relation = ST.loc[indicator, 'Relation']
    print(indicator)
    indicator_st = compute_ST_indicator(df, relation)
    # 'negative' results are written to 'Target 1', everything else to 'Target 2'.
    target_column = 'Target 1' if relation == 'negative' else 'Target 2'
    ST_new.loc[indicator, target_column] = indicator_st

    print(indicator_st)

In [None]:
# Write the updated targets table to disk.
ST_new.to_csv('data/sustainable_targets/ST_2020.csv')

In [None]:
# Replace `ST` with the table stored in ST_2020.csv (first column as index).
ST = pd.read_csv('data/sustainable_targets/ST_2020.csv', index_col=0)

In [None]:
# Inspect the targets row for SL1.
ST.loc['SL1']

In [None]:
# Manual override for SL1: 'negative' relation, 'Target 1' set to 5 and
# 'Target 2' cleared.
ST.loc['SL1', 'Relation'] = 'negative'
ST.loc['SL1', 'Target 1'] = 5
ST.loc['SL1', 'Target 2'] = np.nan

In [None]:
# Hand-set 'Target 1' values for the GE indicators.
ge_target_1 = {'GE1': 0.19, 'GE2': 0.06, 'GE3': 0.0}
for code, value in ge_target_1.items():
    ST.loc[code, 'Target 1'] = value

In [None]:
# Write the manually adjusted targets back to ST_2020.csv.
ST.to_csv('data/sustainable_targets/ST_2020.csv')

In [None]:
# Column means over the five 2016 rows with the lowest GE1 `Value`.
df = pd.read_csv('data/indicator/GE1/computed/GE1.csv')
# numeric_only=True: pandas >= 2.0 no longer drops non-numeric columns silently,
# so a bare .mean() raises TypeError as soon as the file carries a text column.
df[df.Year == 2016].sort_values(by='Value').head(5).mean(numeric_only=True)

In [None]:
# Column means over the five 2016 rows with the lowest GE2 `Value`.
df = pd.read_csv('data/indicator/GE2/computed/GE2.csv')
# numeric_only=True: pandas >= 2.0 no longer drops non-numeric columns silently,
# so a bare .mean() raises TypeError as soon as the file carries a text column.
df[df.Year == 2016].sort_values(by='Value').head(5).mean(numeric_only=True)

In [None]:
# Hand-computed average: three values are summed but the divisor is 5.
# NOTE(review): presumably the remaining two of the five entries are 0 — verify.
(0.050277 + 0.124163	 + 0.134547) / 5

In [None]:
# Column means over the five 2016 rows with the lowest GE3 `Value`.
df = pd.read_csv('data/indicator/GE3/computed/GE3.csv')
# numeric_only=True: pandas >= 2.0 no longer drops non-numeric columns silently,
# so a bare .mean() raises TypeError as soon as the file carries a text column.
df[df.Year == 2016].sort_values(by='Value').head(5).mean(numeric_only=True)

In [None]:
# Display the current targets table.
ST

In [None]:
# Fifteen 2016 rows of the current `df` with the lowest `Value`.
df[df.Year == 2016].sort_values(by='Value').head(15)

In [None]:
import pandas as pd
from index.utils import ISO_to_Everything, add_All_ISOs
# Export one Excel sheet per variable: rows are countries, columns are the
# years 2005-2020.
data = pd.read_csv('data/full_data/result.csv')
with pd.ExcelWriter('data/results/timeseries.xlsx') as writer:
    for agg in ['Indicator_normed', 'Category', 'Dimension', 'Index']:
        df = data[data.Aggregation == agg]
        variables = df['Variable'].unique()
        for var in variables:
            print(var)
            # .assign builds a new frame, so the Year cast no longer writes into a
            # filtered slice of `df` (which triggered SettingWithCopyWarning).
            df_formatted = df[df.Variable == var].assign(
                Year=lambda frame: frame['Year'].astype(int)
            )
            df_formatted = df_formatted.pivot(
                index=['ISO', 'Country', 'Continent', 'UNregion', 'IncomeLevel', 'Region'],
                columns='Year',
                values='Value',
            )
            # NOTE(review): ISO_to_Everything is a project helper; presumably it
            # re-attaches the country metadata as regular columns — confirm.
            df_formatted = ISO_to_Everything(df_formatted)
            df_formatted = df_formatted[['Country', 'Continent',
                          'UNregion','IncomeLevel',
                          'Region'] + list(range(2005, 2021))]
            df_formatted.to_excel(writer, sheet_name=var)

In [None]:
# Pivot every Category / Dimension / Index variable to a country-by-year table.
# The pivoted frames are not collected or written; only the last `df_formatted`
# remains available after the loop.
for agg in ['Category', 'Dimension', 'Index']:
    df = data[data.Aggregation == agg]
    variables = df['Variable'].unique()
    for var in variables:
        # .assign builds a new frame, so the Year cast no longer writes into a
        # filtered slice of `df` (which triggered SettingWithCopyWarning).
        df_formatted = df[df.Variable == var].assign(
            Year=lambda frame: frame['Year'].astype(int)
        )
        df_formatted = df_formatted.pivot(index=['ISO', 'Country', 'Continent', 'UNregion', 'IncomeLevel', 'Region'], columns='Year', values='Value' )

In [None]:
import pandas as pd
# Five highest 2019 Index values within each continent.
data = pd.read_csv('data/full_data/result.csv')
(
    data[(data.Year == 2019) & (data.Aggregation == "Index")]
    .sort_values(['Continent', 'Value'], ascending=False)
    .groupby('Continent')
    .head(5)
)
#data[(data.Year == 2019) & (data.Variable == "SL1") & (data.Aggregation == 'Indicator_normed')].sort_values(['Continent','Value'],ascending=False).groupby('Continent').head(3)

In [None]:
# Five highest 2019 SP2 values within each continent.
(
    data[(data.Year == 2019) & (data.Variable == "SP2")]
    .sort_values(['Continent', 'Value'], ascending=False)
    .groupby('Continent')
    .head(5)
)


In [None]:

# Indicator code -> CSV file name, one line per indicator family.
files = {
    'AB1': 'AB1_SDG.csv', 'AB2': 'AB2_SDG.csv', 'AB3': 'AB3.csv',
    'BE1': 'BE1.csv', 'BE2': 'BE2_WB.csv', 'BE3': 'BE3_SDG.csv',
    'CV1': 'CV1_SDG.csv', 'CV2': 'CV2_origin.M.csv', 'CV3': 'CV3_WB.csv',
    'EE1': 'EE1_SDG.csv', 'EE2': 'EE2_SDG.csv',
    'EQ1': 'EQ1_WB.csv', 'EQ2': 'EQ2_origin.M.csv', 'EQ3': 'EQ3_origin.M.csv',
    'EW1': 'EW1_SDG.csv', 'EW2': 'EW2_SDG.csv',
    'GB1': 'GB1_SDG.csv', 'GB2': 'GB2_SDG.csv', 'GB3': 'GB3_origin.M.csv',
    'GE1': 'GE1.csv', 'GE2': 'GE2.csv', 'GE3': 'GE3.csv',
    'GJ1': 'GJ1_origin.M.csv',
    'GN1': 'GN1_origin.M.csv',
    'GT1': 'GT1_origin.M.csv',
    'GV1': 'GV1_WB.csv',
    'ME1': 'ME1_origin.M.csv', 'ME2': 'ME2_origin.M.csv',
    'SE1': 'SE1.csv', 'SE2': 'SE2_SDG.csv', 'SE3': 'SE3_SDG.csv',
    'SL1': 'SL1_origin.M.csv', 'SL2': 'SL2_origin.M.csv',
    'SP1': 'SP1_SDG.csv', 'SP2': 'SP2_origin.M.csv', 'SP3': 'SP3_SDG.csv',
}


In [None]:
# Pair each indicator code with the first distinct Description in its file.
res = []
for indicator, file in files.items():
    df = pd.read_csv(f'data/indicator/{indicator}/processed/{file}')
    first_description = df['Description'].unique()[0]
    res.append([indicator, first_description])

In [None]:
# Save the indicator/description pairs.
# NOTE(review): to_csv writes CSV text, yet the target path ends in `.xls` —
# confirm the extension is intended.
pd.DataFrame(res, columns=['indicator', 'description']).to_csv('data/results/indicator_description.xls')

In [None]:
# Smallest `Value` in the processed SP2_SDG.csv file.
pd.read_csv('data/indicator/SP2/processed/SP2_SDG.csv').Value.min()

In [None]:
# Display the raw SP2_GHD.M.csv file.
pd.read_csv('data/indicator/SP2/raw/SP2_GHD.M.csv')