In [2]:
import pandas as pd
import numpy as np
from tasks.pipeline import indicator_pipeline

from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler, GreenGrowthPreProcessor
from index.utils import ISO_to_Country, ISO_to_Everything
from processing.utils import add_ISO

# Indicator computation




In [9]:
# Load the two example indicators (EQ3 and GB3) whose raw scales are compared in the next cells.
# The EQ3 path previously contained a stray double slash ('EQ3//preprocessed').
indicator_1 = pd.read_csv('data/indicator/EQ3/preprocessed/EQ3_origin.M.csv')
indicator_2 = pd.read_csv('data/indicator/GB3/preprocessed/GB3_origin.M.csv')

In [10]:
indicator_1.Value.describe()

count    215.000000
mean       0.376462
std        0.259919
min        0.037370
25%        0.169594
50%        0.319375
75%        0.504119
max        1.384898
Name: Value, dtype: float64

In [11]:
indicator_2.Value.describe()

count    9690.000000
mean       46.473168
std        31.079331
min         0.000000
25%        25.000000
50%        50.000000
75%        75.000000
max       100.000000
Name: Value, dtype: float64

# Notes

The two indicators have different means, different standard deviations, and different units, so their raw values cannot be compared or combined directly.

How do we combine them ? 

-> Normalization !



Indicator X: min(X) ≈ 0.037, max(X) ≈ 1.38

We want to rescale X so that its minimum maps to 1 and its maximum maps to 100 (this is the normalization step).

How to normalize ? 

-> Using "Benchmark" or **Targets** (sustainability)

-> Define the **Relation**, positive or negative.  (good or bad for environment)


Based on those two things, we normalize 


For positive: 

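$$X_{norm} = \frac{X - \min(X)}{X_{target} - \min(X)} \times 100$$

(As written, this maps min(X) to 0 rather than 1. The normalized EQ3 values computed in this notebook have a minimum of 1, so the scaler presumably applies an additional offset — to be confirmed against `GreenGrowthScaler`.)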


How do we define the target ? 


->  Two options: 

    -> Using the official **SDG goal** when one is available
    -> Otherwise, defining the target ourselves: we take the average of the five best-performing countries (an empirical rule, used for most indicators)
    
We then have normalized indicator
    
    
    
Detail:

Sometimes an indicator has **outliers**; in that case we cap its values before normalizing.


In [23]:
ST = pd.read_csv('data/sustainable_targets/ST_new.csv', index_col=0).loc[['EQ3']]

In [36]:
# Should be added to the Prep
def cap_and_norm(indicators, ST, indicator_to_cap):
    """Cap the requested indicators, then normalise them against their targets.

    Parameters
    ----------
    indicators : pandas.DataFrame
        Indicator values; handed to ``GreenGrowthPreProcessor.compute_statistics``
        and ``cap_indicators`` unchanged.
    ST : pandas.DataFrame
        Sustainable-target table passed to ``GreenGrowthScaler.normalize``.
    indicator_to_cap : list
        Forwarded to ``cap_indicators``; callers in this notebook pass a list of
        indicator column names (possibly empty).

    Returns
    -------
    tuple
        ``(normalised_indicators, statistics)``. In the notebook output the
        statistics table holds the quartiles, min/max and lower/upper fences.
    """
    prep = GreenGrowthPreProcessor()
    statistics = prep.compute_statistics(indicators)

    # Capping happens before normalisation so that fenced values are what gets scaled.
    capped = prep.cap_indicators(indicators, statistics, indicator_to_cap)
    normed = GreenGrowthScaler().normalize(capped, ST)

    return normed, statistics

In [28]:
df = indicator_1[['ISO', 'Value']].rename(columns={'Value': 'EQ3'})

In [37]:
df_normed, statistics = cap_and_norm(df, ST, [])

In [38]:
statistics

Unnamed: 0,75%,25%,max,min,lower fence,upper fence
EQ3,0.504119,0.169594,1.384898,0.03737,-0.833982,1.507695
ISO,,,ZWE,ABW,,


In [35]:
df_normed.EQ3.describe()

count    215.000000
mean      73.160493
std       18.598992
min        1.000000
25%       64.025782
50%       77.245497
75%       87.963377
max       97.424895
Name: EQ3, dtype: float64

In [32]:
df.EQ3.describe()

count    215.000000
mean       0.376462
std        0.259919
min        0.037370
25%        0.169594
50%        0.319375
75%        0.504119
max        1.384898
Name: EQ3, dtype: float64

In [43]:
# Normalise per-capita food loss and waste against a zero target.
# Relation is 'negative' for both indicators, i.e. lower values are better.
df = pd.read_csv('waste_loss_per_capita.csv')

ST = pd.DataFrame(
    {
        "Indicator": ['loss_capita', 'waste_capita'],
        "Number of targets": [1, 1],
        "Relation": ['negative', 'negative'],
        'Target 1': [0, 0],
        'Target 2': [np.nan, np.nan],
    }
).set_index('Indicator')

# The normalised columns are assigned positionally: waste first, then loss.
df[['normed_waste_capita', 'normed_loss_capita']] = (
    GreenGrowthScaler()
    .normalize(df[['ISO', 'waste_capita', 'loss_capita']], ST)
    [['waste_capita', 'loss_capita']]
)

df.to_csv('waste_loss_per_capita_normed.csv')

In [114]:
indicators[indicators['Share of world fishing export'] > 0.000027].sort_values(by='Share of world fishing export').head(5).mean()

Share of world fishing export    0.00005
dtype: float64

In [122]:

# Share of world fishing export per country, indexed by ISO code.
indicators = pd.read_csv('fish_export_share.csv').set_index('ISO')[['Share of world fishing export']]

# One 'negative' target: a lower export share scores better.
ST = pd.DataFrame({"Indicator": ['Share of world fishing export'], "Number of targets": [1], "Relation": ['negative'], 'Target 1': [0.000027], 'Target 2': [np.nan]}).set_index('Indicator')

# cap_and_norm returns a (normalised indicators, statistics) tuple; unpack it so
# that `test` is the normalised frame and can be indexed by column name.
test, _cap_statistics = cap_and_norm(indicators, ST, ['Share of world fishing export'])
indicators['Share of world fishing export_normed'] = test['Share of world fishing export']

In [123]:
indicators.to_csv('fish_export_share_normed.csv')

In [2]:
def process():
    """Compute each country's 2018 share of world fishing exports.

    Keeps the 2018 "Export Quantity" rows of the raw GT2 FAO file, excluding the
    "China, mainland" area, and expresses every row's ``Value`` as a percentage
    of the summed ``Value`` over the retained rows. ISO codes are attached with
    ``add_ISO`` and the country names are then re-derived from those codes.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with the added 'Share of world fishing export',
        'ISO' and 'Country' columns.
    """
    share_col = 'Share of world fishing export'

    exports = (
        pd.read_csv('data/indicator/GT2/raw/GT2_FAO.M.csv')
          .query('Year == 2018 and Element == "Export Quantity" and Area not in ["China, mainland"]')
          .rename(columns={'Area': 'Country'})
          .assign(**{share_col: lambda frame: frame['Value'] / frame['Value'].sum() * 100})
    )

    # Replace the source's country labels with the names derived from the ISO code.
    with_iso = add_ISO(exports).drop(columns=['Country'])
    with_iso['Country'] = ISO_to_Country(with_iso['ISO'])
    return with_iso


df = process()

In [None]:
# Sustainable-target table for the single fish-export indicator.
# Every list must have one entry per indicator: the previous version mixed a
# one-element 'Indicator' list with two-element lists, which makes the
# DataFrame constructor raise "All arrays must be of the same length".
ST = pd.DataFrame({"Indicator": ['Share of world fishing export'], "Number of targets": [1], "Relation": ['negative'], 'Target 1': [0], 'Target 2': [np.nan]}).set_index('Indicator')


Unnamed: 0,ISO,Country,Year,pop,loss,waste,loss_capita,waste_capita
0,AFG,Afghanistan,2018,37171922.0,957899,4.755918e+06,0.025769,0.127944
1,ALB,Albania,2018,2866376.0,344527,3.632010e+05,0.120196,0.126711
2,DZA,Algeria,2018,42228415.0,3855367,5.782224e+06,0.091298,0.136927
3,AGO,Angola,2018,30809787.0,3068556,4.547186e+06,0.099597,0.147589
4,ATG,Antigua and Barbuda,2018,96282.0,1382,1.090457e+04,0.014354,0.113257
...,...,...,...,...,...,...,...,...
169,VEN,Venezuela,2018,28887117.0,1161376,3.299862e+06,0.040204,0.114233
170,VNM,Vietnam,2018,95545959.0,7514213,1.152240e+07,0.078645,0.120595
171,YEM,Yemen,2018,28498683.0,308491,4.289316e+06,0.010825,0.150509
172,ZMB,Zambia,2018,17351714.0,525880,2.164902e+06,0.030307,0.124766


In [5]:
df[["ISO", 'Country', 'Year', 'Share of world fishing export']].sort_values(by='Share of world fishing export')#.to_csv('fish_export_share.csv', index=False)

Unnamed: 0,ISO,Country,Year,Share of world fishing export
155,TKM,Turkmenistan,2018,0.000000
150,TLS,Timor-Leste,2018,0.000000
43,DJI,Djibouti,2018,0.000000
131,LCA,St. Lucia,2018,0.000000
16,BOL,Bolivia,2018,0.000000
...,...,...,...,...
128,RUS,Russia,2018,4.506584
116,NOR,Norway,2018,5.157427
166,VNM,Vietnam,2018,6.252486
121,PER,Peru,2018,8.019984


In [151]:
df[["ISO", 'Country', 'Year', 'Share of world fishing export']].sort_values(by='Share of world fishing export').tail(20)

Unnamed: 0,ISO,Country,Year,Share of world fishing export
104,MAR,Morocco,2018,1.290308
25,CAN,Canada,2018,1.410008
147,SWE,Sweden,2018,1.59813
159,GBR,United Kingdom,2018,1.75984
46,ECU,Ecuador,2018,1.874341
70,ISL,Iceland,2018,1.901101
72,IDN,Indonesia,2018,1.95836
59,DEU,Germany,2018,2.460583
143,ESP,Spain,2018,2.601031
71,IND,India,2018,2.901892


In [62]:
def preprocess_loss():
    """Return the summed FAO 'Loss' element per country and year.

    The raw ME3 FAO file is summed per (Area, Year, Element) and pivoted so that
    each Element becomes a column; the 'Loss' column is exposed as 'Value'.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value'.
    """
    totals = (
        pd.read_csv('data/indicator/ME3/raw/ME3_FAO.M.csv')
          .groupby(['Area', 'Year', 'Element'])['Value']
          .sum()
          .reset_index()
    )

    # One row per (Area, Year), one column per Element.
    wide = totals.pivot(index=['Area', 'Year'], columns='Element', values='Value').reset_index()
    wide = wide.rename(columns={'Area': 'Country', 'Loss': 'Value'})

    return add_ISO(wide)[['ISO', 'Year', 'Value']]


def preprocess_waste():
    """Return the summed SDG food-waste value per country.

    Rows for the 'Southern Africa' area are excluded. Values are summed per
    (GeoAreaName, TimePeriod) and the period column is then dropped, so the
    result carries no year; it is meant to be joined on 'ISO' only.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Value'.
    """
    raw = pd.read_csv('data/indicator/ME3/raw/ME3.1_SDG.M.csv')
    kept = raw.query("GeoAreaName not in ['Southern Africa']")

    totals = kept.groupby(['GeoAreaName', 'TimePeriod'])['Value'].sum().reset_index()
    totals = (
        totals.rename(columns={'GeoAreaName': 'Country', 'TimePeriod': 'Year'})
              .drop(columns=['Year'])
    )

    return add_ISO(totals)[['ISO', 'Value']]

def preprocess_pop():
    """Return population in long format for years after 2013.

    The wide file (one column per year) is melted into 'ISO', 'Year', 'Value'
    rows; 'Year' is cast to int before filtering.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value', restricted to Year > 2013.
    """
    wide = pd.read_csv('data/indicator/ME3/raw/ME3.3_WB.M.csv')
    wide = wide.drop(columns=['Country Name', 'Indicator Code', 'Indicator Name'])
    wide = wide.rename(columns={'Country Code': 'ISO'})

    # Year labels come from the column headers, so they are strings until cast.
    long = wide.melt(id_vars=['ISO'], var_name='Year', value_name='Value')
    long = long.astype({'Year': int})

    return long[long['Year'] > 2013]
loss = preprocess_loss()

waste = preprocess_waste()
pop = preprocess_pop()

In [63]:
loss_pop = pd.merge(loss, pop, on=['ISO', 'Year'], suffixes=('_loss', '_pop'))
waste_pop = pd.merge(waste, pop, on=['ISO'], suffixes=('_waste', '_pop'))

In [66]:
loss_pop['loss_capita'] = loss_pop["Value_loss"] / loss_pop['Value_pop']
waste_pop['waste_capita'] = waste_pop['Value_waste'] / waste_pop['Value_pop']


In [93]:
# Join per-capita waste and loss on country and year, keep 2018 only, and
# switch to short column names. 'Value_pop' is dropped from the loss side so
# that population appears once in the result.
df = (
    waste_pop
      .merge(loss_pop.drop(columns=['Value_pop']), on=['Year', 'ISO'])
      .query('Year == 2018')
      .rename(columns={"Value_waste": 'waste', 'Value_pop': 'pop', 'Value_loss': 'loss'})
)

# Attach readable country names derived from the ISO codes.
df['Country'] = ISO_to_Country(df['ISO'])

In [96]:
df[['ISO', 'Country', 'Year', 'pop', 'loss', 'waste', 'loss_capita', 'waste_capita']].to_csv('waste_loss_per_capita.csv', index=False)

In [105]:
df.sort_values(by='waste_capita').tail(30)

Unnamed: 0,ISO,waste,Year,pop,waste_capita,loss,loss_capita,Country
694,SEN,2328423.0,2018,15854324.0,0.146864,460153,0.029024,Senegal
19,AGO,4547186.0,2018,30809787.0,0.147589,3068556,0.099597,Angola
449,LBN,1014263.0,2018,6859408.0,0.147864,234649,0.034208,Lebanon
139,CAF,692716.8,2018,4666375.0,0.148449,148022,0.031721,Central African Republic
709,SLE,1140592.0,2018,7650149.0,0.149094,486689,0.063618,Sierra Leone
779,TGO,1179890.0,2018,7889095.0,0.14956,358061,0.045387,Togo
459,LBR,720774.7,2018,4818976.0,0.14957,169982,0.035273,Liberia
334,GNB,280418.0,2018,1874304.0,0.149612,79707,0.042526,Guinea-Bissau
479,MWI,2719467.0,2018,18143215.0,0.149889,2055064,0.113269,Malawi
474,MDG,3937049.0,2018,26262313.0,0.149913,1125473,0.042855,Madagascar


In [74]:
waste_pop.sort_values(by="waste_capita", ascending=False).query("Year == 2018").dropna().head(50)

Unnamed: 0,ISO,Value_waste,Year,Value_pop,waste_capita
879,FSM,68521.28,2018,112640.0,0.608321
823,MYS,8301313.0,2018,31528033.0,0.263299
991,NGA,46640840.0,2018,195874685.0,0.238116
1131,RWA,2622007.0,2018,12301969.0,0.213137
1110,MDA,482446.1,2018,2708214.0,0.178142
102,BHR,279153.2,2018,1569440.0,0.177868
676,ISR,1520144.0,2018,8882800.0,0.171133
550,GRC,1829139.0,2018,10732882.0,0.170424
655,IRQ,6436089.0,2018,38433604.0,0.16746
1439,TZA,9418609.0,2018,56313444.0,0.167253


In [8]:
#indicator_pipeline('ME3')

In [None]:

def preprocess_loss():
    """Return food loss as a percentage of production per country and year.

    The raw ME3 FAO file is summed per (Area, Year, Element) and pivoted so that
    'Loss' and 'Production' become columns; 'Value' is Loss / Production * 100.

    Returns
    -------
    pandas.DataFrame
        Whatever ``add_ISO`` returns for a frame with columns 'Country', 'Year'
        and 'Value'.
    """
    by_element = (
        pd.read_csv('data/indicator/ME3/raw/ME3_FAO.M.csv')
          .groupby(['Area', 'Year', 'Element'])['Value']
          .sum()
          .reset_index()
          .pivot(index=['Area', 'Year'], columns='Element', values='Value')
    )

    loss_share = (by_element['Loss'] / by_element['Production'] * 100).rename('Value')
    tidy = loss_share.reset_index().rename(columns={'Area': 'Country'})

    return add_ISO(tidy)


def preprocess_waste():
    """Return food waste as a percentage of consumption per country and year.

    Consumption is summed per (Area, Year). Waste is summed per
    (GeoAreaName, TimePeriod) and its period column is dropped, so the two
    tables are joined on 'ISO' only and the 'Year' in the result comes from the
    consumption table.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value' with Value = waste / consumption * 100.
    """
    consumption = (
        pd.read_csv('data/indicator/ME3/raw/ME3.0_FAO.M.csv')
          .groupby(['Area', 'Year'])['Value']
          .sum()
          .reset_index()
          .rename(columns={'Area': 'Country'})
    )
    consumption = add_ISO(consumption)

    waste_totals = (
        pd.read_csv('data/indicator/ME3/raw/ME3.1_SDG.csv')
          .groupby(['GeoAreaName', 'TimePeriod'])['Value']
          .sum()
          .reset_index()
          .rename(columns={'GeoAreaName': 'Country', 'TimePeriod': 'Year'})
          .drop(columns=['Year'])
    )
    waste_totals = add_ISO(waste_totals)

    merged = pd.merge(consumption, waste_totals, on=['ISO'], suffixes=('_cons', '_waste'))
    merged = merged.assign(Value=merged['Value_waste'] / merged['Value_cons'] * 100)

    return merged[['ISO', 'Year', 'Value']]




def preprocess_2():
    """Aggregation method 2: normalise each ratio per year, then average.

    The loss-to-production and waste-to-consumption ratios are joined on
    (ISO, Year), each is normalised within its year against a zero target with
    a 'negative' relation, and the two normalised columns are averaged.

    NOTE(review): the whole per-year frame, including the non-indicator
    columns, is handed to ``GreenGrowthScaler.normalize`` — confirm that the
    scaler tolerates this.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value' with a fresh integer index.
    """
    ratio_cols = ['Value_loss_to_production', 'Value_waste_to_consumption']

    waste_ratio = preprocess_waste()
    loss_ratio = preprocess_loss()
    merged = loss_ratio.merge(
        waste_ratio,
        on=['ISO', 'Year'],
        suffixes=('_loss_to_production', '_waste_to_consumption'),
    )

    ST = pd.DataFrame(
        {
            "Indicator": ratio_cols,
            "Number of targets": [1, 1],
            "Relation": ['negative', 'negative'],
            'Target 1': [0, 0],
            'Target 2': [np.nan, np.nan],
        }
    ).set_index('Indicator')

    def normalize_year(year_frame):
        # The scaler is applied to one year at a time.
        return GreenGrowthScaler().normalize(year_frame, ST)

    normed = merged.groupby(['Year']).apply(normalize_year)
    normed['Value'] = normed[ratio_cols].mean(axis=1)

    return normed[['ISO', 'Year', 'Value']].reset_index(drop=True)


def preprocess_1():
    """Aggregation method 1: average the two ratios, then normalise per year.

    The loss-to-production and waste-to-consumption ratios are joined on
    (ISO, Year) and averaged into 'Value', which is then normalised within each
    year against a zero target with a 'negative' relation.

    NOTE(review): the whole per-year frame, including the non-indicator
    columns, is handed to ``GreenGrowthScaler.normalize`` — confirm that the
    scaler tolerates this.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value' with a fresh integer index.
    """
    waste_ratio = preprocess_waste()
    loss_ratio = preprocess_loss()
    merged = loss_ratio.merge(
        waste_ratio,
        on=['ISO', 'Year'],
        suffixes=('_loss_to_production', '_waste_to_consumption'),
    )
    merged['Value'] = merged[['Value_loss_to_production', 'Value_waste_to_consumption']].mean(axis=1)

    ST = pd.DataFrame(
        {
            "Indicator": ['Value'],
            "Number of targets": [1],
            "Relation": ['negative'],
            'Target 1': [0],
            'Target 2': [np.nan],
        }
    ).set_index('Indicator')

    def normalize_year(year_frame):
        # The scaler is applied to one year at a time.
        return GreenGrowthScaler().normalize(year_frame, ST)

    normed = merged.groupby(['Year']).apply(normalize_year)

    return normed[['ISO', 'Year', 'Value']].reset_index(drop=True)
    
    
test_1 = preprocess_1()
test_2 = preprocess_2()

In [None]:
test = pd.merge(test_1, test_2, on=['ISO', 'Year'], suffixes=('_method_1', '_method_2'))

In [None]:
import seaborn as sns
import plotly.express as px

px.scatter(test.query("Year == 2018"), x='Value_method_1', y='Value_method_2', hover_data=['ISO'])
#sns.scatterplot(data=test.query("Year == 2018"), x='Value_1', y='Value_2')

In [None]:
test.query('Year == 2018').drop(columns=['Year']).to_csv("ME3_aggregation.csv", index=False)

In [None]:
test['Value'].describe()

In [None]:
test.query("Year == 2018").sort_values(by='Value', ascending=False).head(10)

In [None]:
indicator_pipeline('GN2')

In [None]:
pd.read_csv('data/indicator/GN2/processed/GN2_origin.M.csv').query('ISO == "AFG"')

In [None]:
pd.read_csv('data/indicator/GN2/processed/GN2_SDG.csv').query('ISO == "AFG"')

In [None]:
def preprocess():
    """Compute installed renewable capacity per capita (GN2) by country and year.

    Keeps the "Total Renewable" rows of the IRENA capacity file, sums the
    installed capacity (MW) per (ISO, Year), and joins it with the population
    table, which is melted from one-column-per-year to long format.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value_capa' (summed MW), 'Value_pop' and 'Value',
        where Value = Value_capa / Value_pop * 1e6. That is watts per person if
        'Value_pop' is a head count — TODO confirm against the source file.
    """
    capacity = (
        pd.read_csv('data/indicator/GN2/raw/GN2_IRENA.M.csv', header=5)
          .rename(columns={'RE or Non-RE': 'Type', 'ISO Code': 'ISO'})
          .query('Type == "Total Renewable"')
          # assign() builds the new columns on a fresh frame, so nothing is
          # written onto the filtered slice (no SettingWithCopyWarning).
          .assign(
              # Capacity may be stored as text with thousands separators ("1,000").
              Value=lambda frame: frame['Electricity Installed Capacity (MW)'].replace(',', '', regex=True).astype(float),
              Year=lambda frame: frame['Year'].astype(int),
          )
          .groupby(['ISO', 'Year'])['Value'].sum().reset_index()
    )

    pop = (
        pd.read_csv('data/indicator/GN2/raw/GN2.0_WB.M.csv')
          .drop(columns=['Country Name', 'Indicator Code', 'Indicator Name'])
          .rename(columns={'Country Code': 'ISO'})
          .melt(id_vars=['ISO'], var_name='Year', value_name='Value')
          # Year labels come from the column headers, so they are strings until cast.
          .assign(Year=lambda frame: frame['Year'].astype(int))
    )

    merged = pd.merge(capacity, pop, on=['ISO', 'Year'], suffixes=('_capa', '_pop'))
    merged['Value'] = merged['Value_capa'] / merged['Value_pop'] * 1e6
    return merged


df = preprocess()
    

In [None]:
df.query('ISO == "SWE"')

In [None]:
pd.read_csv('data/indicator/GN2/raw/GN2.0_WB.M.csv').drop(columns=['Country Name', 'Indicator Code', 'Indicator Name']).rename(columns={'Country Code': 'ISO'}).melt(id_vars=['ISO'], var_name='Year', value_name='Value')

In [None]:
df

In [None]:
from processing.utils import add_ISO


def preprocess_loss():
    """Return food loss as a percentage of production per country and year.

    The raw ME3 FAO file is summed per (Area, Year, Element) and pivoted so that
    'Loss' and 'Production' become columns; 'Value' is Loss / Production * 100.

    Returns
    -------
    pandas.DataFrame
        Whatever ``add_ISO`` returns for a frame with columns 'Country', 'Year'
        and 'Value'.
    """
    element_totals = (
        pd.read_csv('data/indicator/ME3/raw/ME3_FAO.M.csv')
          .groupby(['Area', 'Year', 'Element'])['Value']
          .sum()
          .reset_index()
    )
    wide = element_totals.pivot(index=['Area', 'Year'], columns='Element', values='Value')

    percent_lost = (wide['Loss'] / wide['Production'] * 100).rename('Value')

    return add_ISO(percent_lost.reset_index().rename(columns={'Area': 'Country'}))


def preprocess_waste():
    """Return food waste as a percentage of consumption per country and year.

    Consumption is summed per (Area, Year). Waste is summed per
    (GeoAreaName, TimePeriod) and its period column is dropped, so the two
    tables are joined on 'ISO' only and the 'Year' in the result comes from the
    consumption table.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value' with Value = waste / consumption * 100.
    """
    cons_raw = pd.read_csv('data/indicator/ME3/raw/ME3.0_FAO.M.csv')
    cons_totals = cons_raw.groupby(['Area', 'Year'])['Value'].sum().reset_index()
    cons = add_ISO(cons_totals.rename(columns={'Area': 'Country'}))

    waste_raw = pd.read_csv('data/indicator/ME3/raw/ME3.1_SDG.csv')
    waste_totals = waste_raw.groupby(['GeoAreaName', 'TimePeriod'])['Value'].sum().reset_index()
    waste_totals = waste_totals.rename(columns={'GeoAreaName': 'Country', 'TimePeriod': 'Year'})
    # The waste table keeps no year of its own.
    waste = add_ISO(waste_totals.drop(columns=['Year']))

    joined = pd.merge(cons, waste, on=['ISO'], suffixes=('_cons', '_waste'))
    joined = joined.assign(Value=joined['Value_waste'] / joined['Value_cons'] * 100)

    return joined[['ISO', 'Year', 'Value']]


def preprocess():
    """Return the un-normalised ME3 value: the mean of the two ratios.

    The loss-to-production and waste-to-consumption ratios are joined on
    (ISO, Year) and averaged row by row. No target-based normalisation is
    applied here.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISO', 'Year', 'Value'.
    """
    ratio_cols = ['Value_loss_to_production', 'Value_waste_to_consumption']

    waste_ratio = preprocess_waste()
    loss_ratio = preprocess_loss()
    merged = loss_ratio.merge(
        waste_ratio,
        on=['ISO', 'Year'],
        suffixes=('_loss_to_production', '_waste_to_consumption'),
    )

    averaged = merged.assign(Value=merged[ratio_cols].mean(axis=1))
    return averaged[['ISO', 'Year', 'Value']]
    

In [None]:
df = preprocess()

In [None]:
#Normalized_Indicator = GreenGrowthScaler().normalize(indicator, ST)
df

In [None]:
ST

In [None]:
df = preprocess()

In [None]:
df_waste = preprocess_waste()
df_loss = preprocess_loss()

In [None]:
df_loss

In [None]:
df_loss.merge(df_waste, on=['ISO', 'Year'], suffixes=('_loss_to_production', '_waste_to_consumption')).to_csv('food_waste_loss.csv')

In [None]:
test = df.sort_values(by=['Value'], ascending=False).query('Year == 2018').head(50)[['ISO', 'Year', 'Value']]

In [None]:
ISO_to_Everything(df).sort_values(by=['Value'], ascending=False).query('Year == 2018').head(50)[['Country', 'Year', 'Value']]

In [None]:
pd.read_csv('data/indicator/ME3/raw/ME3.1_SDG.csv').query('GeoAreaName == "Australia"')

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

def preprocess():
    """Load the raw GV2 IRENA workbook and return it unprocessed.

    Returns
    -------
    pandas.DataFrame
        The sheet exactly as read by ``pd.read_excel``.
    """
    raw = pd.read_excel('data/indicator/GV2/raw/GV2_IRENA.M.xlsx')
    # Steps that were drafted but are currently disabled: rename 'ISO-code' to
    # 'ISO', then sum 'Amount (2019 USD million)' per (ISO, Year) into 'Value'.
    return raw


df = preprocess()

In [None]:
df#.query('ISO == "FRA"')

# Food security

In [None]:
indicator_pipeline('GS1')

In [None]:
data = pd.read_csv('data/indicator/GS1/processed/GS1_SDG.csv')

# Zero target with a 'negative' relation (lower GS1 values are better).
ST = pd.DataFrame(
    {
        "Indicator": ['GS1'],
        "Number of targets": 1,
        "Relation": 'negative',
        'Target 1': 0,
        'Target 2': np.nan,
    }
).set_index('Indicator')


def _normalize_gs1_year(year_frame):
    """Normalise one year's values as a single 'GS1' column indexed by ISO."""
    gs1 = year_frame.rename(columns={'Value': 'GS1'}).set_index('ISO')[['GS1']]
    return GreenGrowthScaler().normalize(gs1, ST)


# Normalise year by year, then lay the result out as one row per ISO and one
# column per year, dropping years that contain no value at all.
df = (
    data.groupby('Year')
        .apply(_normalize_gs1_year)
        .reset_index()
        .pivot(index=['ISO'], columns=['Year'], values='GS1')
        .dropna(axis=1, how='all')
        .reset_index()
)
#ISO_to_Everything(df)#.to_csv('data/indicator/GS1/processed/food_insecurity_ts.csv')

In [None]:
# # This part is added to data/indicator/TMP/preprocess.py

# def process_TMP():
#     df = (
#         pd.read_csv('data/indicator/TMP/raw/TMP_IEA.M.csv')
#           .melt(id_vars=['Country', 'Mode/vehicle type', 'Indicator'], var_name=['Year'], value_name='Value')
#           .rename(columns={'Mode/vehicle type': 'mode'})
#           .assign(Indicator=lambda x: x.Indicator.str.strip())
#           .query("mode == 'Total passenger transport' and Indicator == 'Passenger-kilometres energy intensity (MJ/pkm)'")
#           .drop(columns=['mode', 'Indicator'])
#           .dropna()
#     )
#     return df
    
    
# config_TMP = {'Variable': 'TMP',
#              'function': process_TMP,
#              'Description': 'Total passenger transport Passenger-kilometres energy intensity (MJ/pkm)',
#              'Source': 'IEA',
#              'URL': 'https://www.iea.org/data-and-statistics/data-product/energy-efficiency-indicators'}

In [None]:
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler

# One-column frame of indicators (here only 'TMP'), indexed by ISO.
# It must contain a single year, so only 2019 is kept.
indicator = (
    pd.read_csv('data/indicator/TMP/processed/TMP_origin.M.csv')
      .query("Year == 2019")
      .set_index("ISO")['Value']
      .to_frame('TMP')
)

ST = pd.DataFrame(
    {
        "Indicator": ['TMP'],
        "Number of targets": 1,
        "Relation": 'negative',
        'Target 1': 1.104,
        'Target 2': np.nan,
    }
).set_index('Indicator')

# Call this and that's it.
Normalized_Indicator = GreenGrowthScaler().normalize(indicator, ST)

# ST stands for "sustainable target":
# - "Number of targets" is almost always 1, so leave it at 1 by default.
# - "Relation" says whether a high value is good or bad for the environment.
#   Here a lower energy intensity is better, so the relation is negative.
# - "Target 1" and "Target 2" hold the target values. To compute a target,
#   take the average of the 5 best countries.

In [None]:
Normalized_Indicator = GreenGrowthScaler().normalize(indicator, ST) # Call this and that's it.
Normalized_Indicator

In [None]:
Normalized_Indicator.to_csv('data/indicator/TMP/processed/TMP_Normalized_origin.M.csv') # You can share this

In [None]:
pd.read_csv('data/sustainable_targets/ST_2020.csv', index_col=0) # Just so you can see what it looks like for other indicators

# Universal access

In [None]:
import pandas as pd
import numpy as np
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler
from sklearn.preprocessing import MinMaxScaler

def process():
    """Build a universal-access score as the mean of three scaled sub-indicators.

    Each raw GS3 file is read, columns containing any missing value are
    dropped, and its 'Value' column is renamed to the sub-indicator name. The
    three tables are aligned on 'Country', the 'Year' columns are removed and
    countries missing any sub-indicator are dropped. Every remaining column is
    min-max scaled to the range 1-100 and the row mean becomes 'Value'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country', 'Value', 'Year', with Year fixed to 2020.
    """
    sources = [
        ('data/indicator/GS3/raw/Rural access index.csv', 'Rural access index'),
        ('data/indicator/GS3/raw/Percentage of female workers in transport.csv', 'Percentage female workers in transport'),
        ('data/indicator/GS3/raw/Rapid Transit to Resident Ratio.csv', 'Rapid Transit to Resident Ratio'),
    ]

    frames = [
        pd.read_csv(path)
          .dropna(axis=1)
          .rename(columns={'Value': column})
          .set_index(['Country'])
        for path, column in sources
    ]

    combined = pd.concat(frames, axis=1).drop(columns=['Year']).dropna()

    scaled = MinMaxScaler(feature_range=(1, 100)).fit_transform(combined)
    df_norm = pd.DataFrame(scaled, columns=combined.columns, index=combined.index)

    return df_norm.mean(axis=1).to_frame(name='Value').assign(Year=2020).reset_index()

In [None]:
df = process()
#df = add_ISO(df.reset_index())
#ISO_to_Everything(df)[['Country', 'Universal Acess']].to_csv('data/indicator/GS3/processed/normalized_universal_access_index.csv')

In [None]:
df

In [None]:
import pandas as pd
import numpy as np
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler
from sklearn.preprocessing import MinMaxScaler
from processing.utils import add_ISO
from index.utils import ISO_to_Everything

def process():
    """Min-max scale the logistics performance index to the range 1-100.

    The raw file is read, columns containing any missing value are dropped,
    'Value' is renamed to 'Logistics performance index' and the frame is
    indexed by 'Country'. Every remaining column is then scaled, so they must
    all be numeric.

    Returns
    -------
    pandas.DataFrame
        The scaled values with the same columns and 'Country' index as the input.
    """
    df = (
        pd.read_csv('data/indicator/GS3/raw/Logistics performance index.csv')
          .dropna(axis=1)
          .rename(columns={'Value': 'Logistics performance index'})
          .set_index('Country')
    )

    # A target-based alternative (GreenGrowthScaler with a 'positive' relation
    # and a single target of 4.069) was drafted here and disabled in favour of
    # plain min-max scaling; its unused target table has been removed.
    scaled = MinMaxScaler(feature_range=(1, 100)).fit_transform(df)

    return pd.DataFrame(scaled, columns=df.columns, index=df.index)

In [None]:
df = process()
df = add_ISO(df['Logistics performance index'].reset_index())

In [None]:
ISO_to_Everything(df)[['Country', 'Logistics performance index']].to_csv('data/indicator/GS3/processed/normalized_Logistics_performance_index.csv')

In [None]:
pd.read_csv('data/indicator/GS3/processed/normalized_Logistics_performance_index.csv')

In [None]:
pd.read_csv('data/indicator/GS3/processed/normalized_universal_access_index.csv')