In [9]:
import pandas as pd
import numpy as np

In [10]:
# This part is added to data/indicator/TMP/preprocess.py

def process_TMP():
    """Load the raw IEA file and return the TMP indicator in long format.

    Reads ``data/indicator/TMP/raw/TMP_IEA.M.csv``, unpivots every column
    other than ``Country``, ``Mode/vehicle type`` and ``Indicator`` into
    ``Year``/``Value`` pairs, and keeps only the rows whose mode is
    'Total passenger transport' and whose (whitespace-stripped) indicator is
    'Passenger-kilometres energy intensity (MJ/pkm)'.

    Returns
    -------
    pandas.DataFrame
        Columns ``Country``, ``Year`` and ``Value``; rows containing a missing
        value are dropped. ``Year`` holds the original column labels as read
        from the file (no dtype conversion is applied here).
    """
    df = (
        pd.read_csv('data/indicator/TMP/raw/TMP_IEA.M.csv')
          # var_name is passed as a scalar: the columns are a flat index and
          # recent pandas versions raise ValueError when given a list here.
          .melt(id_vars=['Country', 'Mode/vehicle type', 'Indicator'], var_name='Year', value_name='Value')
          # 'mode' is easier to reference inside query() than the original label.
          .rename(columns={'Mode/vehicle type': 'mode'})
          # Strip padding so the equality test in query() matches.
          .assign(Indicator=lambda x: x.Indicator.str.strip())
          .query("mode == 'Total passenger transport' and Indicator == 'Passenger-kilometres energy intensity (MJ/pkm)'")
          .drop(columns=['mode', 'Indicator'])
          .dropna()
    )
    return df
    
    
# Metadata entry for the TMP variable. 'function' points at the preprocessing
# callable defined above; the remaining keys describe the data and its origin.
config_TMP = dict(
    Variable='TMP',
    function=process_TMP,
    Description='Total passenger transport Passenger-kilometres energy intensity (MJ/pkm)',
    Source='IEA',
    URL='https://www.iea.org/data-and-statistics/data-product/energy-efficiency-indicators',
)

In [11]:
# Smoke test: run the TMP preprocessing once and keep the returned frame
# in `test` so it can be inspected interactively.
test = process_TMP()

In [12]:
from tasks.download import download_indicator
from tasks.preprocess import preprocess_APIs_data_in_indicator, preprocess_MANUAL_data_in_indicator
from tasks.process import process_indicator

import pandas as pd

def indicator_pipeline(indicator, fresh_start=True):
    """Run the download, preprocess and process stages for one indicator.

    Parameters
    ----------
    indicator
        Indicator identifier, handed unchanged to every stage.
    fresh_start : bool, default True
        Forwarded to ``download_indicator`` only.

    Returns
    -------
    None
        Each stage is called for its side effects; progress is printed
        before every stage.
    """
    print('Downloading..')
    download_indicator(indicator, fresh_start)

    print('Preprocessing...')
    # Manual files are handled first, then API-sourced data.
    for preprocess_step in (
        preprocess_MANUAL_data_in_indicator,
        preprocess_APIs_data_in_indicator,
    ):
        preprocess_step(indicator)

    print('Processing...')
    process_indicator(indicator)
    
# Run the download -> preprocess -> process chain for the TMP indicator
# (fresh_start is left at its default of True).
indicator_pipeline('TMP')

Downloading..
Downloading TMP's data: DONE
Preprocessing...
PreProcessing TMP Manual files: Saving at data/indicator/TMP/preprocessed/TMP_origin.M.csv
Done
Processing...
Processing TMP: 
	 Processing TMP_origin.M.csv
	 Imputation:DONE
	 Outlier removal:DONE
	 Formatting:DONE
	 saving at data/indicator/TMP/processed/TMP_origin.M.csv


In [13]:
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler

# Frame of indicators to normalize (here only "TMP"), indexed by the ISO column.
# It must contain a single year, hence the filter on 2019.
indicator = (
    pd.read_csv('data/indicator/TMP/processed/TMP_origin.M.csv')
      .loc[lambda frame: frame['Year'] == 2019, ['ISO', 'Value']]
      .rename(columns={'Value': 'TMP'})
      .set_index('ISO')
)

# ST stands for "sustainable target"; the table holds one row per indicator:
# - "Number of targets" is almost always 1, so leave it at 1 by default.
# - "Relation" states whether a high value is good or bad for the environment.
#   Lower energy intensity is better, so the relation is 'negative' here.
# - "Target 1" / "Target 2" hold the target value(s). To compute a target,
#   take the average of the 5 best countries.
ST = pd.DataFrame(
    {
        "Number of targets": 1,
        "Relation": 'negative',
        'Target 1': 1.104,
        'Target 2': np.nan,
    },
    index=pd.Index(['TMP'], name='Indicator'),
)

In [14]:
# Normalize the single-year `indicator` frame using the targets in `ST`.
# GreenGrowthScaler is a project class; see its module for the exact formula.
Normalized_Indicator = GreenGrowthScaler().normalize(indicator, ST) # This single call is all that is needed.
# Bare expression so the notebook renders the result.
Normalized_Indicator

Unnamed: 0_level_0,TMP
ISO,Unnamed: 1_level_1
AUS,1.0
AUT,33.413984
BEL,58.685905
BRA,97.143174
CAN,40.006659
CHE,75.167592
CZE,60.883463
DEU,53.192009
DNK,43.302997
ESP,38.90788


In [15]:
# Save the normalized values alongside the processed TMP data; this CSV is the file to share.
Normalized_Indicator.to_csv('data/indicator/TMP/processed/TMP_Normalized_origin.M.csv')

In [16]:
# Reference only: display the existing sustainable-target table to see how
# targets are specified for other indicators (first CSV column is the index).
pd.read_csv('data/sustainable_targets/ST_2020.csv', index_col=0)

Unnamed: 0,Number of targets,Relation,Target 1,Target 2
EE1,1,negative,0.928,
EE2,1,positive,,51.4
EW1,1,positive,,265.757935
EW2,2,negative,25.0,75.0
SL1,1,negative,5.0,
SL2,1,positive,,11.9
ME1,1,negative,0.169685,
ME2,1,negative,5.0,
EQ1,1,negative,10.0,
EQ2,1,negative,0.0,


#

In [231]:
import pandas as pd
import numpy as np
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler
from sklearn.preprocessing import MinMaxScaler

def process():
    """Return the min-max scaled components of the universal access index.

    Reads three raw GS3 CSV files, renames each file's ``Value`` column to the
    component name, aligns the files on ``Country``, drops the ``Year``
    columns and every country that lacks at least one component, then
    rescales each column independently to the range [0, 100].

    An earlier draft normalized 'Percentage female workers in transport' and
    'Rapid Transit to Resident Ratio' against sustainable targets (28 and
    79.34, relation 'positive') with GreenGrowthScaler; that path was disabled
    in favour of plain min-max scaling, so the unused target table is no
    longer built here.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country`` with one column per component.
    """
    raw_dir = 'data/indicator/GS3/raw'
    # Raw file stem -> name given to that file's 'Value' column. The second
    # entry differs on purpose: the file name contains "of", the column doesn't.
    components = {
        'Rural access index': 'Rural access index',
        'Percentage of female workers in transport': 'Percentage female workers in transport',
        'Rapid Transit to Resident Ratio': 'Rapid Transit to Resident Ratio',
    }

    frames = [
        pd.read_csv(f'{raw_dir}/{stem}.csv')
          .dropna(axis=1)  # discard any column that contains a missing value
          .rename(columns={'Value': column})
          .set_index('Country')
        for stem, column in components.items()
    ]

    # Every file contributes its own 'Year' column; drop() removes all of them.
    df = pd.concat(frames, axis=1).drop(columns=['Year']).dropna()

    # NOTE(review): the lower bound is 0 here, whereas the logistics
    # performance normalization later in this notebook uses (1, 100) —
    # confirm which range is intended.
    scaled = MinMaxScaler(feature_range=(0, 100)).fit_transform(df)
    return pd.DataFrame(scaled, columns=df.columns, index=df.index)

In [239]:
# add_ISO and ISO_to_Everything are otherwise only imported in a later cell of
# this notebook; importing them here keeps this cell runnable on a fresh kernel
# (Restart & Run All) instead of failing with NameError.
from processing.utils import add_ISO
from index.utils import ISO_to_Everything

# Average the scaled components per country into a single composite column.
# NOTE(review): 'Universal Acess' is misspelled but kept as-is because the
# saved CSV header may be consumed elsewhere — confirm before renaming.
df = process().mean(axis=1).to_frame(name='Universal Acess')
# Move Country out of the index before handing the frame to add_ISO.
df = add_ISO(df.reset_index())
ISO_to_Everything(df)[['Country', 'Universal Acess']].to_csv('data/indicator/GS3/processed/normalized_universal_access_index.csv')

In [227]:
import pandas as pd
import numpy as np
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler
from sklearn.preprocessing import MinMaxScaler
from processing.utils import add_ISO
from index.utils import ISO_to_Everything

def process():
    """Return the min-max scaled logistics performance index.

    Reads ``data/indicator/GS3/raw/Logistics performance index.csv``, drops
    every column that contains a missing value, renames ``Value`` to
    'Logistics performance index', indexes the frame by ``Country`` and
    rescales each remaining column independently to the range [1, 100].

    An earlier draft normalized against a sustainable target (4.069, relation
    'positive') with GreenGrowthScaler; that path was disabled in favour of
    plain min-max scaling, so the unused target table is no longer built here.

    NOTE(review): this notebook defines another function named ``process``
    in an earlier cell; running this cell replaces that definition.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Country``, same columns as the frame that was scaled.
    """
    df = (
        pd.read_csv('data/indicator/GS3/raw/Logistics performance index.csv')
          .dropna(axis=1)
          .rename(columns={'Value': 'Logistics performance index'})
          .set_index('Country')
    )

    scaled = MinMaxScaler(feature_range=(1, 100)).fit_transform(df)
    return pd.DataFrame(scaled, columns=df.columns, index=df.index)

In [228]:
# Take the scaled 'Logistics performance index' column, move Country out of the
# index, and pass the resulting two-column frame to add_ISO.
df = add_ISO(process()['Logistics performance index'].reset_index())

In [229]:
# Save the Country and scaled-score columns of the frame returned by
# ISO_to_Everything. NOTE(review): the saved file displayed below also lists
# countries with an empty score, so ISO_to_Everything appears to expand the
# frame to a full country list — confirm against index.utils.
ISO_to_Everything(df)[['Country', 'Logistics performance index']].to_csv('data/indicator/GS3/processed/normalized_Logistics_performance_index.csv')

In [241]:
# Read the saved logistics file back to check what was written.
pd.read_csv('data/indicator/GS3/processed/normalized_Logistics_performance_index.csv')

Unnamed: 0,ISO,Country,Logistics performance index
0,AFG,Afghanistan,1.000000
1,ALA,Aland Islands,
2,ALB,Albania,32.256217
3,DZA,Algeria,22.936501
4,ASM,American Samoa,
...,...,...,...
245,WLF,Wallis and Futuna Islands,
246,ESH,Western Sahara,
247,YEM,Yemen,14.891652
248,ZMB,Zambia,26.365453


In [243]:
# Read the saved universal access file back to check what was written.
pd.read_csv('data/indicator/GS3/processed/normalized_universal_access_index.csv')

Unnamed: 0,ISO,Country,Universal Acess
0,AFG,Afghanistan,19.583333
1,ALA,Aland Islands,
2,ALB,Albania,
3,DZA,Algeria,31.582788
4,ASM,American Samoa,
...,...,...,...
245,WLF,Wallis and Futuna Islands,
246,ESH,Western Sahara,
247,YEM,Yemen,
248,ZMB,Zambia,8.548851
