In [1]:
import pandas as pd
import numpy as np

In [2]:
def process_EST(path='data/indicator/EST/raw/EST_SUM4ALL.M.csv'):
    """Load the raw EST export and return only its complete rows.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the raw CSV file. Defaults to the EST raw file used by
        this project, so calling ``process_EST()`` with no argument behaves
        as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without the 'Delta' and 'Time Series' columns and
        without any row that still contains a missing value. The original
        row labels are kept (the index is not reset).

    Raises
    ------
    KeyError
        If 'Delta' or 'Time Series' is absent from the file (pandas'
        default behaviour for ``DataFrame.drop``).
    """
    raw = pd.read_csv(path)
    # Drop the unused columns first so that gaps in them do not cause
    # otherwise complete rows to be discarded by dropna().
    return raw.drop(columns=['Delta', 'Time Series']).dropna()

# Metadata describing the EST indicator; 'function' holds the callable that
# builds its processed frame.
config_EST = {
    'Variable': 'EST',
    'function': process_EST,
    'Description': 'Efficiency in sustainable transport',
    'Source': 'sum4all',
    'URL': 'https://www.sum4all.org/gra-tool/country-performance/indicators',
}





In [3]:
# For testing: run the processing function once and display the resulting frame.
test = process_EST()
test

Unnamed: 0,Country,Year,Value
0,Afghanistan,2018,1.949
1,Albania,2018,2.660
2,Algeria,2018,2.448
3,Angola,2018,2.046
4,Argentina,2018,2.887
...,...,...,...
160,"Venezuela, RB",2018,2.229
161,Vietnam,2018,3.274
162,"Yemen, Rep.",2018,2.265
163,Zambia,2018,2.526


In [4]:
from tasks.download import download_indicator
from tasks.preprocess import preprocess_APIs_data_in_indicator, preprocess_MANUAL_data_in_indicator
from tasks.process import process_indicator

import pandas as pd

def indicator_pipeline(indicator, fresh_start=True):
    """Run the download, preprocessing and processing stages for one indicator.

    Each stage is announced on stdout before it runs. The stages execute
    strictly in this order: download, MANUAL preprocessing, API
    preprocessing, processing.

    Parameters
    ----------
    indicator
        Forwarded unchanged to every stage (presumably an indicator code
        such as 'EST' -- TODO confirm against the ``tasks`` package).
    fresh_start : bool, optional
        Forwarded only to ``download_indicator``; its exact effect is
        defined there. Defaults to True.

    Returns
    -------
    None
    """
    print('Downloading..')
    download_indicator(indicator, fresh_start)

    print('Preprocessing...')
    # Manually supplied data is preprocessed before API-sourced data.
    for preprocess_step in (preprocess_MANUAL_data_in_indicator,
                            preprocess_APIs_data_in_indicator):
        preprocess_step(indicator)

    print('Processing...')
    process_indicator(indicator)

In [5]:
from index.IndexComputation.GreenGrowthIndex import GreenGrowthScaler

# Indicator frame handed to the scaler: rows indexed by ISO code, one column
# per indicator (here only 'EST'). It must cover a single year!
# NOTE(review): the year is hard-coded; the raw preview earlier in this
# notebook shows 2018 rows -- confirm 2019 is the intended year.
indicator = (
    pd.read_csv('data/indicator/EST/processed/EST_origin.M.csv')
    .loc[lambda frame: frame['Year'] == 2019, ['ISO', 'Value']]
    .set_index('ISO')
    .rename(columns={'Value': 'EST'})
)

# ST stands for "sustainable target"; it holds one row per indicator:
# - "Number of targets" is almost always 1, so leave it at 1 by default.
# - "Relation" says whether a high value is good or bad for the environment.
#   Here, lower energy intensity is better, so the relation is negative.
#   NOTE(review): the "energy intensity" wording looks carried over from
#   another indicator -- confirm that 'negative' is correct for EST.
# - "Target 1" and "Target 2" hold the value(s) of the target. To compute the
#   target, take the average of the 5 best countries.
# TODO: open question -- how are the "best" countries identified (are they
#       the countries with the highest values)?
ST = pd.DataFrame(
    {
        "Number of targets": 1,
        "Relation": 'negative',
        'Target 1': 1.104,
        'Target 2': np.nan,
    },
    index=pd.Index(['EST'], name='Indicator'),
)

In [6]:
# Normalize the single-year indicator frame against the sustainable targets
# in ST; this one call is all that is needed.
Normalized_Indicator = GreenGrowthScaler().normalize(indicator, ST)
Normalized_Indicator

Unnamed: 0_level_0,EST
ISO,Unnamed: 1_level_1
AFG,72.715531
AGO,69.626541
ALB,50.073550
ARE,8.802089
ARG,42.844675
...,...
VNM,30.520558
YEM,62.652428
ZAF,27.272342
ZMB,54.340815


In [7]:
# Save the normalized indicator into the processed EST folder.
Normalized_Indicator.to_csv('data/indicator/EST/processed/EST_Normalized_origin.M.csv')