In [None]:
import csv
import pandas as pd
import numpy as np

In [None]:
# Load the three raw indicator tables (SOE, COE, VOE) from the Colab workspace.
SOE_data, COE_data, VOE_data = map(
    pd.read_csv,
    (
        '/content/SOE_raw_na.csv',
        '/content/COE_raw_na.csv',
        '/content/VOE_raw_na.csv',
    ),
)

In [None]:
# Country names used to select the European rows of each indicator table.
# The spelling must match the `country` column of the raw CSV files exactly.
european_countries = [
    "Albania", "Andorra", "Austria", "Belarus", "Belgium",
    "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Cyprus",
    "Czech Republic", "Denmark", "Estonia", "Finland", "France",
    "Germany", "Greece", "Hungary", "San Marino", "Romania",
    "Portugal", "Poland", "North Macedonia", "Netherlands", "Montenegro",
    "Monaco", "Moldova", "Malta", "Luxembourg", "Lithuania",
    "Liechtenstein", "Latvia", "Kosovo", "Italy", "Ireland",
    "Iceland", "Norway", "Serbia", "Slovakia", "Slovenia",
    "Spain", "Sweden", "Switzerland", "Ukraine", "United Kingdom",
]

In [None]:

def preprocess_and_extrapolate(data, european_countries, scoring_bounds, start_year=1990):
    """Convert a raw indicator table into 0-100 scores for European countries.

    Processing steps:

    1. Keep the rows whose ``country`` is listed in ``european_countries``,
       drop the ``code`` and ``country`` columns and index the rows by ``iso``.
    2. Print the number of missing values per year column.
    3. Take the natural logarithm of every raw value.
    4. Rescale the log values linearly so that ``best`` maps to 100 and
       ``worst`` maps to 0, clipping the result to the range [0, 100].
    5. Give a score of 0 to every cell whose raw value was missing or exactly 0
       (such values have no usable logarithm).
    6. Rename the columns to integer years (the text after the last ``.`` of
       the original column label, e.g. ``SOE.raw.2003`` -> ``2003``).
    7. Prepend one column per year from ``start_year`` up to (but excluding)
       the first data year.

    Note on step 7: the years before the first data year are filled with the
    score of the first data year (a constant back-fill). This is exactly what
    ``np.interp`` returns for points to the left of its sample range, so no
    trend is projected backwards.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table with the columns ``code``, ``country``, ``iso`` and one
        numeric column per year whose label ends in ``.<year>``.
    european_countries : list-like of str
        Country names to keep (matched against the ``country`` column).
    scoring_bounds : tuple (best, worst)
        Log-scale values that receive the scores 100 and 0 respectively.
    start_year : int, default 1990
        First year of the back-filled range.

    Returns
    -------
    pandas.DataFrame
        Float scores indexed by ``iso`` with integer year columns running
        from ``start_year`` to the last data year. Negative raw values have
        no logarithm and come out as NaN.

    Raises
    ------
    ValueError
        If ``best`` and ``worst`` are equal (the rescaling would divide by 0).
    """
    best, worst = scoring_bounds
    if best == worst:
        raise ValueError("scoring_bounds must contain two different values (best, worst)")

    # Keep European rows only, drop the descriptive columns, index by ISO code.
    raw = (
        data.loc[data['country'].isin(european_countries)]
        .drop(columns=['code', 'country'])
        .set_index('iso')
        .astype(float)
    )

    # Report missing values before they are replaced.
    print("Missing Values:")
    print(raw.isna().sum())

    # Remember which cells cannot be log-transformed instead of using 0 as a
    # sentinel: ln(1) == 0 is a legitimate value and must still be scored.
    unscorable = raw.isna() | raw.eq(0)
    log_values = np.log(raw.mask(unscorable))

    print("Best and Worst Values:")
    print(best)
    print(worst)

    # Linear rescaling on the log scale; values beyond the bounds are clipped.
    scores = ((log_values - worst) / (best - worst) * 100).clip(lower=0, upper=100)
    # Missing / zero raw values receive the lowest score.
    scores = scores.mask(unscorable, 0.0)

    # "SOE.raw.2003" -> 2003, then make sure the years are in ascending order
    # because the back-fill below relies on the first column being the earliest.
    scores.columns = [int(str(label).split('.')[-1]) for label in scores.columns]
    scores = scores.sort_index(axis=1)

    if scores.shape[1] == 0:
        # No year columns at all: nothing to back-fill from.
        return scores

    # Back-fill the years preceding the data with the earliest available score.
    first_year = int(scores.columns[0])
    extrapolation_years = np.arange(start_year, first_year)
    backfill = pd.DataFrame(
        np.repeat(scores.iloc[:, [0]].to_numpy(), len(extrapolation_years), axis=1),
        index=scores.index,
        columns=extrapolation_years,
    )

    # Back-filled years first, then the scored data years.
    return pd.concat([backfill, scores], axis=1)

In [None]:
# SOE indicator: (best, worst) bounds on the log scale, rounded to 6 decimals.
scoring_bounds_SOE = tuple(
    round(np.log(raw_bound), 6) for raw_bound in (0.0002787108, 0.0626445373)
)

SOE_european = preprocess_and_extrapolate(
    data=SOE_data,
    european_countries=european_countries,
    scoring_bounds=scoring_bounds_SOE,
)

SOE_european

Missing Values:
SOE.raw.2003    0
SOE.raw.2004    0
SOE.raw.2005    0
SOE.raw.2006    0
SOE.raw.2007    0
SOE.raw.2008    0
SOE.raw.2009    0
SOE.raw.2010    0
SOE.raw.2011    0
SOE.raw.2012    0
SOE.raw.2013    0
SOE.raw.2014    0
SOE.raw.2015    0
SOE.raw.2016    0
SOE.raw.2017    0
SOE.raw.2018    0
SOE.raw.2019    0
dtype: int64
Best and Worst Values:
-8.185336
-2.770279


Unnamed: 0_level_0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALB,24.175697,24.175697,24.175697,24.175697,24.175697,24.175697,24.175697,24.175697,24.175697,24.175697,...,36.725324,35.123452,36.859526,38.448569,40.325686,39.856376,41.953612,41.824309,41.663263,43.261061
AND,40.081911,40.081911,40.081911,40.081911,40.081911,40.081911,40.081911,40.081911,40.081911,40.081911,...,53.351285,53.778528,55.09869,57.131063,57.598384,58.841693,60.338411,61.791772,64.307063,65.303167
AUT,41.54631,41.54631,41.54631,41.54631,41.54631,41.54631,41.54631,41.54631,41.54631,41.54631,...,51.602258,52.197516,53.7061,54.449048,54.799813,55.374573,56.973659,58.471874,57.682072,59.897752
BLR,45.707773,45.707773,45.707773,45.707773,45.707773,45.707773,45.707773,45.707773,45.707773,45.707773,...,46.77349,47.769628,48.122905,47.779685,48.254636,48.902969,49.267511,49.710463,49.542043,50.10533
BEL,26.280485,26.280485,26.280485,26.280485,26.280485,26.280485,26.280485,26.280485,26.280485,26.280485,...,39.262821,40.62682,42.188134,43.102962,44.230197,46.022064,47.102397,47.945879,49.413943,52.496556
BIH,10.838234,10.838234,10.838234,10.838234,10.838234,10.838234,10.838234,10.838234,10.838234,10.838234,...,23.538302,23.361052,24.636734,26.057518,26.613132,27.266296,28.345695,29.197094,29.644597,30.931567
BGR,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.470967,10.322495,10.915056,11.697648,12.986997,12.846836,13.813221,13.69822,15.128649,15.433652
HRV,20.652403,20.652403,20.652403,20.652403,20.652403,20.652403,20.652403,20.652403,20.652403,20.652403,...,32.529111,32.554685,33.981161,35.160876,35.723923,36.422512,37.497981,38.576808,39.068504,40.436634
CYP,21.212346,21.212346,21.212346,21.212346,21.212346,21.212346,21.212346,21.212346,21.212346,21.212346,...,25.867557,28.238608,28.424309,30.004941,30.753111,30.757054,31.108526,36.289459,38.067523,38.51967
CZE,23.475778,23.475778,23.475778,23.475778,23.475778,23.475778,23.475778,23.475778,23.475778,23.475778,...,31.415391,32.347938,33.935395,34.325444,34.841501,36.15559,36.877005,38.585675,37.82495,40.240002


In [None]:
# COE indicator: (best, worst) bounds on the log scale, rounded to 7 decimals.
scoring_bounds_COE = tuple(
    round(np.log(raw_bound), 7) for raw_bound in (0.06247684, 0.46986060)
)

COE_european = preprocess_and_extrapolate(
    data=COE_data,
    european_countries=european_countries,
    scoring_bounds=scoring_bounds_COE,
)

COE_european

Missing Values:
COE.raw.2003    0
COE.raw.2004    0
COE.raw.2005    0
COE.raw.2006    0
COE.raw.2007    0
COE.raw.2008    0
COE.raw.2009    0
COE.raw.2010    0
COE.raw.2011    0
COE.raw.2012    0
COE.raw.2013    0
COE.raw.2014    0
COE.raw.2015    0
COE.raw.2016    0
COE.raw.2017    0
COE.raw.2018    0
COE.raw.2019    0
dtype: int64
Best and Worst Values:
-2.7729594
-0.7553192


Unnamed: 0_level_0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALB,43.127101,43.127101,43.127101,43.127101,43.127101,43.127101,43.127101,43.127101,43.127101,43.127101,...,54.650161,50.62065,47.633349,56.375208,57.86082,54.595044,58.968269,56.87268,60.254554,61.484969
AND,42.521762,42.521762,42.521762,42.521762,42.521762,42.521762,42.521762,42.521762,42.521762,42.521762,...,53.167038,53.762828,53.142014,55.036151,56.186236,55.068241,56.176823,56.024847,57.650262,58.535733
AUT,32.075783,32.075783,32.075783,32.075783,32.075783,32.075783,32.075783,32.075783,32.075783,32.075783,...,42.446595,44.498177,46.339813,45.948521,46.489457,45.807266,48.621415,49.623317,50.910134,52.919613
BLR,50.502506,50.502506,50.502506,50.502506,50.502506,50.502506,50.502506,50.502506,50.502506,50.502506,...,54.294212,59.177651,58.255043,59.096503,59.24964,59.054744,60.193507,62.065535,61.643633,62.279318
BEL,23.698816,23.698816,23.698816,23.698816,23.698816,23.698816,23.698816,23.698816,23.698816,23.698816,...,39.445008,41.744632,43.549124,43.229754,45.679888,47.803109,47.62236,48.741564,51.450609,54.298013
BIH,38.579564,38.579564,38.579564,38.579564,38.579564,38.579564,38.579564,38.579564,38.579564,38.579564,...,50.11,48.64106,47.914104,52.583316,53.560736,51.321729,54.041,52.428103,55.801517,57.046441
BGR,38.14373,38.14373,38.14373,38.14373,38.14373,38.14373,38.14373,38.14373,38.14373,38.14373,...,46.570405,46.341569,47.27973,49.696916,51.087791,49.894639,51.177786,51.588549,53.592953,53.692091
HRV,29.856559,29.856559,29.856559,29.856559,29.856559,29.856559,29.856559,29.856559,29.856559,29.856559,...,42.367294,40.544269,42.411673,44.27033,46.244664,42.324332,45.461619,46.240794,48.62941,50.329415
CYP,52.146465,52.146465,52.146465,52.146465,52.146465,52.146465,52.146465,52.146465,52.146465,52.146465,...,57.795709,61.939376,60.045207,59.616807,63.770539,62.05734,61.373989,64.346384,65.972736,67.251924
CZE,27.511867,27.511867,27.511867,27.511867,27.511867,27.511867,27.511867,27.511867,27.511867,27.511867,...,36.426863,39.259429,41.87098,41.735746,41.335195,42.824159,43.869526,45.201863,47.554376,48.2606


In [None]:
# VOE indicator: (best, worst) bounds on the log scale, rounded to 6 decimals.
scoring_bounds_VOE = tuple(
    round(np.log(raw_bound), 6) for raw_bound in (0.000769655, 0.095845771)
)

VOE_european = preprocess_and_extrapolate(
    data=VOE_data,
    european_countries=european_countries,
    scoring_bounds=scoring_bounds_VOE,
)

VOE_european

Missing Values:
VOE.raw.2003    0
VOE.raw.2004    0
VOE.raw.2005    0
VOE.raw.2006    0
VOE.raw.2007    0
VOE.raw.2008    0
VOE.raw.2009    0
VOE.raw.2010    0
VOE.raw.2011    0
VOE.raw.2012    0
VOE.raw.2013    0
VOE.raw.2014    0
VOE.raw.2015    0
VOE.raw.2016    0
VOE.raw.2017    0
VOE.raw.2018    0
VOE.raw.2019    0
dtype: int64
Best and Worst Values:
-7.169568
-2.345015


Unnamed: 0_level_0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALB,47.102425,47.102425,47.102425,47.102425,47.102425,47.102425,47.102425,47.102425,47.102425,47.102425,...,46.651558,43.531015,41.005469,49.460067,54.426134,46.083361,49.778931,44.207098,47.169753,46.94618
AND,42.708745,42.708745,42.708745,42.708745,42.708745,42.708745,42.708745,42.708745,42.708745,42.708745,...,47.522483,43.511854,43.860131,47.577951,49.382907,46.076477,44.091072,41.93917,43.891641,44.059107
AUT,45.1061,45.1061,45.1061,45.1061,45.1061,45.1061,45.1061,45.1061,45.1061,45.1061,...,52.892261,49.196415,48.972647,51.152094,53.44107,47.946536,49.205577,48.562317,49.75235,50.26324
BLR,66.135765,66.135765,66.135765,66.135765,66.135765,66.135765,66.135765,66.135765,66.135765,66.135765,...,60.940056,65.438299,66.493507,64.112597,63.425316,62.947843,64.86259,70.21431,66.069398,65.506247
BEL,56.265337,56.265337,56.265337,56.265337,56.265337,56.265337,56.265337,56.265337,56.265337,56.265337,...,62.972426,64.425455,65.361967,63.92006,64.608113,66.13104,66.526622,65.569789,66.47981,66.95487
BIH,33.319032,33.319032,33.319032,33.319032,33.319032,33.319032,33.319032,33.319032,33.319032,33.319032,...,39.379387,31.419525,29.040975,36.882562,43.200085,35.203534,38.352012,30.62191,35.316205,34.098116
BGR,38.729737,38.729737,38.729737,38.729737,38.729737,38.729737,38.729737,38.729737,38.729737,38.729737,...,40.951736,37.778121,34.831227,41.120215,46.46342,40.907217,41.437025,39.949625,42.119412,40.900341
HRV,34.372564,34.372564,34.372564,34.372564,34.372564,34.372564,34.372564,34.372564,34.372564,34.372564,...,41.387897,33.59167,32.164641,38.155146,43.673566,36.149266,37.942602,32.440548,35.719119,35.388444
CYP,37.38133,37.38133,37.38133,37.38133,37.38133,37.38133,37.38133,37.38133,37.38133,37.38133,...,33.271883,34.807698,29.48319,36.261208,41.08643,35.774371,24.864232,38.137668,44.723957,40.936733
CZE,48.576959,48.576959,48.576959,48.576959,48.576959,48.576959,48.576959,48.576959,48.576959,48.576959,...,54.530704,53.262996,52.377433,54.537101,54.602956,52.029658,52.64453,54.545624,55.16232,54.358812


In [None]:
# Write each scored indicator table to its own CSV file (the `iso` index is kept).
SOE_european.to_csv(path_or_buf='/content/SOE_transformed.csv')
COE_european.to_csv(path_or_buf='/content/COE_transformed.csv')
VOE_european.to_csv(path_or_buf='/content/VOE_transformed.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>