# Pré-traitement des données environnementales (*The World Bank*)

[Source](https://data.worldbank.org/topic/6)

- `API_6_DS2_en_csv` : les données
- `Metadata_Country_API_6_DS2_en_csv` : métadonnées par pays
- `Metadata_Indicator_API_6_DS2_en_csv` : liste des indicateurs disponibles


## Indicateurs retenus

- `ER.H2O.INTR.PC` : `Renewable internal freshwater resources per capita (cubic meters)`
- `ER.H2O.INTR.K3` : `Renewable internal freshwater resources, total (billion cubic meters)`
- `AG.LND.FRST.ZS` : `Surface forestière (% du territoire)`
- `EG.ELC.FOSL.ZS` : `Production d'électricité à partir des sources en pétrole, gaz et charbon (% du total)`
- `EG.ELC.RNEW.ZS` : `Production d’électricité renouvelable (% de la production totale d’électricité)`
- `EN.ATM.CO2E.KT` : `Émissions de CO2 (kt)`

In [1]:
import pandas as pd

# Codes of the indicators that get exported further down.
indics = [
    'ER.H2O.INTR.PC',
    'ER.H2O.INTR.K3',
    'AG.LND.FRST.ZS',
    'EG.ELC.FOSL.ZS',
    'EG.ELC.RNEW.ZS',
    'EN.ATM.CO2E.KT',
]

# Per-country and per-indicator metadata tables.
country_metadata = pd.read_csv(
    '../data/other/Metadata_Country_API_6_DS2_fr_csv_v2_1764366.csv'
)
indicator_metadata = pd.read_csv(
    '../data/other/Metadata_Indicator_API_6_DS2_fr_csv_v2_1764366.csv'
)

# Main data table; the first four lines of the file are skipped before the
# header row is read.
main_csv = pd.read_csv(
    '../data/other/API_6_DS2_fr_csv_v2_1764366.csv',
    skiprows=4,
)

# Preview the first rows (last expression of the cell, so it is displayed).
main_csv.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,Unnamed: 65
0,Aruba,ABW,"Mortality rate attributed to unsafe water, uns...",SH.STA.WASH.P5,,,,,,,...,,,,,,,,,,
1,Aruba,ABW,People using safely managed sanitation service...,SH.STA.SMSS.ZS,,,,,,,...,,,,,,,,,,
2,Aruba,ABW,People using safely managed sanitation service...,SH.STA.SMSS.UR.ZS,,,,,,,...,,,,,,,,,,
3,Aruba,ABW,People using safely managed sanitation service...,SH.STA.SMSS.RU.ZS,,,,,,,...,,,,,,,,,,
4,Aruba,ABW,Mortality rate attributed to unintentional poi...,SH.STA.POIS.P5.MA,,,,,,,...,,,,,,,,,,


In [2]:
import json

# Values of the 'Country Name' column whose series are exported.
pays = [
    'France',
    'Monde',
]

def get_indicator(ind_name, data=None, countries=None):
    """Return the rows of one indicator, restricted to the selected countries.

    Args:
        ind_name: Value to match in the 'Indicator Code' column.
        data: Table to read from. It must contain the 'Country Name',
            'Country Code', 'Indicator Name' and 'Indicator Code' columns.
            Defaults to the module-level ``main_csv``.
        countries: Values of 'Country Name' to keep. Defaults to the
            module-level ``pays``.

    Returns:
        A new DataFrame whose first column is 'Country Name', followed by
        every remaining column that holds at least one value for this
        indicator (over all countries, not only the selected ones). The
        input table is not modified.

    Raises:
        KeyError: If no row of ``data`` carries ``ind_name``.
    """
    if data is None:
        data = main_csv
    if countries is None:
        countries = pays

    df = data[data['Indicator Code'] == ind_name]
    if df.empty:
        # Without this guard the column-wise dropna below would remove every
        # column and the failure would surface as KeyError: 'Country Name'.
        raise KeyError(f'no data for indicator code {ind_name!r}')

    df = df.drop(columns=['Indicator Name', 'Indicator Code', 'Country Code'])
    # The number in this column's name follows the number of columns in the
    # file, so it may be absent from another extract. Tolerate that here: an
    # empty trailing column under any other name is removed by the dropna.
    df = df.drop(columns=['Unnamed: 65'], errors='ignore')
    df = df.dropna(axis='columns', how='all')
    return df[df['Country Name'].isin(countries)]

def _json_scalar(value):
    """Return *value* in a form that ``json`` writes as valid JSON.

    Missing values become ``None`` (written as ``null``) instead of the
    non-standard ``NaN`` token; NumPy scalars are unwrapped to the matching
    Python number.
    """
    if pd.isna(value):
        return None
    return value.item() if hasattr(value, 'item') else value


def get_output(ind_name):
    """Build the JSON-ready series of one indicator for each country in ``pays``.

    Args:
        ind_name: Indicator code, passed on to ``get_indicator``.

    Returns:
        A dict mapping each country name to a list of
        ``{"year": <column label>, "value": <number or None>}`` entries, one
        per column after the first one of the frame returned by
        ``get_indicator``. Countries without any row are left out.
    """
    output = {}
    ind = get_indicator(ind_name)
    for p in pays:
        # Skip the first column (the country name); the rest are the years.
        pays_data = ind[ind['Country Name'] == p].iloc[:, 1:]
        if pays_data.empty:
            # No row for this country: omit it rather than fail on values[0].
            continue
        output[p] = [
            {"year": year, "value": _json_scalar(pays_data[year].values[0])}
            for year in pays_data.columns
        ]
    return output


# Write one JSON file per retained indicator, named after its code.
for ind in indics:
    # The context manager closes (and so flushes) each file as soon as it has
    # been written, instead of leaving the handle open in the kernel.
    with open('../data/' + ind + '.json', 'w') as f:
        json.dump(get_output(ind), f)


In [3]:
# Load the emissions table and preview its first three rows.
export_co2 = pd.read_csv(filepath_or_buffer='../data/viz_4/emissions_import.csv')
export_co2.head(n=3)

Unnamed: 0,Country Code,Country Name,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,AUS,Australia,119.4,131.7,143.0,154.2,131.2,154.8,172.4,178.5,167.9,158.2,143.7
1,AUT,Austria,65.1,71.3,74.7,74.1,62.6,72.5,77.6,73.5,71.4,69.9,66.3
2,BEL,Belgium,87.8,92.7,99.7,113.3,90.0,89.8,99.2,90.7,88.4,87.9,86.5


In [4]:
# Separate frame without the 'Country Name' column; export_co2 is left as is
# because drop() returns a new object.
df = export_co2.drop(columns=['Country Name'])
df.head(n=3)

Unnamed: 0,Country Code,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,AUS,119.4,131.7,143.0,154.2,131.2,154.8,172.4,178.5,167.9,158.2,143.7
1,AUT,65.1,71.3,74.7,74.1,62.6,72.5,77.6,73.5,71.4,69.9,66.3
2,BEL,87.8,92.7,99.7,113.3,90.0,89.8,99.2,90.7,88.4,87.9,86.5


In [5]:
# Build {country code: [{"year": ..., "value": ...}, ...]} from the emissions
# table and write it out as JSON.
output = {}
for p in export_co2['Country Code']:
    # Skip the first column (the country code); the rest are the years.
    pays_data = df[df['Country Code'] == p].iloc[:, 1:]
    if pays_data.empty:
        continue
    pays_array = []
    for year in pays_data.columns:
        value = pays_data[year].values[0]
        if pd.isna(value):
            # Emit null rather than the non-standard NaN token.
            value = None
        elif hasattr(value, 'item'):
            # Unwrap NumPy scalars so integer columns are serialisable too.
            value = value.item()
        pays_array.append({"year": year, "value": value})
    output[p] = pays_array

# NOTE(review): the input is read from '../data/viz_4/' but the result goes to
# '../data/viz4/' (and 'emission' vs 'emissions') - confirm this is intended.
# The context manager makes sure the file is flushed and closed.
with open('../data/viz4/emission_import.json', 'w') as f:
    json.dump(output, f)