# Build a table: region code -> region polygon

In this section we will build a static reference of region to be used to plot a Colorpleth of France.
Region path will be indexed by region code, the code is retrieved from the INSEE.
We will use these files:
* region2019-csv: columns **Code région** and **Nom en clair (majuscules)**
* regions-version-simplifiee.geojson: The files provided by Mohamed containing regions paths

In [1]:
import json
import pandas as pd

In [2]:
# Download INSEE 2019 region data
!wget https://www.insee.fr/fr/statistiques/fichier/3720946/region2019-csv.zip
!unzip region2019-csv.zip
!ls

'wget' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.
'unzip' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.
'ls' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.


In [3]:
# Load new region codes
region2019_df = pd.read_csv("./region2019.csv")
region2019_df[region2019_df["reg"] ==  27]

Unnamed: 0,reg,cheflieu,tncc,ncc,nccenr,libelle
7,27,21231,0,BOURGOGNE FRANCHE COMTE,Bourgogne-Franche-Comté,Bourgogne-Franche-Comté


In [4]:
v = region2019_df[region2019_df["reg"] ==  27]["ncc"]

In [5]:
with open("regions-version-simplifiee.geojson") as f:
    geodata = json.load(f)

In [6]:
regions_paths = {}
for f in geodata["features"]:
    code = f["properties"]["code"]
    old_name = f["properties"]["nom"]
    name = region2019_df[region2019_df["reg"] ==  int(code)].iloc[0]["libelle"]
    regions_paths[code] = {
        "name":name,
        "old_name": old_name,
        "feature": f
    }

In [7]:
with open("regions.json", "w") as f:
    json.dump(regions_paths, f)

In [8]:
with open("regions_codes.txt", "w") as f:
    f.write("code\n")
    for code in regions_paths.keys():
        f.write("%s\n" % code)

# Build a table region -> total consumption

We want to have a consumption table. The table would be ideally indexed by INSEE region code so for each region we can retrieve directly its consumption.

In [9]:
regions_codes = pd.read_csv("regions_codes.txt")

In [10]:
df = pd.read_csv("consommation-electrique-par-secteur-dactivite-region.csv", delimiter=";")
df.columns

Index(['Année', 'Nom région', 'Code région', 'Nb sites Résidentiel',
       'Conso totale Résidentiel (MWh)', 'Conso moyenne Résidentiel (MWh)',
       'Nb sites Professionnel', 'Conso totale Professionnel (MWh)',
       'Conso moyenne Professionnel (MWh)', 'Nb sites Agriculture',
       'Conso totale Agriculture (MWh)', 'Nb sites Industrie',
       'Conso totale Industrie (MWh)', 'Nb sites Tertiaire',
       'Conso totale Tertiaire (MWh)', 'Nb sites Secteur non affecté',
       'Conso totale Secteur non affecté (MWh)', 'Nombre d'habitants',
       'Taux de logements collectifs', 'Taux de résidences principales',
       'Superficie des logements < 30 m2',
       'Superficie des logements 30 à 40 m2',
       'Superficie des logements 40 à 60 m2',
       'Superficie des logements 60 à 80 m2',
       'Superficie des logements 80 à 100 m2',
       'Superficie des logements > 100 m2',
       'Résidences principales avant 1919',
       'Résidences principales de 1919 à 1945',
       'Résiden

In [11]:
df.head()

Unnamed: 0,Année,Nom région,Code région,Nb sites Résidentiel,Conso totale Résidentiel (MWh),Conso moyenne Résidentiel (MWh),Nb sites Professionnel,Conso totale Professionnel (MWh),Conso moyenne Professionnel (MWh),Nb sites Agriculture,...,Résidences principales avant 1919,Résidences principales de 1919 à 1945,Résidences principales de 1946 à 1970,Résidences principales de 1971 à 1990,Résidences principales de 1991 à 2005,Résidences principales de 2006 à 2010,Résidences principales après 2011,Taux de chauffage électrique,Geo Shape,Geo Point 2D
0,2017,Normandie,28,1715164,8760796.0,5.107847,231493,2322050.0,10.030757,924,...,18.749732,8.027451,22.758724,28.431447,13.204561,6.315746,2.512339,28.874447,"{""type"": ""MultiPolygon"", ""coordinates"": [[[[0....","49.1202642048, 0.11075229648"
1,2017,Centre-Val de Loire,24,1243098,6644661.0,5.345243,169765,1561813.0,9.199851,4430,...,20.612322,7.641128,19.415312,30.739335,13.688412,5.843834,2.059658,28.25605,"{""type"": ""Polygon"", ""coordinates"": [[[0.614432...","47.4847679099, 1.6843888533"
2,2012,Provence-Alpes-Côte d'Azur,93,2850134,14584700.0,5.117198,431940,4066792.0,9.415177,1395,...,11.887384,8.437642,26.354425,30.928204,14.597186,5.673194,2.121965,40.334985,,
3,2015,Bourgogne-Franche-Comté,27,1453993,6691835.0,4.602385,224364,1962000.0,8.74472,779,...,23.215239,8.401511,20.102883,27.404423,13.165538,5.691796,2.01861,17.418529,,
4,2012,Centre-Val de Loire,24,1203311,6868343.0,5.70787,168743,1664268.0,9.862741,4355,...,20.612322,7.641128,19.415312,30.739335,13.688412,5.843834,2.059658,28.25605,,


In [12]:
df['Agriculture'] = df['Conso totale Agriculture (MWh)']/df['Nb sites Agriculture']
df['Industrie'] = df['Conso totale Industrie (MWh)']/df['Nb sites Industrie']
df['Tertiaire'] = df['Conso totale Tertiaire (MWh)']/df['Nb sites Tertiaire']
df['Autre'] = df['Conso totale Secteur non affecté (MWh)']/df['Nb sites Secteur non affecté']
df['Résidentiel'] = df['Conso moyenne Résidentiel (MWh)'] / (df['Superficie des logements 30 à 40 m2']
                                                                          + df['Superficie des logements 40 à 60 m2']
                                                                          + df['Superficie des logements 60 à 80 m2']
                                                                          + df['Superficie des logements 80 à 100 m2']
                                                                          + df['Superficie des logements < 30 m2']
                                                                          + df['Superficie des logements > 100 m2'])
df['Total'] = (df['Agriculture'] + df['Industrie'] + df['Tertiaire'] + df['Autre'] + df['Résidentiel'])/5



In [13]:
filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code',
            'Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']]

# Moyenne Consommation

In [14]:
general = 'ConsommationMoyenne'

filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code',
            'Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']]

availible_years = filtered_df["Année"].unique()
regions_consumption = {general:{}}
for param in ['Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param]
            
regions_consumption[general]['histogram'] = 1

# Consommation Total

In [15]:
general = 'ConsommationTotal'
features = ['Conso totale Professionnel (MWh)',
            'Conso totale Agriculture (MWh)',
            'Conso totale Industrie (MWh)',
            'Conso totale Tertiaire (MWh)',
            'Conso totale Résidentiel (MWh)',
            'Conso totale Secteur non affecté (MWh)']

features_renamed = ['Total',
                     'Agriculture',
                     'Industrie',
                     'Tertiaire',
                     'Résidentiel',
                     'Autre']

filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code']+features].rename(columns={features[i]:features_renamed[i] for i in range(len(features))})

availible_years = filtered_df["Année"].unique()
regions_consumption[general] = {}
for param in features_renamed:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
            
regions_consumption[general]['histogram'] = 1

In [46]:
df_prod = pd.read_csv('donnees_economix.csv', sep=',')
df_prod = df_prod.rename(columns={'annee':'Année','code':'Code région'})

general = 'ProductionTotale'
features = ['Production_totale',
            'Production_nucleaire',
            'Production_thermique_totale',
            'Production_hydraulique',
            'Production_eolien',
            'Production_solaire',
            'Production_bioernergies',]

features_renamed = ['Total',
                    'Nucleaire',
                    'Thermique',
                    'Hydraulique',
                    'Eolien',
                    'Solaire',
                    'Bioernergies']

filtered_df = regions_codes.merge(df_prod, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df_prod.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code']+features].rename(columns={features[i]:features_renamed[i] for i in range(len(features))})

availible_years = filtered_df["Année"].unique()
regions_consumption[general] = {}
for param in features_renamed:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
            
regions_consumption[general]['histogram'] = 1

# Production CO2

In [47]:
filtered_df['Total'] = filtered_df['Nucleaire']*6 + filtered_df['Thermique']*900 + filtered_df['Hydraulique']*4 + \
                        filtered_df['Eolien']*10 + filtered_df['Solaire']*100

In [48]:
filtered_df[filtered_df.code == 44]

Unnamed: 0,Année,code,Total,Nucleaire,Thermique,Hydraulique,Eolien,Solaire,Bioernergies
25,2013,44,10434,78,11,9,3,0,0
26,2014,44,5972,85,6,8,3,0,0
27,2015,44,6892,85,7,8,5,0,0
28,2016,44,9568,82,10,9,4,0,0
29,2017,44,11338,76,12,8,5,0,0


In [49]:
general = 'ProductionCO2'
regions_consumption[general] = {}

for param in ['Total']:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
                          
regions_consumption[general]['histogram'] = 0

# Excedant

In [40]:
filtered_df = regions_codes.merge(df_prod, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df['Total'] = filtered_df['Excedent_Deficit']
filtered_df = filtered_df[['Année','code','Total']]

general = 'Excedent'
regions_consumption[general] = {}

for param in ['Total']:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
                          
regions_consumption[general]['histogram'] = 0

In [41]:
df_prod[df_prod['Code région'] == 44]

Unnamed: 0,Année,Code région,Qualite,NOM_REGION,Production_totale,Production_nucleaire,Production_thermique_totale,Production_hydraulique,Production_eolien,Production_solaire,...,Grande_industrie_PME_PMI,Energie_industrie_agriculture,Chime_parachimie,Construction_automobile,Metallurgie_mecanique,Mineraux_materiaux,Papier_carton,Siderurgie,Autres_industries,Excedent_Deficit
4,2013,44,DonnÃ©es consolidÃ©es,Grand-Est,101,78,11,9,3,0,...,43,17,2,1,2,1,2,1.0,2,30
16,2014,44,DonnÃ©es consolidÃ©es,Grand-Est,102,85,6,8,3,0,...,24,17,2,1,2,1,2,1.0,2,50
28,2015,44,DonnÃ©es consolidÃ©es,Grand-Est,105,85,7,8,5,0,...,24,17,2,1,2,1,2,1.0,1,54
40,2016,44,DonnÃ©es consolidÃ©es,Grand-Est,105,82,10,9,4,0,...,24,17,2,1,2,1,2,1.0,2,53
52,2017,44,DonnÃ©es consolidÃ©es,Grand-Est,101,76,12,8,5,0,...,24,17,2,0,2,1,2,1.0,2,50


In [50]:
for elmt in regions_consumption.keys():
    with open("all_consumption.json", "w") as f:
        json.dump(regions_consumption, f)

In [44]:
regions_consumption['Excedent']['Total']

{11: {2013: -78.0, 2014: -45.0, 2015: -44.0, 2016: -43.0, 2017: -42.0},
 24: {2013: 47.0, 2014: 66.0, 2015: 68.0, 2016: 64.0, 2017: 63.0},
 27: {2013: -27.0, 2014: -19.0, 2015: -20.0, 2016: -20.0, 2017: -19.0},
 28: {2013: 36.0, 2014: 48.0, 2015: 40.0, 2016: 36.0, 2017: 32.0},
 32: {2013: -42.0, 2014: -25.0, 2015: -19.0, 2016: -22.0, 2017: -18.0},
 44: {2013: 30.0, 2014: 50.0, 2015: 54.0, 2016: 53.0, 2017: 50.0},
 52: {2013: -28.0, 2014: -17.0, 2015: -17.0, 2016: -16.0, 2017: -14.0},
 53: {2013: -25.0, 2014: -13.0, 2015: -13.0, 2016: -13.0, 2017: -14.0},
 75: {2013: -9.0, 2014: 16.0, 2015: 14.0, 2016: 17.0, 2017: 21.0},
 76: {2013: -8.0, 2014: 9.0, 2015: 10.0, 2016: 12.0, 2017: 10.0},
 84: {2013: 25.0, 2014: 47.0, 2015: 46.0, 2016: 31.0, 2017: 32.0},
 93: {2013: -37.0, 2014: -18.0, 2015: -18.0, 2016: -13.0, 2017: -14.0}}