# Build a table: region code -> region polygon

In this section we will build a static reference of region to be used to plot a Colorpleth of France.
Region path will be indexed by region code, the code is retrieved from the INSEE.
We will use these files:
* region2019-csv: columns **Code région** and **Nom en clair (majuscules)**
* regions-version-simplifiee.geojson: The files provided by Mohamed containing regions paths

In [46]:
import json
import pandas as pd
import numpy as np

In [47]:
# Download INSEE 2019 region data
!wget https://www.insee.fr/fr/statistiques/fichier/3720946/region2019-csv.zip
!unzip region2019-csv.zip
!ls

'wget' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.
'unzip' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.
'ls' n'est pas reconnu en tant que commande interne
ou externe, un programme ex‚cutable ou un fichier de commandes.


In [48]:
# Load new region codes
region2019_df = pd.read_csv("./region2019.csv")
region2019_df[region2019_df["reg"] ==  27]

Unnamed: 0,reg,cheflieu,tncc,ncc,nccenr,libelle
7,27,21231,0,BOURGOGNE FRANCHE COMTE,Bourgogne-Franche-Comté,Bourgogne-Franche-Comté


In [49]:
v = region2019_df[region2019_df["reg"] ==  27]["ncc"]

In [50]:
with open("regions-version-simplifiee.geojson") as f:
    geodata = json.load(f)

In [51]:
regions_paths = {}
for f in geodata["features"]:
    code = f["properties"]["code"]
    old_name = f["properties"]["nom"]
    name = region2019_df[region2019_df["reg"] ==  int(code)].iloc[0]["libelle"]
    regions_paths[code] = {
        "name":name,
        "old_name": old_name,
        "feature": f
    }

In [52]:
with open("regions.json", "w") as f:
    json.dump(regions_paths, f)

In [53]:
with open("regions_codes.txt", "w") as f:
    f.write("code\n")
    for code in regions_paths.keys():
        f.write("%s\n" % code)

# Build a table region -> total consumption

We want to have a consumption table. The table would be ideally indexed by INSEE region code so for each region we can retrieve directly its consumption.

In [54]:
regions_codes = pd.read_csv("regions_codes.txt")

In [55]:
df = pd.read_csv("consommation-electrique-par-secteur-dactivite-region.csv", delimiter=";")

In [56]:
df['Agriculture'] = np.round(df['Conso totale Agriculture (MWh)']/df['Nb sites Agriculture'],-1)
df['Industrie'] = np.round(df['Conso totale Industrie (MWh)']/df['Nb sites Industrie'],-1)
df['Tertiaire'] = np.round(df['Conso totale Tertiaire (MWh)']/df['Nb sites Tertiaire'],-1)
df['Autre'] = np.round(df['Conso totale Secteur non affecté (MWh)']/df['Nb sites Secteur non affecté'],-1)
df['Résidentiel'] = np.round(df['Conso moyenne Résidentiel (MWh)'] / (df['Superficie des logements 30 à 40 m2']
                                                                          + df['Superficie des logements 40 à 60 m2']
                                                                          + df['Superficie des logements 60 à 80 m2']
                                                                          + df['Superficie des logements 80 à 100 m2']
                                                                          + df['Superficie des logements < 30 m2']
                                                                          + df['Superficie des logements > 100 m2']),3)
df['Total'] = np.round((df['Agriculture'] + df['Industrie'] + df['Tertiaire'] + df['Autre'] + df['Résidentiel'])/5,-1)


In [57]:
filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code',
            'Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']]

# Moyenne Consommation

In [58]:
general = 'ConsommationMoyenne'

filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code',
            'Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']]

availible_years = filtered_df["Année"].unique()
regions_consumption = {general:{}}
for param in ['Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param]
            
regions_consumption[general]['histogram'] = 1

In [59]:

filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code',
            'Total',
             'Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']]

availible_years = filtered_df["Année"].unique()

for general in ['Agriculture',
             'Industrie',
             'Tertiaire',
             'Résidentiel',
             'Autre']:
    
    regions_consumption[general+'Moyenne'] = {}
    df_ = filtered_df[['Année','code',general]].rename(columns={general:'Total'})
    for param in ['Total']:
        regions_consumption[general+'Moyenne'][param] = {}
        for code, region_groupdf in df_.groupby("code"):
            regions_consumption[general+'Moyenne'][param][code] = {}

            for year, year_groupdf in  region_groupdf.groupby("Année"):
                regions_consumption[general+'Moyenne'][param][code][year] = year_groupdf.iloc[0][param].astype(float)

    regions_consumption[general+'Moyenne']['histogram'] = 0

# Consommation Total

In [60]:
general = 'ConsommationTotal'
features = ['Conso totale Professionnel (MWh)',
            'Conso totale Agriculture (MWh)',
            'Conso totale Industrie (MWh)',
            'Conso totale Tertiaire (MWh)',
            'Conso totale Résidentiel (MWh)',
            'Conso totale Secteur non affecté (MWh)']

features_renamed = ['Total',
                     'Agriculture',
                     'Industrie',
                     'Tertiaire',
                     'Résidentiel',
                     'Autre']

filtered_df = regions_codes.merge(df, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code']+features].rename(columns={features[i]:features_renamed[i] for i in range(len(features))})

availible_years = filtered_df["Année"].unique()
regions_consumption[general] = {}
for param in features_renamed:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
            
regions_consumption[general]['histogram'] = 1

In [61]:
df_prod = pd.read_csv('donnees_economix.csv', sep=',')
df_prod = df_prod.rename(columns={'annee':'Année','code':'Code région'})

general = 'ProductionTotale'
features = ['Production_totale',
            'Production_nucleaire',
            'Production_thermique_totale',
            'Production_hydraulique',
            'Production_eolien',
            'Production_solaire',
            'Production_bioernergies',]

features_renamed = ['Total',
                    'Nucleaire',
                    'Thermique',
                    'Hydraulique',
                    'Eolien',
                    'Solaire',
                    'Bioernergies']

filtered_df = regions_codes.merge(df_prod, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df_prod.shape, regions_codes.shape
filtered_df = filtered_df[['Année','code']+features].rename(columns={features[i]:features_renamed[i] for i in range(len(features))})

availible_years = filtered_df["Année"].unique()
regions_consumption[general] = {}
for param in features_renamed:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
            
regions_consumption[general]['histogram'] = 1

# Production CO2

In [62]:
filtered_df['Total'] = filtered_df['Nucleaire']*12 + filtered_df['Thermique']*850 + filtered_df['Hydraulique']*24 + \
                        filtered_df['Eolien']*11.5 + filtered_df['Solaire']*40 + filtered_df['Bioernergies']*230

In [63]:
general = 'ProductionCO2'
regions_consumption[general] = {}

for param in ['Total']:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
                          
regions_consumption[general]['histogram'] = 0

# Excedant

In [64]:
filtered_df = regions_codes.merge(df_prod, right_on="Code région", left_on="code", how="inner")
filtered_df.shape, df.shape, regions_codes.shape
filtered_df['Total'] = filtered_df['Excedent_Deficit']
filtered_df = filtered_df[['Année','code','Total']]

general = 'Excedent'
regions_consumption[general] = {}

for param in ['Total']:
    regions_consumption[general][param] = {}
    for code, region_groupdf in filtered_df.groupby("code"):
        regions_consumption[general][param][code] = {}

        for year, year_groupdf in  region_groupdf.groupby("Année"):
            regions_consumption[general][param][code][year] = year_groupdf.iloc[0][param].astype(float)
                          
regions_consumption[general]['histogram'] = 0

In [66]:
for elmt in regions_consumption.keys():
    with open("all_consumption.json", "w") as f:
        json.dump(regions_consumption, f)

In [71]:
list(regions_consumption['ProductionTotale'].keys())

['Total',
 'Nucleaire',
 'Thermique',
 'Hydraulique',
 'Eolien',
 'Solaire',
 'Bioernergies',
 'histogram']