# Electric power consumption EDA 

In [3]:
%run "../config/notebook.ipynb"
%run "../config/files.ipynb"

import pandas as pd
import numpy as np

## Load the file 

In [5]:
# Read the semicolon-separated statistics file. The department code column is
# forced to a string dtype instead of letting pandas infer a numeric type.
# NOTE(review): ELECTRIC_STATISTICS_FILE_PATH is presumably defined by the
# config notebooks run at the top of this notebook -- confirm.
df_conso = pd.read_csv(
    ELECTRIC_STATISTICS_FILE_PATH,
    sep=';',
    header=0,
    low_memory=False,
    dtype={'Code département': 'str'},
)

## Extract some basic information

#### Do all departments have a row for the min and max years?

In [6]:
def check_values(df=None):
    """Print the first and last year of the dataset with their row counts.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame holding an 'Année' column. When omitted, the notebook-level
        ``df_conso`` frame is inspected, so ``check_values()`` keeps working
        exactly as before.

    Returns
    -------
    None
        The summary is written to standard output.

    Raises
    ------
    ValueError
        If the frame has no rows, since there is no first or last year.
    """
    if df is None:
        # Backward-compatible default: fall back to the notebook-level frame
        df = df_conso

    years = df['Année']
    if years.empty:
        raise ValueError("cannot summarise an empty dataset: no 'Année' values")

    starting_year = years.min()
    ending_year = years.max()

    # Rows recorded for each of the two boundary years
    df_start = df[years == starting_year]
    df_end = df[years == ending_year]

    print("The dataset start in {} with {} values and finish in {} with {} values".format(
        starting_year,
        len(df_start.index),
        ending_year,
        len(df_end.index),
    ))

### Total electric consumption 

In [7]:
# Overall consumption per row: the five sector totals (MWh) added together
# element-wise, left to right.
df_conso['conso_totale'] = (
    df_conso['Conso totale Résidentiel (MWh)']
    .add(df_conso['Conso totale Professionnel (MWh)'])
    .add(df_conso['Conso totale Agriculture (MWh)'])
    .add(df_conso['Conso totale Industrie (MWh)'])
    .add(df_conso['Conso totale Tertiaire (MWh)'])
)

In [8]:
# Keep only the identifying columns and the total consumptions (MWh).
df_total_consumptions = df_conso.loc[:, [
    'Code département',
    'Année',
    'Conso totale Résidentiel (MWh)',
    'Conso totale Professionnel (MWh)',
    'Conso totale Agriculture (MWh)',
    'Conso totale Industrie (MWh)',
    'Conso totale Tertiaire (MWh)',
    'conso_totale',
]]

# Cast the year and the consumption figures to integers, but leave the
# department code alone: it is an identifier loaded as a string, and an
# integer round-trip would turn '01' into '1' (and raise on any non-numeric
# code), making it inconsistent with the codes kept in the averages frame.
df_total_consumptions = df_total_consumptions.astype({
    column: int
    for column in df_total_consumptions.columns
    if column != 'Code département'
})

In [9]:
# English snake_case labels, matched by position to the selected source columns
df_total_consumptions.columns = [
    'code_insee',
    'year',
    'total_residential',
    'total_pro',
    'total_farming',
    'total_industry',
    'total_tertiary',
    'total',
]

# Store the department code as a string
df_total_consumptions = df_total_consumptions.astype({'code_insee': 'str'})

### Electric consumption per site

In [10]:
# Average consumption per site = sector total (MWh) / number of sites.
# Maps each new average column to its (total column, site-count column) pair.
average_sources = {
    'Conso moyenne Agriculture (MWh)': ('Conso totale Agriculture (MWh)', 'Nb sites Agriculture'),
    'Conso moyenne Industrie (MWh)': ('Conso totale Industrie (MWh)', 'Nb sites Industrie'),
    'Conso moyenne Tertiaire (MWh)': ('Conso totale Tertiaire (MWh)', 'Nb sites Tertiaire'),
}

for average_column, (total_column, sites_column) in average_sources.items():
    df_conso[average_column] = df_conso[total_column].div(df_conso[sites_column])

In [11]:
# Source column -> output label, in the order the columns should appear
avg_column_names = {
    'Code département': 'code_insee',
    'Année': 'year',
    'Conso moyenne Résidentiel (MWh)': 'avg_residential',
    'Conso moyenne Professionnel (MWh)': 'avg_pro',
    'Conso moyenne Agriculture (MWh)': 'avg_farming',
    'Conso moyenne Industrie (MWh)': 'avg_industry',
    'Conso moyenne Tertiaire (MWh)': 'avg_tertiary',
}

# Keep only those columns, relabel them, and store the department code as a string
df_avg_consumptions = (
    df_conso.loc[:, list(avg_column_names)]
    .rename(columns=avg_column_names)
    .astype({'code_insee': 'str'})
)

## Output

In [12]:
# List the dataframes this notebook produces, one line each
print(
    "Availables dataframes:",
    "df_total_consumptions: total consumptions by department by year",
    "df_avg_consumptions: average consumptions by department by year",
    sep="\n",
)

Availables dataframes:
df_total_consumptions: total consumptions by department by year
df_avg_consumptions: average consumptions by department by year


In [13]:
# Show the average-consumption frame (one row per department and year) as the cell output
df_avg_consumptions

Unnamed: 0,code_insee,year,avg_residential,avg_pro,avg_farming,avg_industry,avg_tertiary
0,57,2017,4.919000,8.903000,245.921485,868.083116,222.639218
1,73,2017,4.339000,8.305000,119.842245,341.727005,210.241153
2,10,2017,5.142000,9.096000,117.098734,713.155898,242.785875
3,01,2017,5.811000,9.371000,173.624584,780.762368,235.026655
4,42,2017,4.073000,8.954000,115.615616,674.387635,241.485070
...,...,...,...,...,...,...,...
653,51,2012,5.256278,9.906554,83.610821,1334.669509,273.335887
654,18,2011,5.095337,9.159684,75.151518,626.468373,247.815824
655,10,2016,5.269527,9.369452,129.000305,787.267527,217.072951
656,04,2011,4.621716,8.947669,115.262651,416.962470,188.973693
