### Import modules

In [None]:
# Import external modules
import os
import pandas as pd
# Route every pandas ``.plot()`` call in this notebook through the plotly backend
pd.options.plotting.backend = "plotly"

In [None]:
# Import initial internal resources (classes, functions, variables, etc.)
from EnergyDisaggregation.energydisaggregation.dataloader.dataloader import Dataloader
from EnergyDisaggregation.energydisaggregation.dataloader.config import DATACONFIG, CONFIG_WEATHER, CONFIG_POWER

In [None]:
# Import new internal resources (classes, functions, variables, etc.)
from EnergyDisaggregation.energydisaggregation.feature_engineering.historical import test_historical
from EnergyDisaggregation.energydisaggregation.feature_engineering.calendar import test_calendar
from EnergyDisaggregation.energydisaggregation.feature_engineering.cyclical_transformation import test_cyclical_transformation
from EnergyDisaggregation.energydisaggregation.feature_engineering.reshape_timeseries import test_reshape_timeseries
from EnergyDisaggregation.energydisaggregation.feature_engineering.temperature import test_temperature
from EnergyDisaggregation.energydisaggregation.feature_engineering.selection import test_selection

### Run the tests of the newly imported internal modules

In [None]:
# Run the test function exported by each feature_engineering module imported above.
# The calls are kept as separate statements so the cell still ends on
# test_selection() and displays whatever that call returns.
test_historical()
test_calendar()
test_cyclical_transformation()
test_reshape_timeseries()
test_temperature()
test_selection()

### Perform exploratory analysis

In [None]:
# Raw input locations, relative to the notebook's working directory
raw_data_path = "EnergyDisaggregation/energydisaggregation/data_storage/raw_data"
raw_power_filename, raw_weather_filename = (
    os.path.join(raw_data_path, csv_name)
    for csv_name in (
        "consommation-quotidienne-brute-regionale.csv",
        "donnees-synop-essentielles-omm.csv",
    )
)

In [None]:
# Build df_tot from both raw files (power consumption + weather)
df_tot = Dataloader.load_data(
    path_power=raw_power_filename,
    path_weather=raw_weather_filename,
)

In [None]:
# Plot the 2019 slice of df_tot for the Bretagne region
(
    df_tot
    .xs("Bretagne", level=DATACONFIG["Region"])
    .loc["2019"]
    .plot()
)

In [None]:
# Load the power data on its own from the raw consumption file
df_power = Dataloader.load_power(
    path=raw_power_filename,
)

In [None]:
# Move the region level into the columns, keep 2019, drop the outer
# column level, then plot
(
    df_power
    .unstack(level=CONFIG_POWER["Region"])
    .loc["2019"]
    .droplevel(level=0, axis=1)
    .plot()
)

In [None]:
# Load the weather data on its own from the raw SYNOP file
df_weather = Dataloader.load_weather(
    path=raw_weather_filename,
)

In [None]:
# Move the region level into the columns, then plot the 2019 values of the
# temperature column
(
    df_weather
    .unstack(level=CONFIG_WEATHER["Region"])
    .loc["2019", CONFIG_WEATHER["Temperature"]]
    .plot()
)

In [None]:
# Preview the first ten rows of df_tot
df_tot.head(n=10)

In [137]:
import pandas as pd
from datetime import datetime

def day_of_year(date_str):
    """Return the 1-based day-of-year of ``date_str``.

    Despite the parameter name, the argument is not a string: it must expose
    ``timetuple()`` (as ``datetime.date`` and ``datetime.datetime`` objects
    do). A plain ``str`` raises ``AttributeError``.
    """
    time_fields = date_str.timetuple()
    return time_fields.tm_yday


# Load the pre-processed dataset written by an earlier step
df = pd.read_csv("EnergyDisaggregation/energydisaggregation/data_storage/df_process.csv")

# Parse the timestamp strings into datetime objects.
# Fix: the source column was previously read from an undefined name `d`,
# which raised NameError on a fresh kernel.
# NOTE(review): "Data - Heure" looks like a typo of "Date - Heure"; the label
# is kept as-is because it is the index level name shown in the saved outputs
# and may be referenced elsewhere — confirm before renaming.
df["Data - Heure"] = pd.to_datetime(df["Date - Heure"])
df.set_index(["Data - Heure", "Région"], inplace=True)

# Derive calendar features from the timestamp level of the index.
# `.map` with per-element accessors is kept (rather than vectorised
# attributes) because it works whatever dtype the parsed level ends up with.
timestamps = df.index.get_level_values(0)
df['day_of_year'] = timestamps.map(day_of_year)
df['day'] = timestamps.map(lambda x: x.day)
df['month'] = timestamps.map(lambda x: x.month)



In [138]:
# Display the processed frame (pandas shows only the first and last rows/columns)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Date - Heure,consommation brute électricité (mw) - rte,température (°c),nebulosité totale,vitesse du vent moyen 10 mn,vitesse du vent en km/h,température ressentie,saison,is_holiday,is_bank_holiday,...,température (°c)_lag_1,température (°c)_lag_2,température (°c)_lag_3,température (°c)_lag_4,température (°c)_lag_5,température (°c)_lag_6,température (°c)_lag_7,day_of_year,day,month
Data - Heure,Région,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2013-01-01 00:00:00+01:00,Auvergne-Rhône-Alpes,2013-01-01 00:00:00+01:00,8173.0,9.375000,50.000000,10.275000,36.99,5.309697,3,True,True,...,,,,,,,,1,1,1
2013-01-01 00:00:00+01:00,Bourgogne-Franche-Comté,2013-01-01 00:00:00+01:00,2357.0,2.675000,98.750000,1.825000,6.57,0.849588,3,True,True,...,,,,,,,,1,1,1
2013-01-01 00:00:00+01:00,Bretagne,2013-01-01 00:00:00+01:00,3050.0,9.475000,81.250000,3.975000,14.31,7.354916,3,True,True,...,,,,,,,,1,1,1
2013-01-01 00:00:00+01:00,Centre-Val de Loire,2013-01-01 00:00:00+01:00,2476.0,8.200000,90.000000,5.950000,21.42,4.960144,3,True,True,...,,,,,,,,1,1,1
2013-01-01 00:00:00+01:00,Grand Est,2013-01-01 00:00:00+01:00,4943.0,5.075000,100.000000,5.550000,19.98,1.164014,3,True,True,...,,,,,,,,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-31 23:00:00+01:00,Nouvelle-Aquitaine,2019-12-31 23:00:00+01:00,12110.0,8.133333,100.000000,1.325000,4.77,7.660086,3,True,False,...,-0.133333,-0.216667,-0.283333,-0.350000,-0.666667,-1.066667,-1.466667,365,31,12
2019-12-31 23:00:00+01:00,Occitanie,2019-12-31 23:00:00+01:00,10984.0,5.000000,100.000000,2.116667,7.62,3.235897,3,True,False,...,-0.800000,-1.650000,-2.516667,-3.383333,-4.375000,-5.408333,-6.441667,365,31,12
2019-12-31 23:00:00+01:00,Pays de la Loire,2019-12-31 23:00:00+01:00,8133.0,7.525000,100.416667,3.158333,11.37,5.423246,3,True,False,...,0.100000,0.125000,0.125000,0.125000,-0.175000,-0.575000,-0.975000,365,31,12
2019-12-31 23:00:00+01:00,Provence-Alpes-Côte d'Azur,2019-12-31 23:00:00+01:00,12127.0,4.450000,26.250000,1.458333,5.25,3.361508,3,True,False,...,2.200000,1.225000,-0.808333,-2.841667,-4.425000,-5.858333,-7.291667,365,31,12


In [71]:
# Inspect the boolean is_holiday column
df['is_holiday']

Data - Heure               Région                    
2013-01-01 00:00:00+01:00  Auvergne-Rhône-Alpes          True
                           Bourgogne-Franche-Comté       True
                           Bretagne                      True
                           Centre-Val de Loire           True
                           Grand Est                     True
                                                         ... 
2019-12-31 23:00:00+01:00  Nouvelle-Aquitaine            True
                           Occitanie                     True
                           Pays de la Loire              True
                           Provence-Alpes-Côte d'Azur    True
                           Île-de-France                 True
Name: is_holiday, Length: 736128, dtype: bool

In [68]:
# Inspect the MultiIndex of df.
# Fix: the dangling attribute dot after `index` was a SyntaxError.
df.index

MultiIndex([('2013-01-01 00:00:00+01:00',       'Auvergne-Rhône-Alpes'),
            ('2013-01-01 00:00:00+01:00',    'Bourgogne-Franche-Comté'),
            ('2013-01-01 00:00:00+01:00',                   'Bretagne'),
            ('2013-01-01 00:00:00+01:00',        'Centre-Val de Loire'),
            ('2013-01-01 00:00:00+01:00',                  'Grand Est'),
            ('2013-01-01 00:00:00+01:00',            'Hauts-de-France'),
            ('2013-01-01 00:00:00+01:00',                  'Normandie'),
            ('2013-01-01 00:00:00+01:00',         'Nouvelle-Aquitaine'),
            ('2013-01-01 00:00:00+01:00',                  'Occitanie'),
            ('2013-01-01 00:00:00+01:00',           'Pays de la Loire'),
            ...
            ('2019-12-31 23:00:00+01:00',                   'Bretagne'),
            ('2019-12-31 23:00:00+01:00',        'Centre-Val de Loire'),
            ('2019-12-31 23:00:00+01:00',                  'Grand Est'),
            ('2019-12-31 23:00:00+0

In [131]:
from datetime import datetime

# Recompute the day-of-year column with the day_of_year() helper defined in the
# earlier loading cell. The identical redefinition that used to live here was
# dropped so the notebook has a single definition of the helper.
df['day_of_year'] = df.index.get_level_values(0).map(day_of_year)


In [134]:
# Inspect the derived day_of_year column
df['day_of_year']

Data - Heure               Région                    
2013-01-01 00:00:00+01:00  Auvergne-Rhône-Alpes            1
                           Bourgogne-Franche-Comté         1
                           Bretagne                        1
                           Centre-Val de Loire             1
                           Grand Est                       1
                                                        ... 
2019-12-31 23:00:00+01:00  Nouvelle-Aquitaine            365
                           Occitanie                     365
                           Pays de la Loire              365
                           Provence-Alpes-Côte d'Azur    365
                           Île-de-France                 365
Name: day_of_year, Length: 736128, dtype: int64