# Obtain weekday prototypes
The main objective of this task is to build a prototype for every weekday. We model two types of days, based on the consumption activity of each consumer type:
- **Active** day.
- **Inactive** day.

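Thus, for each consumer type, we'll get up to 14 day prototypes (7 weekdays × 2 types of days); a weekday/activity combination that never occurs in the data for a consumer type produces no prototype.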

In [1]:
# Directory prefix of the input data. File names are appended to it by plain
# string concatenation, so the trailing slash is required.
CONS_PATH: str = '../data/'

In [2]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from typing import Tuple

In [3]:
# Load the pickled consumption records; the bare ``raw`` expression below
# displays the frame as the cell output.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
raw = pd.read_pickle(f'{CONS_PATH}consumptions.zip')
raw

Unnamed: 0_level_0,building_id,weekday,active,type,consumptions
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-07-27,27,2,True,2,"[17.0, 19.0, 18.35079465, 35.84631282, 47.8462..."
2011-07-28,27,3,True,2,"[18.88870418, 18.80300889, 18.84589226, 35.845..."
2011-07-29,27,4,True,2,"[20.0, 21.0, 20.0, 37.78877899, 45.84570469, 5..."
2011-07-30,27,5,False,1,"[17.29811321, 17.0, 17.23969745, 17.84583302, ..."
2011-07-31,27,6,False,1,"[18.84559887, 17.15385255, 18.0, 18.0, 18.0, 2..."
...,...,...,...,...,...
2020-05-09,2233,5,False,1,"[9.50106059, 8.96931442, 8.60189324, 9.1773389..."
2020-05-11,2233,0,False,1,"[8.57166388, 10.30621946, 10.4596744, 12.97239..."
2020-05-12,2233,1,False,1,"[8.31569322, 9.21411531, 10.19029213, 12.96810..."
2020-05-13,2233,2,False,1,"[8.71920264, 9.09413051, 10.02307814, 11.23125..."


In [4]:
def getBoxPlot(df: pd.DataFrame):
    """Draw an hourly box plot of the consumption profiles in ``df``.

    Every row's ``consumptions`` sequence is spread over the columns
    ``h0`` ... ``h23``. One box is drawn per remaining column and the column
    means are overlaid as a red line.

    The frame passed in is not modified.

    Args:
        df: Frame with the columns ``building_id``, ``weekday``, ``active``,
            ``type`` and ``consumptions``; each ``consumptions`` entry must
            expand to 24 values.

    Returns:
        The matplotlib figure that holds the plot.
    """
    hour_columns = ['h' + str(i) for i in range(24)]

    # Work on a copy: writing the hourly columns into the argument would leak
    # them into the caller's frame and make a second call on it fail.
    hourly = df.copy()
    # Pick the profile by label; ``row[-1]`` depended on the deprecated
    # positional fallback of label-indexed Series.
    hourly[hour_columns] = hourly.apply(
        lambda row: row['consumptions'], axis=1, result_type='expand'
    )
    hourly = hourly.drop(
        ['building_id', 'weekday', 'active', 'type', 'consumptions'], axis=1
    )

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)

    hourly.plot(kind='box', figsize=(12, 10), ax=ax)

    # Boxes are placed at x = 1..N, whereas a pandas line plot over a string
    # index starts at x = 0. Plot the means at explicit positions so the line
    # passes through the matching boxes.
    means = hourly.mean()
    ax.plot(range(1, len(means) + 1), means.to_numpy(), color='red')

    return fig

In [5]:
def get_prototype(df: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Summarise a group of daily profiles into mean and std prototypes.

    The ``weekday``, ``active`` and ``type`` labels of the result are read
    from the first row only, so ``df`` is expected to contain days that share
    those three values.

    Args:
        df: Non-empty frame with the columns ``weekday``, ``active``,
            ``type`` and ``consumptions``. Each ``consumptions`` entry must
            be indexable for the hours 0..23.

    Returns:
        A ``(mean, std)`` pair of one-row frames with the columns
        ``weekday``, ``active``, ``type`` and ``consumptions``. The
        ``consumptions`` cell holds a list of 24 values: the per-hour
        ``np.nanmean`` and ``np.nanstd`` across all rows of ``df``
        respectively.

    Raises:
        IndexError: If ``df`` has no rows.
    """
    hours_per_day = 24

    labels = {
        column: df[column].iloc[0] for column in ('weekday', 'active', 'type')
    }

    # Pull the column out once; going through ``.iloc`` for every single
    # (row, hour) element is needlessly slow on large groups.
    profiles = df['consumptions'].tolist()
    hourly_values = [
        [profile[hour] for profile in profiles]
        for hour in range(hours_per_day)
    ]

    mean = [np.nanmean(values) for values in hourly_values]
    std = [np.nanstd(values) for values in hourly_values]

    # Wrap each list in another list so it is stored as a single cell.
    mean_proto = pd.DataFrame({**labels, 'consumptions': [mean]})
    std_proto = pd.DataFrame({**labels, 'consumptions': [std]})
    return mean_proto, std_proto

In [6]:
# Build one mean and one std prototype for every (weekday, active, type)
# combination that occurs in ``raw``. The per-group frames are collected in
# lists and concatenated once at the end: ``DataFrame.append`` no longer
# exists in pandas >= 2.0, and appending inside the loop re-copies the
# accumulated frame on every iteration.
mean_frames, std_frames = [], []
for d in range(0, 7):
    df = raw[raw['weekday'] == d]

    for a in (True, False):
        df_a = df[df['active'] == a]

        # Only consumer types present in this subset are visited, so
        # get_prototype never receives an empty frame.
        for t in df_a['type'].unique():
            df_t = df_a[df_a['type'] == t]

            mean, std = get_prototype(df_t)
            mean_frames.append(mean)
            std_frames.append(std)

# pd.concat rejects an empty list; fall back to an empty frame in that case.
mean_proto = pd.concat(mean_frames, ignore_index=True) if mean_frames else pd.DataFrame()
std_proto = pd.concat(std_frames, ignore_index=True) if std_frames else pd.DataFrame()

mean_proto

Unnamed: 0,weekday,active,type,consumptions
0,0,True,2,"[38.297283840311174, 50.3358496850258, 67.3876..."
1,0,True,1,"[11.001889049292503, 14.487953263394882, 17.83..."
2,0,True,0,"[2.1012870729867266, 2.4953317447056174, 2.907..."
3,0,False,1,"[21.868695459806606, 24.68378849828118, 27.236..."
4,0,False,0,"[2.1862116565256593, 2.63146236951444, 2.92304..."
5,1,True,2,"[38.85675383610426, 50.97112868400734, 68.3377..."
6,1,True,1,"[11.134594094896672, 14.524566938550246, 17.93..."
7,1,True,0,"[2.2343469702376284, 2.6159155327329535, 3.008..."
8,1,False,1,"[21.9559986566389, 24.80209455806773, 27.63495..."
9,1,False,0,"[2.231630536527288, 2.6820360293384935, 3.0027..."
