## Classification Methods

Description : 

This notebook presents the classification methods used in the paper. They predict either whether there is any consumption (binary: 0, 1) or the category of consumption (three classes: 0, 1, 2), where the categories are defined with either the percentage method or the quantile method.

In [145]:
import pandas as pd
import numpy as np

from helpers import * 

# Remove warnings: the "ignore" filter is installed without any category or
# module restriction, so it applies to every warning raised after this cell runs.
import warnings
warnings.filterwarnings("ignore")

### Table 5

In [146]:
# IDs of the houses selected in the EWHDataset.csv file.
# Their position in the list (1-6) is the house number used in the paper.
houses = [
    6,   # house 1
    40,  # house 2
    59,  # house 3
    72,  # house 4
    87,  # house 5
    60,  # house 6
]

#### Percentages

In [158]:
# Table 5 (percentage method): share of each consumption class, in percent,
# over the test period (March 2018 onwards) for every selected house.
for paper_id, house in enumerate(houses, start=1):

    target = pd.read_csv(f'Data/house_{house}_Target.csv', index_col=0)
    target.index = pd.to_datetime(target.index)

    # Keep only the test-period labels of the percentage-based classes
    labels = target.loc['2018-03':, 'Percentage_Consumption']
    class_share = labels.value_counts() / labels.count()

    separator = 30*'-'
    print(separator)
    print(f'House {house} - id {paper_id}')
    print(separator)

    print(round(class_share, 3)*100)

------------------------------
House 6 - id 1
------------------------------
0    77.2
1    15.1
2     7.6
Name: Percentage_Consumption, dtype: float64
------------------------------
House 40 - id 2
------------------------------
0    85.8
2     7.9
1     6.2
Name: Percentage_Consumption, dtype: float64
------------------------------
House 59 - id 3
------------------------------
0    72.5
1    21.7
2     5.8
Name: Percentage_Consumption, dtype: float64
------------------------------
House 72 - id 4
------------------------------
0    79.3
1    14.2
2     6.5
Name: Percentage_Consumption, dtype: float64
------------------------------
House 87 - id 5
------------------------------
0    75.1
1    16.5
2     8.3
Name: Percentage_Consumption, dtype: float64
------------------------------
House 60 - id 6
------------------------------
0    87.8
2     6.8
1     5.4
Name: Percentage_Consumption, dtype: float64


#### Quantiles

In [159]:
# Table 5 (quantile method): share of each consumption class, in percent,
# over the test period (March 2018 onwards) for every selected house.
for house_rank, house in enumerate(houses, start=1):

    y = pd.read_csv(f'Data/house_{house}_Target.csv', index_col=0)
    y.index = pd.to_datetime(y.index)

    # Test-period labels of the quantile-based classes
    quantile_labels = y.loc['2018-03':, 'Quantile_Consumption']
    class_counts = quantile_labels.value_counts()
    n_labels = quantile_labels.count()

    banner = 30*'-'
    print('\n'.join([banner, f'House {house} - id {house_rank}', banner]))

    print(round(class_counts/n_labels, 3)*100)

------------------------------
House 6 - id 1
------------------------------
0    78.1
1    17.1
2     4.9
Name: Quantile_Consumption, dtype: float64
------------------------------
House 40 - id 2
------------------------------
0    85.3
1    10.3
2     4.4
Name: Quantile_Consumption, dtype: float64
------------------------------
House 59 - id 3
------------------------------
0    66.9
1    22.8
2    10.3
Name: Quantile_Consumption, dtype: float64
------------------------------
House 72 - id 4
------------------------------
0    78.8
1    14.4
2     6.8
Name: Quantile_Consumption, dtype: float64
------------------------------
House 87 - id 5
------------------------------
0    70.8
1    18.9
2    10.3
Name: Quantile_Consumption, dtype: float64
------------------------------
House 60 - id 6
------------------------------
0    96.2
1     2.4
2     1.4
Name: Quantile_Consumption, dtype: float64


### Table 6

In [149]:
def TrainTest(house_number,output='Demand',data_dir='Data',train_end='2018-02',test_start='2018-03'):
    """
    Load the features and targets of one house and split them chronologically.

    Parameters
    ----------
    house_number : int or str
        House identifier used in the file names
        ``house_<house_number>_Features.csv`` and ``house_<house_number>_Target.csv``.
    output : str
        Target column to keep; either Demand, Binary_Consumption,
        Percentage_Consumption or Quantile_Consumption.
    data_dir : str
        Directory containing the CSV files (default: ``'Data'``).
    train_end : str
        Last period (inclusive) of the training set, as a date string usable for
        slicing a DatetimeIndex (default: ``'2018-02'``).
    test_start : str
        First period (inclusive) of the test set (default: ``'2018-03'``).

    Returns
    -------
    X_train, y_train, X_test, y_test : pandas.DataFrame
        The targets are returned as single-column DataFrames (column ``output``).

    Raises
    ------
    KeyError
        If ``output`` is not a column of the target file.
    """
    X = pd.read_csv(f'{data_dir}/house_{house_number}_Features.csv',index_col=0)
    y = pd.read_csv(f'{data_dir}/house_{house_number}_Target.csv',index_col=0)

    # Fail early with an explicit message instead of a bare KeyError from .loc
    if output not in y.columns:
        raise KeyError(f"Unknown output '{output}'; available columns: {list(y.columns)}")

    # The first CSV column holds timestamps; a DatetimeIndex enables the
    # date-string slicing used for the split below.
    X.index = pd.to_datetime(X.index)
    y.index = pd.to_datetime(y.index)

    X_train = X.loc[:train_end]
    X_test = X.loc[test_start:]

    # [output] (a list) keeps the targets as one-column DataFrames
    y_train = y.loc[:train_end,[output]]
    y_test = y.loc[test_start:,[output]]

    return X_train,y_train,X_test,y_test

#### Random Forest - Percentage

In [150]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

# Seed passed as random_state to the RandomForestClassifier models in the Table 6 cells
SEED = 123

In [165]:
# Table 6 - Random Forest on the percentage-based classes:
# one classifier is trained and scored per house.
for idx, house in enumerate(houses):

    X_train,y_train,X_test,y_test = TrainTest(house,output='Percentage_Consumption')

    clf = RandomForestClassifier(n_estimators = 500,
                            max_depth=9,
                            random_state=SEED)

    # TrainTest returns the target as a one-column DataFrame; flatten it to the
    # 1-D label vector scikit-learn expects instead of relying on its implicit
    # reshape (which emits a DataConversionWarning).
    clf.fit(X_train,y_train.values.ravel())

    y_preds = clf.predict(X_test)

    # Macro-averaged F1 score (unweighted mean of the per-class F1 scores)
    F1_score = f1_score(y_test,y_preds,average='macro')

    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tF1 Score : {F1_score}')
    print()

------------------------------
House 6 - id 1
------------------------------
	F1 Score : 0.48536761242617116

------------------------------
House 40 - id 2
------------------------------
	F1 Score : 0.5280800390269212

------------------------------
House 59 - id 3
------------------------------
	F1 Score : 0.6079345958129274

------------------------------
House 72 - id 4
------------------------------
	F1 Score : 0.5936541085756498

------------------------------
House 87 - id 5
------------------------------
	F1 Score : 0.5820836570836571

------------------------------
House 60 - id 6
------------------------------
	F1 Score : 0.40767396446318904



#### Random Forest - Quantile

In [166]:
# Table 6 - Random Forest on the quantile-based classes:
# one classifier is trained and scored per house.
for idx, house in enumerate(houses):

    X_train,y_train,X_test,y_test = TrainTest(house,output='Quantile_Consumption')

    clf = RandomForestClassifier(n_estimators = 200,
                            max_depth=2,
                            random_state=SEED)

    # TrainTest returns the target as a one-column DataFrame; flatten it to the
    # 1-D label vector scikit-learn expects instead of relying on its implicit
    # reshape (which emits a DataConversionWarning).
    clf.fit(X_train,y_train.values.ravel())

    y_preds = clf.predict(X_test)

    # Macro-averaged F1 score (unweighted mean of the per-class F1 scores)
    F1_score = f1_score(y_test,y_preds,average='macro')

    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tF1 Score : {F1_score}')
    print()

------------------------------
House 6 - id 1
------------------------------
	F1 Score : 0.4685188108392529

------------------------------
House 40 - id 2
------------------------------
	F1 Score : 0.30684657671164417

------------------------------
House 59 - id 3
------------------------------
	F1 Score : 0.3172236101324564

------------------------------
House 72 - id 4
------------------------------
	F1 Score : 0.43994432003873385

------------------------------
House 87 - id 5
------------------------------
	F1 Score : 0.2764227642276423

------------------------------
House 60 - id 6
------------------------------
	F1 Score : 0.326963906581741



#### MLP - Percentage 

In [170]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

In [179]:
# Table 6 - MLP on the percentage-based classes: one network is trained and
# scored per house.
# NOTE(review): no TensorFlow/Keras seed is set in this cell, so the scores can
# differ from run to run - consider seeding if exact reproduction is needed.
n_classes = 3  # consumption categories 0, 1 and 2

for idx, house in enumerate(houses):
    X_train,y_train,X_test,y_test = TrainTest(house,output='Percentage_Consumption')

    # Convert to one hot encodings with a fixed width, so the target always
    # matches the softmax output even if a class is absent from the training period
    one_hot_y_train = to_categorical(y_train,num_classes=n_classes)

    # define model; the input width follows the feature matrix instead of
    # being hard-coded
    inputs = Input(shape=(X_train.shape[1],))

    # a layer instance is callable on a tensor, and returns a tensor
    x = Dense(16, activation='relu')(inputs)
    x = Dense(32, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)

    # This creates a model that chains the Input layer, the two hidden
    # Dense layers (16 and 32 units) and the softmax output layer
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

    model.fit(X_train, one_hot_y_train,epochs=128,verbose=False)

    # Predicted class = index of the largest softmax probability
    y_preds = np.argmax(model.predict(X_test),axis=1)

    # Macro-averaged F1 score
    F1_score = f1_score(y_test,y_preds,average='macro')

    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tF1 Score : {F1_score}')
    print()

------------------------------
House 6 - id 1
------------------------------
	F1 Score : 0.5112978524743231

------------------------------
House 40 - id 2
------------------------------
	F1 Score : 0.5684393406840322

------------------------------
House 59 - id 3
------------------------------
	F1 Score : 0.6310943528084978

------------------------------
House 72 - id 4
------------------------------
	F1 Score : 0.5489329689382585

------------------------------
House 87 - id 5
------------------------------
	F1 Score : 0.5243652615531361

------------------------------
House 60 - id 6
------------------------------
	F1 Score : 0.43666994573338985



#### MLP - Quantile 

In [180]:
# Table 6 - MLP on the quantile-based classes: one network is trained and
# scored per house.
# NOTE(review): no TensorFlow/Keras seed is set in this cell, so the scores can
# differ from run to run - consider seeding if exact reproduction is needed.
n_classes = 3  # consumption categories 0, 1 and 2

for idx, house in enumerate(houses):
    X_train,y_train,X_test,y_test = TrainTest(house,output='Quantile_Consumption')

    # Convert to one hot encodings with a fixed width, so the target always
    # matches the softmax output even if a class is absent from the training period
    one_hot_y_train = to_categorical(y_train,num_classes=n_classes)

    # define model; the input width follows the feature matrix instead of
    # being hard-coded
    inputs = Input(shape=(X_train.shape[1],))

    # a layer instance is callable on a tensor, and returns a tensor
    x = Dense(16, activation='relu')(inputs)
    x = Dense(32, activation='relu')(x)
    predictions = Dense(n_classes, activation='softmax')(x)

    # This creates a model that chains the Input layer, the two hidden
    # Dense layers (16 and 32 units) and the softmax output layer
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

    model.fit(X_train, one_hot_y_train,epochs=128,verbose=False)

    # Predicted class = index of the largest softmax probability
    y_preds = np.argmax(model.predict(X_test),axis=1)

    # Macro-averaged F1 score
    F1_score = f1_score(y_test,y_preds,average='macro')

    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tF1 Score : {F1_score}')
    print()

------------------------------
House 6 - id 1
------------------------------
	F1 Score : 0.6019032358515274

------------------------------
House 40 - id 2
------------------------------
	F1 Score : 0.4950743636070381

------------------------------
House 59 - id 3
------------------------------
	F1 Score : 0.6137904710680778

------------------------------
House 72 - id 4
------------------------------
	F1 Score : 0.5710529353418589

------------------------------
House 87 - id 5
------------------------------
	F1 Score : 0.5676908509430053

------------------------------
House 60 - id 6
------------------------------
	F1 Score : 0.3267233238904627

