## Sequential and Multi Task Models

In [96]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

from sklearn.metrics import roc_curve,roc_auc_score, f1_score

# Seed for reproducibility
SEED = 123

import numpy.random
np.random.seed(SEED)

import random
random.seed(SEED)

# Keras weight initialisers draw from TensorFlow's own random generator, so
# the MLP cells below are only repeatable if that generator is seeded too.
tf.random.set_seed(SEED)

# Remove warnings
# NOTE(review): this blanket filter also hides scikit-learn / TensorFlow
# deprecation warnings; consider narrowing it once the notebook is stable.
import warnings
warnings.filterwarnings("ignore")

In [10]:
# House numbers iterated over by every experiment cell below; each value is
# passed to TrainTest, which uses it to build the per-house CSV file names.
houses = [6,40,59,72,87,60]

In [11]:
def TrainTest(house_number, output_1, output_2,
              data_dir='Data', train_end='2018-02', test_start='2018-03'):
    """
    Load the feature/target CSVs of one house and split them chronologically.

    The files ``{data_dir}/house_{house_number}_Features.csv`` and
    ``{data_dir}/house_{house_number}_Target.csv`` are read with their first
    column as index, and that index is parsed as datetimes.

    Parameters
    ----------
    house_number : int or str
        Identifier inserted into the CSV file names.
    output_1, output_2 : str
        Names of the two target columns to extract. Each is either Demand,
        Binary_Consumption, Percentage_Consumption or Quantile_Consumption.
    data_dir : str, default 'Data'
        Directory that contains the per-house CSV files.
    train_end : str, default '2018-02'
        Last date label (inclusive) of the training period.
    test_start : str, default '2018-03'
        First date label (inclusive) of the test period.

    Returns
    -------
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 : pandas.DataFrame
        Features and the two single-column target frames for the training
        period, followed by the same three frames for the test period.

    Raises
    ------
    FileNotFoundError
        If one of the CSV files does not exist.
    KeyError
        If ``output_1`` or ``output_2`` is not a column of the target file.
    """
    X = pd.read_csv(f'{data_dir}/house_{house_number}_Features.csv', index_col=0)
    y = pd.read_csv(f'{data_dir}/house_{house_number}_Target.csv', index_col=0)
    X.index = pd.to_datetime(X.index)
    y.index = pd.to_datetime(y.index)

    # Label-based slicing on the datetime index: both bounds are inclusive and
    # a partial date such as '2018-02' covers that whole month.
    X_train = X.loc[:train_end]
    X_test = X.loc[test_start:]

    # The list selector keeps each target as a one-column DataFrame.
    y_train_1 = y.loc[:train_end, [output_1]]
    y_test_1 = y.loc[test_start:, [output_1]]

    y_train_2 = y.loc[:train_end, [output_2]]
    y_test_2 = y.loc[test_start:, [output_2]]

    return X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2

### Table 7

**RF Binary-RF**

In [68]:
# Sequential RF -> RF pipeline: a classifier predicts Binary_Consumption, and
# that prediction is appended to the features of a demand regressor.
for idx, house in enumerate(houses):
    # Train classifier
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 = TrainTest(house,
                                                                      'Binary_Consumption',
                                                                      'Demand')

    clf_consumption = RandomForestClassifier(n_estimators=500, max_depth=9)

    # ravel(): hand scikit-learn a 1-D target instead of a column vector.
    clf_consumption.fit(X_train, y_train_1.values.ravel())

    # Train regressor on the features plus the *true* consumption label
    reg_demand = RandomForestRegressor(n_estimators=500, max_depth=9)

    reg_demand.fit(pd.concat([X_train, y_train_1], axis=1),
                   y_train_2.values.ravel())

    # Test: the true label is unavailable, so use the classifier's prediction.
    y_preds_consumption = clf_consumption.predict(X_test)

    # The extra column must carry the same name it had during fit; newer
    # scikit-learn versions reject feature names that differ from training.
    consumption_feature = pd.Series(y_preds_consumption,
                                    name=y_train_1.columns[0],
                                    index=X_test.index)
    y_preds_demand = reg_demand.predict(pd.concat([X_test, consumption_feature], axis=1))

    # R-coef
    R_coef = np.corrcoef(y_test_2.values.reshape(-1),
            y_preds_demand.reshape(-1))[1,0]

    # RMSE
    RMSE = np.sqrt(mean_squared_error(y_test_2.values.reshape(-1),
                           y_preds_demand.reshape(-1)))
    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tR : {R_coef}')
    print(f'\tRMSE : {RMSE}')
    print()

------------------------------
House 6 - id 1
------------------------------
	R : 0.6215736575406036
	RMSE : 7.288189569663408

------------------------------
House 40 - id 2
------------------------------
	R : 0.5322331899100274
	RMSE : 10.312672218449688

------------------------------
House 59 - id 3
------------------------------
	R : 0.6559850128940404
	RMSE : 15.46158591732804

------------------------------
House 72 - id 4
------------------------------
	R : 0.6943128726276151
	RMSE : 14.807390143932928

------------------------------
House 87 - id 5
------------------------------
	R : 0.5594981550411345
	RMSE : 11.010376990105318

------------------------------
House 60 - id 6
------------------------------
	R : 0.1738099877891489
	RMSE : 2.428861917855822



**MLP binary-MLP**

In [97]:
# Sequential MLP -> MLP: a single Keras graph in which an auxiliary sigmoid
# head predicts Binary_Consumption and its output is concatenated with the
# raw features to predict Demand.
for idx, house in enumerate(houses):
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 = TrainTest(house,
                                                                      'Binary_Consumption',
                                                                      'Demand')
    # Derive the input width from the data instead of hard-coding it, so the
    # cell keeps working if the feature file gains or loses columns.
    n_features = X_train.shape[1]

    # Sequential
    # Model predicts if consumption or not :

    inputs = Input(shape=(n_features,), name='inputs') # All features
    x = Dense(16, activation='relu')(inputs)
    auxiliary_output = Dense(1, activation='sigmoid', name='aux_output')(x)

    # Model predicts the amount of hot water :

    auxiliary_input = Input(shape=(n_features,), name='aux_input')
    x = tf.keras.layers.concatenate([auxiliary_output, auxiliary_input])

    # We stack a densely-connected layer on top
    x = Dense(32, activation='relu')(x)
    main_output = Dense(1, activation='relu', name='main_output')(x)

    # Create the model; the output order fixes the order of predict()'s result
    model = Model(inputs=[inputs, auxiliary_input], outputs=[main_output, auxiliary_output])

    # Compile
    model.compile(optimizer='adam',
                  loss={'main_output':'mse','aux_output':'binary_crossentropy'},
                  metrics={'main_output':'mse','aux_output':'accuracy'})
    # Fit: both inputs receive the same feature matrix
    log_seq = model.fit({'inputs': X_train, 'aux_input': X_train},
              {'main_output': y_train_2, 'aux_output': y_train_1},
              epochs=32, batch_size=16, verbose=0)

    # Index 0 selects main_output (the demand prediction)
    y_preds = model.predict({'inputs': X_test, 'aux_input': X_test}, verbose=0)[0]

    # R-coef
    R_coef = np.corrcoef(y_test_2.values.reshape(-1),
            y_preds.reshape(-1))[1,0]

    # RMSE
    RMSE = np.sqrt(mean_squared_error(y_test_2.values.reshape(-1),
                           y_preds.reshape(-1)))
    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tR : {R_coef}')
    print(f'\tRMSE : {RMSE}')
    print()

------------------------------
House 6 - id 1
------------------------------
	R : 0.6394499061959444
	RMSE : 6.9292404951360975

------------------------------
House 40 - id 2
------------------------------
	R : 0.5839536521376998
	RMSE : 9.468639039277857

------------------------------
House 59 - id 3
------------------------------
	R : 0.6452757187362995
	RMSE : 15.701645305386421

------------------------------
House 72 - id 4
------------------------------
	R : 0.6546827590211344
	RMSE : 15.277917480953228

------------------------------
House 87 - id 5
------------------------------
	R : 0.5950128531142916
	RMSE : 9.921838545560057

------------------------------
House 60 - id 6
------------------------------
	R : 0.3093744248706635
	RMSE : 2.3076558137682133



**RF percentage-MLP**

In [100]:
# Sequential RF -> MLP: a random-forest classifier predicts
# Percentage_Consumption, and that category is appended to the features of a
# small Keras regressor that predicts Demand.
for idx, house in enumerate(houses):
    # Train classifier
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 = TrainTest(house,
                                                                      'Percentage_Consumption',
                                                                      'Demand')

    clf_consumption = RandomForestClassifier(n_estimators=500, max_depth=9)

    # ravel(): hand scikit-learn a 1-D target instead of a column vector.
    clf_consumption.fit(X_train, y_train_1.values.ravel())

    # Train regression - Model predicts the amount of hot water :
    X_train_tot = pd.concat([X_train, y_train_1], axis=1) # Features + true category

    # Create the architecture of the model; the input width follows the data
    # (features plus the category column) rather than a hard-coded constant.
    inputs = Input(shape=(X_train_tot.shape[1],), name='inputs')
    x = Dense(16, activation='relu')(inputs)
    demand = Dense(1, activation='relu', name='demand')(x)

    # Create the model
    model = Model(inputs=[inputs], outputs=[demand])

    # Compile
    model.compile(optimizer='adam',
                  loss={'demand':'mse'},
                  metrics={'demand':'mse'})
    # Fit
    log_seq = model.fit({'inputs': X_train_tot},{'demand': y_train_2},
              epochs=64, batch_size=16, verbose=0)

    # Test
    # First predict the category of consumption
    category_consumption_preds = clf_consumption.predict(X_test)
    # Then feed it to the regressor under the same column name and position
    # the category had in X_train_tot.
    category_feature = pd.Series(category_consumption_preds,
                                 name=y_train_1.columns[0],
                                 index=X_test.index)
    y_preds = model.predict(pd.concat([X_test, category_feature], axis=1), verbose=0)

    # R-coef
    R_coef = np.corrcoef(y_test_2.values.reshape(-1),
            y_preds.reshape(-1))[1,0]

    # RMSE
    RMSE = np.sqrt(mean_squared_error(y_test_2.values.reshape(-1),
                           y_preds.reshape(-1)))
    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tR : {R_coef}')
    print(f'\tRMSE : {RMSE}')
    print()

------------------------------
House 6 - id 1
------------------------------
	R : 0.6412027972494814
	RMSE : 7.3004506900171995

------------------------------
House 40 - id 2
------------------------------
	R : 0.45853448606897595
	RMSE : 10.972801152512384

------------------------------
House 59 - id 3
------------------------------
	R : 0.5273359162160591
	RMSE : 18.774852432218182

------------------------------
House 72 - id 4
------------------------------
	R : 0.6623428509402094
	RMSE : 15.54161424784505

------------------------------
House 87 - id 5
------------------------------
	R : 0.41686424805684974
	RMSE : 12.458388487395474

------------------------------
House 60 - id 6
------------------------------
	R : 0.0035871379582801954
	RMSE : 2.4376873173869402



**RF quartile-MLP**

In [99]:
# Sequential RF -> MLP: a random-forest classifier predicts
# Quantile_Consumption, and that category is appended to the features of a
# small Keras regressor that predicts Demand.
for idx, house in enumerate(houses):
    # Train classifier
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 = TrainTest(house,
                                                                      'Quantile_Consumption',
                                                                      'Demand')

    clf_consumption = RandomForestClassifier(n_estimators=500, max_depth=9)

    # ravel(): hand scikit-learn a 1-D target instead of a column vector.
    clf_consumption.fit(X_train, y_train_1.values.ravel())

    # Train regression - Model predicts the amount of hot water :
    X_train_tot = pd.concat([X_train, y_train_1], axis=1) # Features + true category

    # Create the architecture of the model; the input width follows the data
    # (features plus the category column) rather than a hard-coded constant.
    inputs = Input(shape=(X_train_tot.shape[1],), name='inputs')
    x = Dense(16, activation='relu')(inputs)
    demand = Dense(1, activation='relu', name='demand')(x)

    # Create the model
    model = Model(inputs=[inputs], outputs=[demand])

    # Compile
    model.compile(optimizer='adam',
                  loss={'demand':'mse'},
                  metrics={'demand':'mse'})
    # Fit
    log_seq = model.fit({'inputs': X_train_tot},{'demand': y_train_2},
              epochs=32, batch_size=16, verbose=0)

    # Test
    # First predict the category of consumption
    category_consumption_preds = clf_consumption.predict(X_test)
    # Then feed it to the regressor under the same column name and position
    # the category had in X_train_tot.
    category_feature = pd.Series(category_consumption_preds,
                                 name=y_train_1.columns[0],
                                 index=X_test.index)
    y_preds = model.predict(pd.concat([X_test, category_feature], axis=1), verbose=0)

    # R-coef
    R_coef = np.corrcoef(y_test_2.values.reshape(-1),
            y_preds.reshape(-1))[1,0]

    # RMSE
    RMSE = np.sqrt(mean_squared_error(y_test_2.values.reshape(-1),
                           y_preds.reshape(-1)))
    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tR : {R_coef}')
    print(f'\tRMSE : {RMSE}')
    print()

------------------------------
House 6 - id 1
------------------------------
	R : 0.6357659387664808
	RMSE : 7.367085313056188

------------------------------
House 40 - id 2
------------------------------
	R : 0.3821138548872737
	RMSE : 11.182392546042383

------------------------------
House 59 - id 3
------------------------------
	R : 0.5781956321745817
	RMSE : 17.096189767111845

------------------------------
House 72 - id 4
------------------------------
	R : 0.684815665197323
	RMSE : 15.161331988754393

------------------------------
House 87 - id 5
------------------------------
	R : 0.4641494797402655
	RMSE : 12.502290993134856

------------------------------
House 60 - id 6
------------------------------
	R : 0.04200483918264847
	RMSE : 2.4225477667196826



### Table 8

**RF parallel**

In [107]:
# NOTE(review): although this cell is labelled "RF parallel", it builds the
# same classifier -> regressor chain as the "RF Binary-RF" cell of Table 7
# (the regressor consumes the classifier's prediction). Confirm whether a
# genuinely joint/multi-output forest was intended here.
for idx, house in enumerate(houses):
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 = TrainTest(house,
                                                                      'Binary_Consumption',
                                                                      'Demand')
    # Train the model
    clf_consumption = RandomForestClassifier(n_estimators=500, max_depth=9)
    # ravel(): hand scikit-learn a 1-D target instead of a column vector.
    clf_consumption.fit(X_train, y_train_1.values.ravel())

    reg_demand = RandomForestRegressor(n_estimators=500, max_depth=9)
    reg_demand.fit(pd.concat([X_train, y_train_1], axis=1), y_train_2.values.ravel())

    # Test
    y_preds_consumption = clf_consumption.predict(X_test)

    # Name the predicted column exactly as during fit. An unnamed Series would
    # become a column called 0 (an int) next to string-named features, which
    # newer scikit-learn versions refuse.
    consumption_feature = pd.Series(y_preds_consumption,
                                    name=y_train_1.columns[0],
                                    index=X_test.index)
    y_preds = reg_demand.predict(pd.concat([X_test, consumption_feature], axis=1))

    # R-coef
    R_coef = np.corrcoef(y_test_2.values.reshape(-1),
            y_preds.reshape(-1))[1,0]

    # RMSE
    RMSE = np.sqrt(mean_squared_error(y_test_2.values.reshape(-1),
                           y_preds.reshape(-1)))
    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tR : {R_coef}')
    print(f'\tRMSE : {RMSE}')
    print()

------------------------------
House 6 - id 1
------------------------------
	R : 0.6173648619053879
	RMSE : 7.325053021806932

------------------------------
House 40 - id 2
------------------------------
	R : 0.5184943428183983
	RMSE : 10.454855721486247

------------------------------
House 59 - id 3
------------------------------
	R : 0.6603522762464819
	RMSE : 15.388267488798718

------------------------------
House 72 - id 4
------------------------------
	R : 0.6992080698696164
	RMSE : 14.719558911754726

------------------------------
House 87 - id 5
------------------------------
	R : 0.5653425228622144
	RMSE : 10.968309942560502

------------------------------
House 60 - id 6
------------------------------
	R : 0.16463123203588148
	RMSE : 2.4233520157850807



**MLP parallel**

In [45]:
# Multi-task MLP: one shared trunk with two heads trained jointly, a relu
# head for Demand and a sigmoid head for Binary_Consumption.
for idx, house in enumerate(houses):
    X_train, y_train_1, y_train_2, X_test, y_test_1, y_test_2 = TrainTest(house,
                                                                      'Binary_Consumption',
                                                                      'Demand')

    # Derive the input width from the data instead of hard-coding it.
    inputs = Input(shape=(X_train.shape[1],), name='inputs') # All features
    x = Dense(32, activation='relu')(inputs)
    x = Dense(16, activation='relu')(x)
    demand = Dense(1, activation='relu', name='Demand')(x)
    binary_consumption = Dense(1, activation='sigmoid', name='Binary_Consumption')(x)

    # The output order [demand, binary_consumption] fixes both the order of
    # loss_weights below and the order of predict()'s result.
    model = Model(inputs=[inputs], outputs=[demand, binary_consumption])
    # Compile
    model.compile(optimizer='adam',
                  loss={'Demand':'MSE','Binary_Consumption':'binary_crossentropy'},
                  loss_weights=[1, 0.7])
    # Fit
    log_multi = model.fit({'inputs': X_train},
              {'Demand': y_train_2, 'Binary_Consumption': y_train_1},
              epochs=128, batch_size=16, verbose=0)

    # This model has a single input named 'inputs'. The former extra
    # 'aux_input' key was a leftover from the sequential model and matched no
    # input layer. Index 0 selects the Demand head.
    y_preds = model.predict({'inputs': X_test}, verbose=0)[0]

    # R-coef
    R_coef = np.corrcoef(y_test_2.values.reshape(-1),
            y_preds.reshape(-1))[1,0]

    # RMSE
    RMSE = np.sqrt(mean_squared_error(y_test_2.values.reshape(-1),
                           y_preds.reshape(-1)))
    print(30*'-')
    print(f'House {house} - id {idx+1}')
    print(30*'-')
    print(f'\tR : {R_coef}')
    print(f'\tRMSE : {RMSE}')
    print()

------------------------------
House 6 - id 7
------------------------------
	R : 0.6459926265327762
	RMSE : 6.917757254553661

------------------------------
House 40 - id 41
------------------------------
	R : 0.5754093227407895
	RMSE : 9.602886743969442

------------------------------
House 59 - id 60
------------------------------
	R : 0.59985695368011
	RMSE : 16.736462717427564

------------------------------
House 72 - id 73
------------------------------
	R : 0.6022464082310576
	RMSE : 16.708639188352624

------------------------------
House 87 - id 88
------------------------------
	R : 0.501394948984013
	RMSE : 11.205566666573443

------------------------------
House 60 - id 61
------------------------------
	R : 0.1543919493125996
	RMSE : 2.4169989860839083

