# [Room Occupancy] Ensemble

Note: the current ensembling technique does not make much sense: multiplying binary scores by model weights and summing them will always favor either 1 or the prediction of the higher-weighted model. It should therefore be treated as a placeholder for later development; alternatively, the forecasting result (yhat) could be used as an input variable for the classifier.

### Prepare Workspace

In [1]:
import os
import sys
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_fscore_support

from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

sys.path.append('../code')
import prepare as pr

ERROR:fbprophet:Importing plotly failed. Interactive plots will not work.


In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable the autoreload extension in mode 2 (reload all modules before each
# cell runs), so edits to imported modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [3]:
# Directory that holds the prepared dataset. Set the WATTX_DATA_DIR
# environment variable to point at a different location; the original local
# path is kept as the fallback so existing setups keep working unchanged.
# os.path.join(..., '') appends a trailing separator when one is missing,
# because the file path is later built by plain concatenation
# (data_dir + fn_prepared).
data_dir = os.path.join(
    os.environ.get('WATTX_DATA_DIR', 'C:/Users/makayser/Desktop/wattx_local/'),
    '',
)
# File name of the prepared CSV inside data_dir.
fn_prepared = 'prepared.csv'

### Prepare Data

In [4]:
# Read the prepared table and parse the timestamp column into datetimes.
df = pd.read_csv(data_dir + fn_prepared)
df['timestamp'] = pd.to_datetime(df['timestamp'])
# Index rows by (device, timestamp). The index is built directly from the two
# columns instead of going through a Python list of tuples; both columns also
# remain available as regular columns.
df.index = pd.MultiIndex.from_arrays(
    [df['device'], df['timestamp']],
    names=['device', 'timestamp'],
)

In [5]:
# Inspect column dtypes after loading. The '.1'-suffixed columns are how pandas
# renames duplicate CSV headers — presumably duplicates written by the
# preparation step; verify against the code that produces prepared.csv.
df.dtypes

device                       int64
timestamp           datetime64[ns]
device.1                     int64
device_activated             int64
timestamp.1                 object
occupied                     int64
weekday                      int64
timeperiod                   int64
time                        object
dtype: object

In [6]:
# Preview the first rows to check the (device, timestamp) index and columns.
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,device,timestamp,device.1,device_activated,timestamp.1,occupied,weekday,timeperiod,time
device,timestamp,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,2016-07-25 16:00:00,1,2016-07-25 16:00:00,1,6,2016-07-25 16:00:00,1,1,4,16:00
1,2016-07-25 17:00:00,1,2016-07-25 17:00:00,1,0,2016-07-25 17:00:00,0,1,4,17:00
1,2016-07-25 18:00:00,1,2016-07-25 18:00:00,1,0,2016-07-25 18:00:00,0,1,4,18:00
1,2016-07-25 19:00:00,1,2016-07-25 19:00:00,1,0,2016-07-25 19:00:00,0,1,4,19:00
1,2016-07-25 20:00:00,1,2016-07-25 20:00:00,1,0,2016-07-25 20:00:00,0,1,0,20:00


In [7]:
# Load the pickled classification model that validate() uses for prediction.
# NOTE(review): pickle.load can execute arbitrary code contained in the file —
# only load model artifacts that come from a trusted source.
with open('../assets/classify.ml','rb') as fn:
    m_classify = pickle.load(fn)

## Ensemble (Scoring)

In [8]:
def _report_scores(label, y_true, y_pred):
    """Print weighted precision, recall and F1 of ``y_pred`` against ``y_true``."""
    precision, re, fs, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    print(f'[INFO] {label} Results \n\t Precision:\t {precision}')
    print(f'\t Recall:\t {re}')
    print(f'\t F1 Score:\t {fs}')


def validate(train, test, f_target='occupied', horizon=24, weights=(0.4, 0.6), classifier=None):
    """Score forecasting, classification and their weighted blend on a hold-out window.

    A Prophet model is fitted on ``train[f_target]`` and used to forecast the
    next ``horizon`` hourly steps. The classifier predicts the first
    ``horizon`` rows of ``test`` from the ``device``, ``weekday`` and
    ``timeperiod`` columns. Both results, and a weighted blend of the two, are
    compared against the first ``horizon`` target values of ``test`` and the
    weighted precision / recall / F1 scores are printed.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and hold-out rows. NOTE(review): the comparison pairs the
        forecast with the first rows of ``test`` by position, so it assumes
        ``test`` continues ``train`` hour by hour and has at least ``horizon``
        rows — confirm against ``pr.split``.
    f_target : str
        Column used as the forecasting target and as ground truth.
    horizon : int
        Number of hourly steps to forecast and evaluate (default 24).
    weights : tuple of float
        ``(forecast_weight, classifier_weight)`` used for the blend
        (default ``(0.4, 0.6)``).
    classifier : object, optional
        Model exposing ``predict``; defaults to the notebook-level
        ``m_classify``.

    Returns
    -------
    None
        Results are printed only.
    """
    clf = m_classify if classifier is None else classifier
    w_forecast, w_classify = weights

    # FORECASTING
    _data_train = pr.format_for_prophet(train[f_target])
    _data_test = pr.format_for_prophet(test[f_target])
    m = Prophet(yearly_seasonality=False).fit(_data_train)
    future = m.make_future_dataframe(periods=horizon, freq='H')
    future['floor'] = 0
    fcst = m.predict(future)
    # Only the last `horizon` rows are out-of-sample predictions.
    y_hat = fcst['yhat'].tail(horizon)
    print(y_hat.values)
    # pr.translate_score maps raw values to labels (semantics defined in the
    # project's `prepare` module).
    f_pred = y_hat.apply(pr.translate_score).values
    y_true = _data_test['y'].head(horizon).apply(pr.translate_score).values
    _report_scores('Forecasting', y_true, f_pred)

    # CLASSIFICATION
    y_pred = clf.predict(test[['device', 'weekday', 'timeperiod']].head(horizon))
    c_pred = [round(value) for value in y_pred]
    _report_scores('Classification', y_true, c_pred)

    # ENSEMBLE
    # Blend by position (plain arrays, no index alignment). The raw forecast is
    # unbounded, so rounding the blend alone could yield labels such as -1 or 2;
    # clamp the rounded value to the valid label set {0, 1}.
    blend = (y_hat.values * w_forecast) + (y_pred * w_classify)
    res = [int(min(max(round(value), 0), 1)) for value in blend]
    _report_scores('Ensemble', y_true, res)
    

In [9]:
# Split the rows of device 1 and score all three approaches on 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 1])
validate(train=tr, test=te)

[-0.26379966 -0.24177035 -0.24290746 -0.27702511 -0.29845407 -0.2376419
 -0.06882412  0.15296754  0.32780534  0.39188998  0.36913326  0.34338003
  0.37588341  0.44544055  0.46783501  0.37619177  0.18658012 -0.01165301
 -0.12951668 -0.14613523 -0.11080389 -0.08533571 -0.08800502 -0.0920123 ]
[INFO] Forecasting Results 
	 Precision:	 1.0
	 Recall:	 1.0
	 F1 Score:	 1.0
[INFO] Classification Results 
	 Precision:	 1.0
	 Recall:	 1.0
	 F1 Score:	 1.0
[INFO] Ensemble Results 
	 Precision:	 1.0
	 Recall:	 1.0
	 F1 Score:	 1.0


In [10]:
# Split the rows of device 2 and score all three approaches on 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 2])
validate(train=tr, test=te)

[0.79156057 0.85147725 0.8277577  0.69475825 0.49522292 0.3093619
 0.19890114 0.1721439  0.19315684 0.21863023 0.22688379 0.21918624
 0.20394209 0.18751831 0.1806071  0.20564278 0.28671106 0.42427837
 0.57992703 0.69371528 0.72707398 0.69547053 0.65816226 0.66853114]
[INFO] Forecasting Results 
	 Precision:	 0.9017857142857143
	 Recall:	 0.875
	 F1 Score:	 0.8743478260869565
[INFO] Classification Results 
	 Precision:	 0.8777777777777778
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8310023310023308
[INFO] Ensemble Results 
	 Precision:	 0.8777777777777778
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8310023310023308


In [11]:
# Device 3, using the 'device_activated' column as target instead of the
# default 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 3])
validate(train=tr, test=te, f_target='device_activated')

[ 1.44120814  1.51919751  1.60303376  1.57408552  1.33749957  0.93605394
  0.53784187  0.30322331  0.25570125  0.27848834  0.23631146  0.1015712
 -0.035953   -0.07777201 -0.02766546  0.01048281 -0.05391524 -0.19056681
 -0.26007926 -0.13951575  0.15918333  0.49417922  0.71757988  0.79693777]
[INFO] Forecasting Results 
	 Precision:	 0.8333333333333334
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8333333333333334
[INFO] Classification Results 
	 Precision:	 0.8684210526315791
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8193277310924371
[INFO] Ensemble Results 
	 Precision:	 0.8684210526315791
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8193277310924371


In [12]:
# Device 3 again, this time with the default 'occupied' target.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 3])
validate(train=tr, test=te)

[ 0.37513132  0.38206705  0.3814342   0.34578847  0.26869091  0.17678134
  0.10854487  0.08082485  0.07632133  0.06397519  0.03105776 -0.00557655
 -0.01987635 -0.00672515  0.01039742  0.00359478 -0.02722873 -0.04770027
 -0.01723022  0.07315136  0.18937352  0.2808273   0.32028171  0.32033356]
[INFO] Forecasting Results 
	 Precision:	 0.390625
	 Recall:	 0.625
	 F1 Score:	 0.4807692307692308
[INFO] Classification Results 
	 Precision:	 0.8684210526315791
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8193277310924371
[INFO] Ensemble Results 
	 Precision:	 0.8684210526315791
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.8193277310924371


  'precision', 'predicted', average, warn_for)


In [13]:
# Split the rows of device 4 and score all three approaches on 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 4])
validate(train=tr, test=te)

[ 0.39070401  0.39309331  0.3054347   0.14619752 -0.0153565  -0.11597824
 -0.14452585 -0.13799232 -0.13803514 -0.15421311 -0.16780585 -0.16519092
 -0.15961294 -0.17533315 -0.21154927 -0.22859274 -0.17801876 -0.05155746
  0.1002044   0.20303625  0.21963652  0.1796626   0.14957099  0.16895148]
[INFO] Forecasting Results 
	 Precision:	 0.9184027777777778
	 Recall:	 0.9583333333333334
	 F1 Score:	 0.9379432624113475
[INFO] Classification Results 
	 Precision:	 0.9147727272727274
	 Recall:	 0.875
	 F1 Score:	 0.8944444444444444
[INFO] Ensemble Results 
	 Precision:	 0.9147727272727274
	 Recall:	 0.875
	 F1 Score:	 0.8944444444444444


  'precision', 'predicted', average, warn_for)


In [14]:
# Split the rows of device 5 and score all three approaches on 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 5])
validate(train=tr, test=te)

[0.56904457 0.59825301 0.63577638 0.61877424 0.5151761  0.35586293
 0.20957547 0.12652508 0.10398444 0.10232478 0.08977471 0.06897739
 0.06166958 0.07351054 0.0813663  0.0595736  0.01736394 0.0039155
 0.06853491 0.21084654 0.3707423  0.47265106 0.48623289 0.44759095]
[INFO] Forecasting Results 
	 Precision:	 1.0
	 Recall:	 0.7916666666666666
	 F1 Score:	 0.8837209302325583
[INFO] Classification Results 
	 Precision:	 1.0
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.9090909090909091
[INFO] Ensemble Results 
	 Precision:	 1.0
	 Recall:	 0.8333333333333334
	 F1 Score:	 0.9090909090909091


  'recall', 'true', average, warn_for)


In [15]:
# Split the rows of device 6 and score all three approaches on 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 6])
validate(train=tr, test=te)

[0.86372542 0.86528483 0.80633218 0.68130776 0.50592609 0.31922787
 0.17323624 0.10728305 0.12110886 0.17016445 0.19405178 0.16158051
 0.09772432 0.06810803 0.12990553 0.28648175 0.4814278  0.63633476
 0.702268   0.68627152 0.63691564 0.60546496 0.61397684 0.64944209]
[INFO] Forecasting Results 
	 Precision:	 0.9166666666666666
	 Recall:	 0.9166666666666666
	 F1 Score:	 0.9166666666666666
[INFO] Classification Results 
	 Precision:	 0.8776223776223775
	 Recall:	 0.875
	 F1 Score:	 0.8747826086956522
[INFO] Ensemble Results 
	 Precision:	 0.8776223776223775
	 Recall:	 0.875
	 F1 Score:	 0.8747826086956522


In [16]:
# Split the rows of device 7 and score all three approaches on 'occupied'.
tr, te = pr.split(df.loc[df.index.get_level_values('device') == 7])
validate(train=tr, test=te)

[-0.01557256 -0.00099137 -0.00345905 -0.01769576 -0.02413393 -0.0132791
  0.00306858  0.00551728 -0.00934719 -0.0215157  -0.00579518  0.04179592
  0.09679545  0.12729741  0.12374281  0.10806715  0.11210842  0.14537172
  0.18347274  0.18926267  0.14722463  0.07877909  0.0242795   0.00901626]
[INFO] Forecasting Results 
	 Precision:	 1.0
	 Recall:	 1.0
	 F1 Score:	 1.0
[INFO] Classification Results 
	 Precision:	 1.0
	 Recall:	 1.0
	 F1 Score:	 1.0
[INFO] Ensemble Results 
	 Precision:	 1.0
	 Recall:	 1.0
	 F1 Score:	 1.0
