## NEW

In [1]:
import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt
from pathlib import Path
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing, tree
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, f1_score

In [2]:
df_light=pd.read_csv('Data/656A/light.csv')
df_temp=pd.read_csv('Data/656A/temperature.csv')
df_co2=pd.read_csv('Data/656A/co2.csv')
df_pir=pd.read_csv('Data/656A/pir.csv')
df_hum=pd.read_csv('Data/656A/humidity.csv')

In [3]:
df_light['1377299095']=df_light['1377299095'].astype(str)
df_temp['1377299095']=df_temp['1377299095'].astype(str)
df_hum['1377299095']=df_hum['1377299095'].astype(str)
df_co2['1377299095']=df_co2['1377299095'].astype(str)
df_pir['1377299096']=df_pir['1377299096'].astype(str)


#merge light, temperature, and humidity dfs
df656lt = pd.merge(df_light, df_temp, on='1377299095')
df656lth = pd.merge(df656lt, df_hum, on='1377299095')
df656lthco2 = df656lth.combine_first(df_co2)
df = df656lthco2.combine_first(df_pir)

df.rename(columns = {"1377299095": "day",
                          " 177.00": "lights", " 24.37": 'temp',
                          " 49.90": "humidity", " 578.00": "co2",
                          " 27.00": "pir"},  
           inplace = True) 
df=df.fillna(0)
df['Occupied_Room'] = np.where(df['pir']!= 0, True, False)

In [4]:
traindf=df[['co2', 'humidity', 'lights', 'temp']]
X = traindf.values[:,:]
Y = df['Occupied_Room'].values
X_train, X_test, y_train, y_test = train_test_split( 
          X, Y, test_size = 0.2)

In [5]:
# train a decision-tree algorithm to make predictions 

classifier = DecisionTreeClassifier()
r = classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

       False       0.99      0.99      0.99     23474
        True       0.93      0.92      0.93      2708

    accuracy                           0.98     26182
   macro avg       0.96      0.96      0.96     26182
weighted avg       0.98      0.98      0.98     26182



In [12]:
r = tree.export_text(classifier, feature_names=['co2', 'humidity', 'lights', 'temp'])
print(r)

|--- co2 <= 438.50
|   |--- temp <= 24.95
|   |   |--- humidity <= 58.57
|   |   |   |--- humidity <= 53.99
|   |   |   |   |--- humidity <= 53.97
|   |   |   |   |   |--- class: False
|   |   |   |   |--- humidity >  53.97
|   |   |   |   |   |--- temp <= 22.53
|   |   |   |   |   |   |--- co2 <= 417.00
|   |   |   |   |   |   |   |--- lights <= 3.50
|   |   |   |   |   |   |   |   |--- class: True
|   |   |   |   |   |   |   |--- lights >  3.50
|   |   |   |   |   |   |   |   |--- lights <= 4.50
|   |   |   |   |   |   |   |   |   |--- class: False
|   |   |   |   |   |   |   |   |--- lights >  4.50
|   |   |   |   |   |   |   |   |   |--- co2 <= 414.00
|   |   |   |   |   |   |   |   |   |   |--- class: False
|   |   |   |   |   |   |   |   |   |--- co2 >  414.00
|   |   |   |   |   |   |   |   |   |   |--- lights <= 5.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   |   |--- lights >  5.50
|   |   |   |   |   |   | 

## Export

In [16]:
import pickle
# save the classifier
with open('../Server/classifier.pkl', 'wb') as fid:
    pickle.dump(classifier, fid)    


In [23]:
# test loading
# load it again
with open('../Server/classifier.pkl', 'rb') as fid:
    classifier_loaded = pickle.load(fid)

print(classification_report(y_train, classifier_loaded.predict(X_train)))

              precision    recall  f1-score   support

       False       1.00      1.00      1.00     93985
        True       1.00      0.98      0.99     10740

    accuracy                           1.00    104725
   macro avg       1.00      0.99      0.99    104725
weighted avg       1.00      1.00      1.00    104725

