<a href="https://colab.research.google.com/github/nkinesis/soen471-project/blob/main/Comparison_DTRFBT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score


In [11]:
#loading the dataset
df = pd.read_csv('../../preprocessing/data/london_clean.csv')
# df.dtypes

In [12]:
# load the data, do train test split (copy from the notebook)
X = df[['DateOfCall', 'PropertyType', 'NumPumpsAttending', 'PumpHoursRoundUp', 'mean_temp']]
y = df[['CostCat']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# print a small sample of train X and y
print(X_train[0:5])
print(y_train[0:5])


         DateOfCall  PropertyType  NumPumpsAttending  PumpHoursRoundUp  \
706975            5            12                  1                 1   
450954            9             6                  2                 1   
525760            6            12                  2                 1   
20577             3            83                  2                 1   
1069644          11            37                  1                 1   

         mean_temp  
706975        15.0  
450954        13.3  
525760        16.4  
20577          7.6  
1069644        8.3  
         CostCat
706975         0
450954         0
525760         0
20577          0
1069644        1


In [13]:
# load the models
pickled_model_dt = pickle.load(open('../../models/DT_model.pickle', 'rb'))
pickled_model_rf = pickle.load(open('../../models/RF_model.pickle','rb'))
pickled_model_bt = pickle.load(open('../../models/BT_model.pickle','rb'))

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [14]:
# generate predictions
y_pred_dt = pickled_model_dt.predict(X_test)
y_pred_rf = pickled_model_rf.predict(X_test)
y_pred_bt = pickled_model_bt.predict(X_test)


In [15]:
# compute the f1 score (if you want to compute other metrics, just change this step, the rest is the same)
# https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html#sklearn.metrics.f1_score

# f1_score = pickled_model.f1_score(y_test, y_pred)
f1_score_dt = f1_score(y_test, y_pred_dt, average='weighted')
print(f1_score_dt)

f1_score_rf = f1_score(y_test, y_pred_rf, average='weighted')
print(f1_score_rf)

f1_score_bt = f1_score(y_test, y_pred_bt, average='weighted')
print(f1_score_bt)

0.7776304732713192
0.6077911066261008
0.7568813813776981


In [16]:
#compute the precision score 
precision_score_dt = precision_score(y_test, y_pred_dt, average = 'weighted')
print(precision_score_dt)

precision_score_rf = precision_score(y_test, y_pred_rf, average = 'weighted')
print(precision_score_rf)

precision_score_bt = precision_score(y_test, y_pred_bt, average = 'weighted')
print(precision_score_bt)

0.7777142671717738
0.6990843854259741
0.7708079072255916


In [17]:
#compute the recall score
recall_score_dt = recall_score(y_test, y_pred_dt, average = 'weighted')
print(recall_score_dt)

recall_score_rf = recall_score(y_test, y_pred_rf, average = 'weighted')
print(recall_score_dt)

recall_score_bt = recall_score(y_test, y_pred_bt, average = 'weighted')
print(recall_score_bt)

0.7812564769280048
0.7812564769280048
0.7701279369924444


In [18]:
# write the code to draw your graph
import plotly.express as px

classifiers = ['Decision Tree Classifier','Random Forest Classifier','Boosted Tree Classifier']
metrics = ['F1-Score']

df = pd.DataFrame({'Classifier': [classifiers[0]] + [classifiers[1]] + [classifiers[2]] ,
                   'Metrics': metrics*3,
                   'Score': [f1_score_dt,f1_score_rf,f1_score_bt]})

fig = px.bar(df, x='Classifier', y='Score', color='Metrics', barmode='group',
             height=400, width=600, title="Classifier Performance Metrics")

fig.update_layout(xaxis=dict(categoryorder='category ascending'))
fig.update_traces(customdata=df['Score'],texttemplate='%{customdata}', textposition='auto', textfont=dict(size=8))
fig.update_traces(text=df['Score'], texttemplate='%{text}', textposition='outside', textfont=dict(size=8))
fig.update_traces(hovertemplate='%{x}Score: %{y:.4f}')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(text=df['Score'].round(2).astype(str), texttemplate='%{text:.4f}', textposition='outside', textfont=dict(size=8))
fig.show()

In [19]:
# write the code to draw your graph
import plotly.express as px

classifiers = ['Decision Tree Classifier','Random Forest Classifier','Boosted Tree Classifier']
metrics = ['Precision_Score']

df = pd.DataFrame({'Classifier': [classifiers[0]] + [classifiers[1]] + [classifiers[2]] ,
                   'Metrics': metrics*3,
                   'Score': [precision_score_dt,precision_score_rf,precision_score_bt]})

fig = px.bar(df, x='Classifier', y='Score', color='Metrics', barmode='group',
             height=400, width=600, title="Classifier Performance Metrics")

fig.update_layout(xaxis=dict(categoryorder='category ascending'))
fig.update_traces(customdata=df['Score'],texttemplate='%{customdata}', textposition='auto', textfont=dict(size=8))
fig.update_traces(text=df['Score'], texttemplate='%{text}', textposition='outside', textfont=dict(size=8))
fig.update_traces(hovertemplate='%{x}Score: %{y:.4f}')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(text=df['Score'].round(2).astype(str), texttemplate='%{text:.4f}', textposition='outside', textfont=dict(size=8))
fig.show()

In [20]:
# write the code to draw your graph
import plotly.express as px

classifiers = ['Decision Tree Classifier','Random Forest Classifier','Boosted Tree Classifier']
metrics = ['Recall_Score']

df = pd.DataFrame({'Classifier': [classifiers[0]] + [classifiers[1]] + [classifiers[2]] ,
                   'Metrics': metrics*3,
                   'Score': [recall_score_dt,recall_score_rf,recall_score_bt]})

fig = px.bar(df, x='Classifier', y='Score', color='Metrics', barmode='group',
             height=400, width=600, title="Classifier Performance Metrics")

fig.update_layout(xaxis=dict(categoryorder='category ascending'))
fig.update_traces(customdata=df['Score'],texttemplate='%{customdata}', textposition='auto', textfont=dict(size=8))
fig.update_traces(text=df['Score'], texttemplate='%{text}', textposition='outside', textfont=dict(size=8))
fig.update_traces(hovertemplate='%{x}Score: %{y:.4f}')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.update_traces(text=df['Score'].round(2).astype(str), texttemplate='%{text:.4f}', textposition='outside', textfont=dict(size=8))
fig.show()