## ML Modelling

### Install Dependencies

In [None]:
!pip install --upgrade pip

In [None]:
!pip install --pre pycaret

In [None]:
# for some model interpretation functions
!pip install interpret

In [None]:
pip install tune-sklearn ray[tune]

In [None]:
!pip install preprocessor

In [None]:
!pip install matplotlib==2.1.1

### Load Dependencies and Data

In [None]:
from pycaret.utils import version
version()

'3.0.0.rc4'

In [None]:
#Load dependencies

import numpy as np
import pandas as pd
import pycaret
from pycaret.regression import *

#Connecting to google Colab
from google.colab import drive
drive.mount('/content/drive')

#Import sklearn dependencies
import shap
from xgboost import XGBClassifier
import preprocessor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from sklearn import metrics
from sklearn.preprocessing import normalize

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer, KNNImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

#Data Visualization
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import plotly.graph_objects as go
import plotly.express as px

plt.rcParams["figure.figsize"] = (10, 10)  # Change matplotlib Box Size
plt.rcParams["font.size"] = 14  # Change matplotlib Font Size
plt.style.use("fivethirtyeight")

# supress unnecessary warning msgs.
import warnings
warnings.filterwarnings('ignore')

Mounted at /content/drive


In [48]:
##Load CLEAN data sampled every 5 min synced with lamp and the average sensor data with nearest time sampling 
# orig_url='https://drive.google.com/file/d/1tW4tLhWYQiLXrpPOLjvYJGmooKWannAH/view?usp=sharing'
# file_id = orig_url.split('/')[-2]
# dwn_url='https://drive.google.com/uc?export=download&id=' + file_id

df_single_lamp_avg_sensor_clean=pd.read_csv("/content/drive/MyDrive/Colab Notebooks/data_single_lamps_avg_sensors_clean.csv", parse_dates=['time'])
df_single_lamp_avg_sensor_clean.head()

Unnamed: 0,source_address,time,light_state,local_detections,sht40_humidity,sht40_temperature,shtc3_humidity,shtc3_temperature,total_normalized_energy,wsen_pads_temperature,delta_energy,power
0,2062404276,2022-08-15 00:00:00,2.0,18565.0,26.09,34.87,49.495556,21.908889,189624.0,35.84,0.0,1.5
1,2062404276,2022-08-15 00:05:00,2.0,18565.0,26.09,34.85,49.5,21.902222,189624.0,35.81,0.0,1.5
2,2062404276,2022-08-15 00:10:00,2.0,18565.0,26.02,34.82,49.501111,21.901111,189624.0,35.8,0.0,1.5
3,2062404276,2022-08-15 00:15:00,2.0,18565.0,26.09,34.83,49.528889,21.896667,189624.0,35.78,0.0,1.5
4,2062404276,2022-08-15 00:20:00,2.0,18565.0,26.02,34.86,49.6425,21.8675,189624.0,35.78,0.0,1.5


In [49]:
#Creating a dataset with 9 lamps to train the model, leaving out 2 lamps to test the model as unseen data.
df_single_lamp_avg_sensor_9_lamp_train_clean=df_single_lamp_avg_sensor_clean[~(df_single_lamp_avg_sensor_clean['source_address'].isin([2062404276, 367818600]))]

In [50]:
#Creating a dataset with 2 lamps that were not part of the training of the model, which helps to test the robustness of the model.
df_single_lamp_avg_sensor_2_lamp_test_clean=df_single_lamp_avg_sensor_clean[(df_single_lamp_avg_sensor_clean['source_address'].isin([2062404276, 367818600]))]

In [51]:
#Reducing the size of the training dataset by taking 2/5 of the data by grouping according to the source ID. Otherwise, model training takes a lot of time that is not necessary. 

df_single_lamp_avg_sensor_9_lamp_train_clean=df_single_lamp_avg_sensor_9_lamp_train_clean.groupby('source_address', group_keys=False).sample(frac = 2/5)

In [87]:
#Final Training dataset for repeating the experiment: removing all other features that are not necessary or redundant for training the model, based on the results of the correlation matrix.

df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean=df_single_lamp_avg_sensor_9_lamp_train_clean.drop(columns=['time', 'light_state', 'local_detections', 'source_address', 'delta_energy', 'total_normalized_energy', 'wsen_pads_temperature'])
df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,shtc3_temperature,power
59265,31.14,34.76,61.301111,22.411111,1.5
67881,22.23,35.22,42.38125,22.49,1.5
68607,21.14,35.28,41.998889,22.492222,6.445
58794,31.96,34.12,63.048571,21.788571,1.5
62698,22.83,34.87,45.03625,22.605,1.5


In [53]:
#Final test dataset for repeating the experiment: removing features that are not necessary or redundant for training the model, based on the results of the correlation matrix.

df_single_lamp_avg_sensor_2_lamp_test_temp_humidity_clean=df_single_lamp_avg_sensor_2_lamp_test_clean.drop(columns=['time', 'delta_energy', 'light_state', 'local_detections', 'source_address', 'total_normalized_energy', 'wsen_pads_temperature'])
df_single_lamp_avg_sensor_2_lamp_test_temp_humidity_clean.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,shtc3_temperature,power
0,26.09,34.87,49.495556,21.908889,1.5
1,26.09,34.85,49.5,21.902222,1.5
2,26.02,34.82,49.501111,21.901111,1.5
3,26.09,34.83,49.528889,21.896667,1.5
4,26.02,34.86,49.6425,21.8675,1.5


In [54]:
df_single_lamp_avg_sensor_9_lamp_train_clean.columns


Index(['source_address', 'time', 'light_state', 'local_detections',
       'sht40_humidity', 'sht40_temperature', 'shtc3_humidity',
       'shtc3_temperature', 'total_normalized_energy', 'wsen_pads_temperature',
       'delta_energy', 'power'],
      dtype='object')

In [55]:
#Training dataset for predicting HUMIDITY: removing all other features that are not necessary or redundant for training the model, based on the results of the correlation matrix.

df_single_lamp_avg_sensor_9_lamp_train_humidity_clean=df_single_lamp_avg_sensor_9_lamp_train_clean.drop(columns=['time', 'light_state', 'local_detections', 'source_address', 'delta_energy', 'total_normalized_energy', 'shtc3_temperature','wsen_pads_temperature'])
df_single_lamp_avg_sensor_9_lamp_train_humidity_clean.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,power
59265,31.14,34.76,61.301111,1.5
67881,22.23,35.22,42.38125,1.5
68607,21.14,35.28,41.998889,6.445
58794,31.96,34.12,63.048571,1.5
62698,22.83,34.87,45.03625,1.5


In [56]:
#Test UNSEEN 2 LAMP dataset for predicting HUMIDITY: removing all other features that are not necessary or redundant for training the model, based on the results of the correlation matrix.

df_single_lamp_avg_sensor_2_lamp_test_humidity_clean=df_single_lamp_avg_sensor_2_lamp_test_clean.drop(columns=['time', 'delta_energy', 'shtc3_temperature','light_state', 'local_detections', 'source_address', 'total_normalized_energy', 'wsen_pads_temperature'])
df_single_lamp_avg_sensor_2_lamp_test_humidity_clean.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,power
0,26.09,34.87,49.495556,1.5
1,26.09,34.85,49.5,1.5
2,26.02,34.82,49.501111,1.5
3,26.09,34.83,49.528889,1.5
4,26.02,34.86,49.6425,1.5


In [57]:
#Training dataset for predicting TEMPERATURE: removing all other features that are not necessary or redundant for training the model, based on the results of the correlation matrix.

df_single_lamp_avg_sensor_9_lamp_train_temperature_clean=df_single_lamp_avg_sensor_9_lamp_train_clean.drop(columns=['time', 'light_state', 'local_detections', 'source_address', 'delta_energy', 'total_normalized_energy', 'shtc3_humidity','wsen_pads_temperature'])
df_single_lamp_avg_sensor_9_lamp_train_temperature_clean.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_temperature,power
59265,31.14,34.76,22.411111,1.5
67881,22.23,35.22,22.49,1.5
68607,21.14,35.28,22.492222,6.445
58794,31.96,34.12,21.788571,1.5
62698,22.83,34.87,22.605,1.5


In [58]:
#Test UNSEEN 2 LAMP dataset for predicting TEMPERATURE: removing all other features that are not necessary or redundant for training the model, based on the results of the correlation matrix.

df_single_lamp_avg_sensor_2_lamp_test_temperature_clean=df_single_lamp_avg_sensor_2_lamp_test_clean.drop(columns=['time', 'delta_energy', 'shtc3_humidity','light_state', 'local_detections', 'source_address', 'total_normalized_energy', 'wsen_pads_temperature'])
df_single_lamp_avg_sensor_2_lamp_test_temperature_clean.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_temperature,power
0,26.09,34.87,21.908889,1.5
1,26.09,34.85,21.902222,1.5
2,26.02,34.82,21.901111,1.5
3,26.09,34.83,21.896667,1.5
4,26.02,34.86,21.8675,1.5


In [59]:
# structure all the DataFrames used for taining and testing into a simple dict to SAVE all the relevant files. 
dfs = {
    #original clean dataset
    "data-cleaned-single_lamp_avg_sensor_power": df_single_lamp_avg_sensor_clean,

    #Training and unseen dataset for repeating the experiment
    "data-cleaned-single_lamp_avg_sensor__power_9_lamp_2_5th_train_data": df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean,
    "data-cleaned-single_lamp_avg_sensor__power_2_lamp_test_data": df_single_lamp_avg_sensor_2_lamp_test_temp_humidity_clean,

    #Dataframes for MODEL specifically predicting HUMIDITY   
    "data-HUMIDITY_cleaned-single_lamp_avg_sensor__power_9_lamp_2_5th_train_data": df_single_lamp_avg_sensor_9_lamp_train_humidity_clean,
    "data-HUMIDITY_cleaned-single_lamp_avg_sensor__power_2_lamp_test_data": df_single_lamp_avg_sensor_2_lamp_test_temp_humidity_clean,
    
    #Dataframes for MODEL specifically predicting TEMPERATURE
    "data-TEMPERATURE_cleaned-single_lamp_avg_sensor__power_9_lamp_2_5th_train_data": df_single_lamp_avg_sensor_9_lamp_train_temperature_clean,
    "data-TEMPERATURE_cleaned-single_lamp_avg_sensor__power_2_lamp_test_data": df_single_lamp_avg_sensor_2_lamp_test_temperature_clean,  
    }

# iterate and save the data to CSV
for name, data in dfs.items():
    data.to_csv(f"{name}.csv", index=False)

### Train Test Split the data

In [60]:
# Repeating the EXPERIMENT for predicting HUMIDITY: Train Test Split data from single lamp & average sensor dataset for model only light temparature, sensor temparature and delta energy

X1 = df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean.drop(columns=['shtc3_humidity'])
y1 = df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean.shtc3_humidity

X_train, X_test, y_train, y_test = train_test_split(X1, y1, test_size=0.2, random_state=42)
X_train.shape, X_test.shape

((39807, 4), (9952, 4))

In [88]:
# Repeating the EXPERIMENT for predicting TEMPERATURE: Train Test Split data from single lamp & average sensor dataset for model only light temparature, sensor temparature and delta energy

X2 = df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean.drop(columns=['shtc3_temperature'])
y2 = df_single_lamp_avg_sensor_9_lamp_train_temp_humidity_clean.shtc3_temperature

X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2, random_state=42)
X_train.shape, X_test.shape

((39807, 4), (9952, 4))

In [None]:
# For predicting only HUMIDITY: Train Test Split data from single lamp & average sensor dataset for model only light temparature, sensor temparature and delta energy

X3 = df_single_lamp_avg_sensor_9_lamp_train_humidity_clean.drop(columns=['shtc3_humidity'])
y3 = df_single_lamp_avg_sensor_9_lamp_train_humidity_clean.shtc3_temperature

X_train, X_test, y_train, y_test = train_test_split(X3, y3, test_size=0.2, random_state=42)
X_train.shape, X_test.shape

((39807, 4), (9952, 4))

In [None]:
# For predicting only TEMPERATURE: Train Test Split data from single lamp & average sensor dataset for model only light temparature, sensor temparature and delta energy

X4 = df_single_lamp_avg_sensor_9_lamp_train_temperature_clean.drop(columns=['shtc3_temperature'])
y4 = df_single_lamp_avg_sensor_9_lamp_train_temperature_clean.shtc3_temperature

X_train, X_test, y_train, y_test = train_test_split(X4, y4, test_size=0.2, random_state=42)
X_train.shape, X_test.shape

((39807, 3), (9952, 3))

In [89]:
# Review the X_test dataset for correctness before proceeding to initialize the model.

X_test.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,power
48239,30.52,37.21,64.414286,4.51
98853,30.37,34.81,59.382222,1.5
148766,25.07,35.7,47.474286,1.5
83569,27.96,34.63,54.175556,1.5
32137,31.34,36.81,61.461429,5.656667


In [90]:
# Review the y_test dataset for correctness before proceeding to initialize the model.

y_test.head()

48239     23.107143
98853     22.220000
148766    23.225714
83569     21.735556
32137     23.717143
Name: shtc3_temperature, dtype: float64

# **ML Model setup**


## **Initialize Setup using PyCaret**

Below are the initialization settings for creating 4 different ML models for repeating the experiment that predict either humidity or temperature;

&

ML model for predicting only humidity or temperature.

**make sure to initialize model setup ONCE at a time specifically for a particular X_train and y_train dataset**

In [63]:
#Initialize setup
#Model for repeating the EXPERIMENT, and predicting HUMIDITY

reg_experiment_humidity = setup(data=pd.concat([X_train, y_train], axis=1), 
            target = 'shtc3_humidity', 
            session_id=1, 
            multicollinearity_threshold=True,
            normalize=True,
            polynomial_features=False,
            #polynomial_degree=2,
            experiment_name='ML Model: Repeating the experiment for predicting HUMIDITY',
            n_jobs=-1)

Unnamed: 0,Description,Value
0,Session id,1
1,Target,shtc3_humidity
2,Target type,Regression
3,Data shape,"(39807, 5)"
4,Train data shape,"(27864, 5)"
5,Test data shape,"(11943, 5)"
6,Numeric features,4
7,Preprocess,True
8,Imputation type,simple
9,Numeric imputation,mean


In [91]:
#Initialize setup
#Model for repeating the EXPERIMENT, and predicting TEMPERATURE

reg_experiment_temperature = setup(data=pd.concat([X_train, y_train], axis=1), 
            target = 'shtc3_temperature', 
            session_id=1, 
            multicollinearity_threshold=True,
            normalize=True,
            polynomial_features=False,
            #polynomial_degree=2,
            experiment_name='ML Model: Repeating the experiment for predicting TEMPERATURE',
            n_jobs=-1)

Unnamed: 0,Description,Value
0,Session id,1
1,Target,shtc3_temperature
2,Target type,Regression
3,Data shape,"(39807, 5)"
4,Train data shape,"(27864, 5)"
5,Test data shape,"(11943, 5)"
6,Numeric features,4
7,Preprocess,True
8,Imputation type,simple
9,Numeric imputation,mean


In [None]:
#Initialize setup
#Model for predicting HUMIDITY

reg_HUMIDITY = setup(data=pd.concat([X_train, y_train], axis=1), 
            target = 'shtc3_humidity', 
            session_id=1, 
            multicollinearity_threshold=True,
            normalize=True,
            polynomial_features=False,
            #polynomial_degree=2,
            experiment_name='ML Model for predicting HUMIDITY',
            n_jobs=-1)

In [None]:
#Initialize setup
#Model for predicting TEMPERATURE

reg_TEMPERATURE = setup(data=pd.concat([X_train, y_train], axis=1), 
            target = 'shtc3_temperature', 
            session_id=1, 
            multicollinearity_threshold=True,
            normalize=True,
            polynomial_features=False,
            #polynomial_degree=2,
            experiment_name='ML Model for predicting TEMPERATURE',
            n_jobs=-1)

## **Train and Compare to find BEST model**

Determine the best model for each category by executing the cells one by one.

In [64]:
best_model_experiment_humidity = compare_models(fold=20, sort='R2')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,0.6351,0.865,0.9297,0.9905,0.0178,0.0124,4.0875
et,Extra Trees Regressor,0.6329,0.8668,0.9307,0.9904,0.0179,0.0124,2.3805
lightgbm,Light Gradient Boosting Machine,0.7662,1.0328,1.016,0.9886,0.0196,0.0151,0.19
knn,K Neighbors Regressor,0.7221,1.096,1.0465,0.9879,0.0203,0.0142,0.084
gbr,Gradient Boosting Regressor,0.8591,1.2506,1.1181,0.9862,0.0215,0.0169,1.2395
lar,Least Angle Regression,0.9263,1.3803,1.1747,0.9848,0.0231,0.0184,0.031
br,Bayesian Ridge,0.9263,1.3803,1.1747,0.9848,0.0231,0.0184,0.031
huber,Huber Regressor,0.9254,1.3817,1.1753,0.9848,0.0231,0.0184,0.0705
ridge,Ridge Regression,0.9264,1.3803,1.1747,0.9848,0.0231,0.0184,0.034
lr,Linear Regression,0.9263,1.3803,1.1747,0.9848,0.0231,0.0184,0.276


Processing:   0%|          | 0/77 [00:00<?, ?it/s]

In [92]:
best_model_experiment_temperature = compare_models(fold=20, sort='R2')

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
rf,Random Forest Regressor,0.127,0.034,0.1844,0.9288,0.0078,0.0056,4.2745
et,Extra Trees Regressor,0.1317,0.0357,0.1889,0.9253,0.008,0.0058,2.4605
lightgbm,Light Gradient Boosting Machine,0.1521,0.0402,0.2004,0.9159,0.0085,0.0067,0.15
knn,K Neighbors Regressor,0.1499,0.0453,0.2128,0.9052,0.0089,0.0066,0.085
gbr,Gradient Boosting Regressor,0.1803,0.0543,0.2329,0.8865,0.0098,0.008,1.313
lr,Linear Regression,0.1881,0.0568,0.2384,0.8811,0.01,0.0083,0.039
ridge,Ridge Regression,0.1881,0.0568,0.2384,0.8811,0.01,0.0083,0.0375
lar,Least Angle Regression,0.1881,0.0568,0.2384,0.8811,0.01,0.0083,0.045
br,Bayesian Ridge,0.1881,0.0568,0.2384,0.8811,0.01,0.0083,0.041
huber,Huber Regressor,0.1871,0.0571,0.239,0.8805,0.0101,0.0082,0.1645


Processing:   0%|          | 0/77 [00:00<?, ?it/s]

In [None]:
best_model_HUMIDITY = compare_models(fold=20, sort='R2')

In [None]:
best_model_TEMPERATURE = compare_models(fold=20, sort='R2')

## **Create Model**

Create model by taking the best model based on MAE and R2 scores for each category by executing the cells one by one.

In [65]:
model_experiment_humidity = create_model('rf', fold=20)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.6457,0.8911,0.944,0.99,0.0182,0.0127
1,0.6189,0.811,0.9005,0.991,0.0171,0.012
2,0.629,0.8616,0.9282,0.9904,0.018,0.0124
3,0.6068,0.8176,0.9042,0.9911,0.0171,0.0118
4,0.6539,0.9535,0.9765,0.9896,0.0188,0.0128
5,0.6267,0.8381,0.9155,0.9906,0.0174,0.0123
6,0.6202,0.8519,0.923,0.9908,0.0178,0.0122
7,0.628,0.8425,0.9179,0.991,0.0176,0.0123
8,0.6531,0.9293,0.964,0.9896,0.0184,0.0127
9,0.6177,0.7993,0.894,0.9912,0.0173,0.0122


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [93]:
model_experiment_temperature = create_model('lightgbm', fold=20)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.1528,0.0398,0.1995,0.9176,0.0084,0.0067
1,0.1471,0.0364,0.1908,0.922,0.0081,0.0065
2,0.151,0.0403,0.2006,0.9152,0.0084,0.0067
3,0.1477,0.0376,0.1939,0.9236,0.0082,0.0065
4,0.1526,0.0411,0.2028,0.9084,0.0086,0.0067
5,0.1533,0.0413,0.2032,0.9176,0.0086,0.0068
6,0.1468,0.0384,0.1959,0.9211,0.0083,0.0065
7,0.1501,0.0394,0.1984,0.9163,0.0084,0.0066
8,0.1527,0.0418,0.2044,0.914,0.0086,0.0067
9,0.1559,0.0439,0.2095,0.911,0.0088,0.0069


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
model_HUMIDITY = create_model('rf', fold=20)

In [None]:
model_TEMPERATURE = create_model('rf', fold=20)

## **Tune Model Hyperparameters**

**Make sure you tune the model ONCE at a time specifically for a model created from a particular X_train and y_train dataset**

In [66]:
#Tune the model for repeating the EXPERIMENT, and predicting HUMIDITY

tuned_model_experiment_humidity = tune_model(model_experiment_humidity, n_iter=10, 
                      search_library='scikit-learn', 
                      search_algorithm='random', optimize='R2', verbose=True,
                      fold=5                 
                                         )

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.7639,1.0767,1.0377,0.9881,0.0199,0.015
1,0.771,1.1051,1.0513,0.988,0.0202,0.0152
2,0.78,1.1107,1.0539,0.9877,0.0201,0.0152
3,0.7829,1.1124,1.0547,0.9878,0.0203,0.0154
4,0.7809,1.1041,1.0508,0.9878,0.0202,0.0154
Mean,0.7758,1.1018,1.0497,0.9879,0.0202,0.0152
Std,0.0072,0.0129,0.0062,0.0002,0.0002,0.0002


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 5 folds for each of 10 candidates, totalling 50 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [94]:
#Tune the model for repeating the EXPERIMENT, and predicting TEMPERATURE

tuned_model_experiment_temperature = tune_model(model_experiment_temperature, n_iter=10, 
                      search_library='scikit-learn', 
                      search_algorithm='random', optimize='R2', verbose=True,
                      fold=5                 
                                         )

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,0.1721,0.0498,0.2231,0.8962,0.0094,0.0076
1,0.1741,0.0513,0.2266,0.8924,0.0096,0.0077
2,0.1796,0.0547,0.2338,0.8847,0.0099,0.0079
3,0.1773,0.0518,0.2275,0.8923,0.0096,0.0078
4,0.1761,0.0519,0.2277,0.8922,0.0096,0.0078
Mean,0.1758,0.0519,0.2277,0.8916,0.0096,0.0078
Std,0.0026,0.0016,0.0034,0.0038,0.0001,0.0001


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 5 folds for each of 10 candidates, totalling 50 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


In [None]:
#Tune the model for predicting HUMIDITY

tuned_model_HUMIDITY = tune_model(model_HUMIDITY, n_iter=10, 
                      search_library='scikit-learn', 
                      search_algorithm='random', optimize='R2', verbose=True,
                      fold=5                 
                                         )

In [None]:
#Tune the model for predicting TEMPERATURE

tuned_model_TEMPERATURE = tune_model(model_TEMPERATURE, n_iter=10, 
                      search_library='scikit-learn', 
                      search_algorithm='random', optimize='R2', verbose=True,
                      fold=5                 
                                         )

## **Analyze Model Performance Diagnostics**

Model for repeating the EXPERIMENT, and predicting HUMIDITY

In [67]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_experiment_humidity)
#plt.savefig('rf_humidity.png', dpi=300, bbox_inches="tight")

AttributeError: ignored

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_experiment_humidity, plot='error')
#plt.savefig('rf_humidity1.png', dpi=300, bbox_inches="tight")

In [None]:
plot_model(tuned_model_experiment_humidity, plot='feature')

In [None]:
evaluate_model(tuned_model_experiment_humidity)

**Model for repeating the EXPERIMENT, and predicting TEMPERATURE**

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_experiment_temperature)
#plt.savefig('rf_humidity.png', dpi=300, bbox_inches="tight")

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_experiment_temperature, plot='error')
#plt.savefig('rf_humidity1.png', dpi=300, bbox_inches="tight")

In [None]:
plot_model(tuned_model_experiment_temperature, plot='feature')

In [None]:
evaluate_model(tuned_model_experiment_temperature)

**Model for predicting HUMIDITY**

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_HUMIDITY)
#plt.savefig('rf_humidity.png', dpi=300, bbox_inches="tight")

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_HUMIDITY, plot='error')
#plt.savefig('rf_humidity1.png', dpi=300, bbox_inches="tight")

In [None]:
plot_model(tuned_model_HUMIDITY, plot='feature')

In [None]:
evaluate_model(tuned_model_HUMIDITY)

**Model for predicting TEMPERATURE**

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_TEMPERATURE)
#plt.savefig('rf_humidity.png', dpi=300, bbox_inches="tight")

In [None]:
plt.figure(figsize = (18,9))
plot_model(tuned_model_TEMPERATURE, plot='error')
#plt.savefig('rf_humidity1.png', dpi=300, bbox_inches="tight")

In [None]:
plot_model(tuned_model_TEMPERATURE, plot='feature')

In [None]:
evaluate_model(tuned_model_TEMPERATURE)

## **Interpret Model**


Model for repeating the EXPERIMENT, and predicting HUMIDITY

In [None]:
# Details: https://interpret.ml/docs/msa.html
interpret_model(tuned_model_experiment_humidity, plot='msa');

In [None]:
predict_model(tuned_model_experiment_humidity)

**Model for repeating the EXPERIMENT, and predicting TEMPERATURE**

In [None]:
# Details: https://interpret.ml/docs/msa.html
interpret_model(tuned_model_experiment_temperature, plot='msa')

In [95]:
predict_model(tuned_model_experiment_temperature)

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Light Gradient Boosting Machine,0.1516,0.0399,0.1997,0.9185,0.0084,0.0067


Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,power,shtc3_temperature,prediction_label
27864,1.242623,-1.297343,1.180090,-0.382864,21.652500,21.664690
27865,-1.129408,-0.348130,-1.268486,-0.382864,22.821251,22.587554
27866,-1.759753,0.514787,-1.802039,-0.382864,23.302856,23.173940
27867,-0.727481,-0.335804,-0.451244,-0.382864,22.379999,22.386440
27868,0.715505,0.638065,0.729143,-0.382864,22.883751,23.110408
...,...,...,...,...,...,...
39802,-1.902515,0.428496,-1.913407,-0.382864,22.941250,23.060160
39803,0.957100,1.587278,0.909732,0.384296,23.719999,23.796690
39804,0.906585,1.340728,1.117750,-0.273270,23.290001,23.225159
39805,1.049346,0.440826,0.979553,-0.382864,23.257500,23.082146


**Model for predicting HUMIDITY**

In [None]:
# Details: https://interpret.ml/docs/msa.html
interpret_model(tuned_model_HUMIDITY, plot='msa')

In [None]:
predict_model(tuned_model_HUMIDITY)

**Model for predicting TEMPERATURE**

In [None]:
# Details: https://interpret.ml/docs/msa.html
interpret_model(tuned_model_TEMPERATURE, plot='msa')

In [None]:
predict_model(tuned_model_TEMPERATURE)

# Finalize Model

In [68]:
#FINALIZE Model for repeating the EXPERIMENT, and predicting HUMIDITY
final_model_experiment_humidity = finalize_model(tuned_model_experiment_humidity)
final_model_experiment_humidity 

Pipeline(memory=Memory(location=/tmp/joblib),
         steps=[('numerical_imputer',
                 TransformerWrapper(include=['sht40_humidity',
                                             'sht40_temperature',
                                             'shtc3_temperature', 'power'],
                                    transformer=SimpleImputer())),
                ('categorical_imputer',
                 TransformerWrapper(include=[],
                                    transformer=SimpleImputer(fill_value='constant',
                                                              strategy='constant'))),
                ('low_variance',
                 TransformerWrapper(exclude=[],
                                    transformer=VarianceThreshold(threshold=0))),
                ('normalize', TransformerWrapper(transformer=StandardScaler())),
                ('actual_estimator',
                 RandomForestRegressor(n_jobs=-1, random_state=1))])

In [96]:
#FINALIZE Model for repeating the EXPERIMENT, and predicting TEMPERATURE
final_model_experiment_temperature = finalize_model(tuned_model_experiment_temperature)
final_model_experiment_temperature 

Pipeline(memory=Memory(location=/tmp/joblib),
         steps=[('numerical_imputer',
                 TransformerWrapper(include=['sht40_humidity',
                                             'sht40_temperature',
                                             'shtc3_humidity', 'power'],
                                    transformer=SimpleImputer())),
                ('categorical_imputer',
                 TransformerWrapper(include=[],
                                    transformer=SimpleImputer(fill_value='constant',
                                                              strategy='constant'))),
                ('low_variance',
                 TransformerWrapper(exclude=[],
                                    transformer=VarianceThreshold(threshold=0))),
                ('normalize', TransformerWrapper(transformer=StandardScaler())),
                ('actual_estimator', LGBMRegressor(random_state=1))])

In [None]:
#FINALIZE Model predicting HUMIDITY
final_model_HUMIDITY = finalize_model(tuned_model_HUMIDITY)
final_model_HUMIDITY

In [None]:
#FINALIZE Model for predicting TEMPERATURE
final_model_TEMPERATURE = finalize_model(tuned_model_TEMPERATURE)
final_model_TEMPERATURE

# Predict Model

PREDICT model for repeating the EXPERIMENT, and predicting HUMIDITY

In [69]:
y_test.shape

(9952,)

In [70]:
X_test

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_temperature,power
48239,30.52,37.21,23.107143,4.510000
98853,30.37,34.81,22.220000,1.500000
148766,25.07,35.70,23.225714,1.500000
83569,27.96,34.63,21.735556,1.500000
32137,31.34,36.81,23.717143,5.656667
...,...,...,...,...
99675,27.53,34.48,21.535000,1.500000
141870,30.64,35.78,22.680000,1.500000
135147,24.94,35.54,22.581250,1.500000
47781,29.59,35.67,22.545000,1.500000


In [71]:
prediction_df = predict_model(final_model_experiment_humidity, data=X_test)
prediction_df.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_temperature,power,prediction_label
0,0.835308,2.0662,0.700421,1.889735,62.329119
1,0.802426,-0.878159,-0.577596,-0.384908,59.928108
2,-0.359438,0.213708,0.871234,-0.384908,47.535451
3,0.274106,-1.098987,-1.275485,-0.384908,54.13868
4,1.015068,1.575476,1.579184,2.756265,61.123614


In [72]:
df_pred=prediction_df.filter(['prediction_label'], axis=1)
df_pred.head()

Unnamed: 0,prediction_label
0,62.329119
1,59.928108
2,47.535451
3,54.13868
4,61.123614


In [73]:
from sklearn import metrics

print('MAE', metrics.mean_absolute_error(y_test, df_pred))
print('MSE', metrics.mean_squared_error(y_test, df_pred))
print('RMSE', np.sqrt(metrics.mean_squared_error(y_test, df_pred)))
print('R2 Score', metrics.r2_score(y_test, df_pred))

MAE 0.6054327187074889
MSE 0.8174420017283676
RMSE 0.9041249923148721
R2 Score 0.9909627657889722


**PREDICT model for repeating the EXPERIMENT, and predicting TEMPERATURE**

In [97]:
y_test.shape

(9952,)

In [98]:
X_test

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,power
48239,30.52,37.21,64.414286,4.510000
98853,30.37,34.81,59.382222,1.500000
148766,25.07,35.70,47.474286,1.500000
83569,27.96,34.63,54.175556,1.500000
32137,31.34,36.81,61.461429,5.656667
...,...,...,...,...
99675,27.53,34.48,53.238750,1.500000
141870,30.64,35.78,62.068750,1.500000
135147,24.94,35.54,46.967500,1.500000
47781,29.59,35.67,58.551250,1.500000


In [99]:
prediction_df = predict_model(tuned_model_experiment_temperature, data=X_test)
prediction_df.head()

Unnamed: 0,sht40_humidity,sht40_temperature,shtc3_humidity,power,prediction_label
0,0.834106,2.080373,1.322412,1.918615,23.11045
1,0.801161,-0.87821,0.794079,-0.382864,21.981104
2,-0.362891,0.218932,-0.456175,-0.382864,22.841862
3,0.271847,-1.100104,0.247414,-0.382864,21.855214
4,1.014205,1.587278,1.012382,2.795369,23.540207


In [100]:
df_pred=prediction_df.filter(['prediction_label'], axis=1)
df_pred.head()

Unnamed: 0,prediction_label
0,23.11045
1,21.981104
2,22.841862
3,21.855214
4,23.540207


In [101]:
from sklearn import metrics

print('MAE', metrics.mean_absolute_error(y_test, df_pred))
print('MSE', metrics.mean_squared_error(y_test, df_pred))
print('RMSE', np.sqrt(metrics.mean_squared_error(y_test, df_pred)))
print('R2 Score', metrics.r2_score(y_test, df_pred))

MAE 0.15230827719580142
MSE 0.04010061747890428
RMSE 0.20025138571032233
R2 Score 0.9164843358699807


**PREDICT model for predicting HUMIDITY**

In [None]:
y_test.shape

In [None]:
X_test

In [None]:
prediction_df = predict_model(tuned_model_HUMIDITY, data=X_test)
prediction_df.head()

In [None]:
df_pred=prediction_df.filter(['prediction_label'], axis=1)
df_pred.head()

In [None]:
from sklearn import metrics

print('MAE', metrics.mean_absolute_error(y_test, df_pred))
print('MSE', metrics.mean_squared_error(y_test, df_pred))
print('RMSE', np.sqrt(metrics.mean_squared_error(y_test, df_pred)))
print('R2 Score', metrics.r2_score(y_test, df_pred))

**PREDICT model for predicting TEMPERATURE**

In [None]:
y_test.shape

In [None]:
X_test

In [None]:
prediction_df = predict_model(tuned_model_TEMPERATURE, data=X_test)
prediction_df.head()

In [None]:
df_pred=prediction_df.filter(['prediction_label'], axis=1)
df_pred.head()

In [None]:
from sklearn import metrics

print('MAE', metrics.mean_absolute_error(y_test, df_pred))
print('MSE', metrics.mean_squared_error(y_test, df_pred))
print('RMSE', np.sqrt(metrics.mean_squared_error(y_test, df_pred)))
print('R2 Score', metrics.r2_score(y_test, df_pred))

# Save / Load Model

In [74]:
#SAVE Model for repeating the EXPERIMENT, and predicting HUMIDITY

save_model(final_model_experiment_humidity, model_name='experiment_HUMIDITY_ML_model_rf_pycaret_9_lamps_with_avg_light_temp_sensor_temp_humidity_power_2_5_data')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=/tmp/joblib),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['sht40_humidity',
                                              'sht40_temperature',
                                              'shtc3_temperature', 'power'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=[],
                                     transformer=SimpleImputer(fill_value='constant',
                                                               strategy='constant'))),
                 ('low_variance',
                  TransformerWrapper(exclude=[],
                                     transformer=VarianceThreshold(threshold=0))),
                 ('normalize', TransformerWrapper(transformer=StandardScaler())),
                 ('actual_estimator',
                  RandomForestRegressor(n_jobs=-1, random_state=1))]),
 'experiment_HUMID

In [102]:
#SAVE Model for repeating the EXPERIMENT, and predicting TEMPERATURE

save_model(final_model_experiment_temperature, model_name='experiment_TEMPERATURE_ML_model_rf_pycaret_9_lamps_with_avg_light_temp_sensor_temp_humidity_power_2_5_data')

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=/tmp/joblib),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['sht40_humidity',
                                              'sht40_temperature',
                                              'shtc3_humidity', 'power'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=[],
                                     transformer=SimpleImputer(fill_value='constant',
                                                               strategy='constant'))),
                 ('low_variance',
                  TransformerWrapper(exclude=[],
                                     transformer=VarianceThreshold(threshold=0))),
                 ('normalize', TransformerWrapper(transformer=StandardScaler())),
                 ('actual_estimator', LGBMRegressor(random_state=1))]),
 'experiment_TEMPERATURE_ML_model_rf_pycaret_9_lamps_with_

In [None]:
#SAVE Model predicting HUMIDITY

save_model(final_model_HUMIDITY, model_name='HUMIDITY_ML_model_rf_pycaret_9_lamps_with_avg_light_temp_sensor_temp_humidity_power_2_5_data')

In [None]:
#SAVE Model predicting TEMPERATURE

save_model(final_model_TEMPERATURE, model_name='TEMPERATURE_ML_model_rf_pycaret_9_lamps_with_avg_light_temp_sensor_temp_humidity_power_2_5_data')

In [75]:
#Load model of your choice; make sure that the model is saved or the file path is correct

loaded_model = load_model('experiment_HUMIDITY_ML_model_rf_pycaret_9_lamps_with_avg_light_temp_sensor_temp_humidity_power_2_5_data')
loaded_model

Transformation Pipeline and Model Successfully Loaded


Pipeline(memory=Memory(location=/tmp/joblib),
         steps=[('numerical_imputer',
                 TransformerWrapper(include=['sht40_humidity',
                                             'sht40_temperature',
                                             'shtc3_temperature', 'power'],
                                    transformer=SimpleImputer())),
                ('categorical_imputer',
                 TransformerWrapper(include=[],
                                    transformer=SimpleImputer(fill_value='constant',
                                                              strategy='constant'))),
                ('low_variance',
                 TransformerWrapper(exclude=[],
                                    transformer=VarianceThreshold(threshold=0))),
                ('normalize', TransformerWrapper(transformer=StandardScaler())),
                ('actual_estimator',
                 RandomForestRegressor(n_jobs=-1, random_state=1))])

In [77]:
#Run predictions on loaded model to test new data or random data

prediction_df = predict_model(loaded_model, data=df_single_lamp_avg_sensor_2_lamp_test_temp_humidity_clean)
predictions = prediction_df['prediction_label']
predictions.mean()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,Random Forest Regressor,0.6132,0.7848,0.8859,0.9914,0.0171,0.0121


52.02850317397731