In [1]:
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Feature columns to be treated as numeric (the same list is handed to
# `numeric_features` in the PyCaret setup call further down).
numerical_features = ["mnth", "hr", "day"]

# Hourly bike-rental frame used for both modelling and plotting.
raw_data = pd.read_csv("preprocessed-df.csv")

# The model file is only written by later cells, so it does not exist on a fresh
# checkout. Load it only when a previous run left it behind; otherwise fall back
# to None so Restart & Run All does not stop here with FileNotFoundError.
# NOTE(review): the variable name `pickle` is kept for compatibility, but it
# shadows the stdlib module name until a later `import pickle` rebinds it.
MODEL_PATH = Path("bikeRentalsModel.pkl")
pickle = pd.read_pickle(MODEL_PATH) if MODEL_PATH.exists() else None

In [3]:
# Parse the `dteday` strings into pandas datetimes so the column can be compared
# against a Timestamp cutoff and used as a time axis.
raw_data["dteday"] = raw_data["dteday"].pipe(pd.to_datetime)

In [4]:
# Preview the loaded frame via the notebook's rich display (pandas truncates the
# middle rows of a long frame, so only the head and tail are rendered).
raw_data

Unnamed: 0,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,atemp,hum,windspeed,cnt,day,time_of_day,comfortable_temp,comfortable_humidity
0,2011-01-01 00:00:00,1,0,1,0,0,6,0,1,0.2879,0.81,0.0000,16,1,3,0,0
1,2011-01-01 01:00:00,1,0,1,1,0,6,0,1,0.2727,0.80,0.0000,40,1,3,0,0
2,2011-01-01 02:00:00,1,0,1,2,0,6,0,1,0.2727,0.80,0.0000,32,1,3,0,0
3,2011-01-01 03:00:00,1,0,1,3,0,6,0,1,0.2879,0.75,0.0000,13,1,3,0,0
4,2011-01-01 04:00:00,1,0,1,4,0,6,0,1,0.2879,0.75,0.0000,1,1,3,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17352,2012-12-31 19:00:00,1,1,12,19,0,1,1,2,0.2576,0.60,0.1642,119,31,1,0,0
17353,2012-12-31 20:00:00,1,1,12,20,0,1,1,2,0.2576,0.60,0.1642,89,31,1,0,0
17354,2012-12-31 21:00:00,1,1,12,21,0,1,1,1,0.2576,0.60,0.1642,90,31,1,0,0
17355,2012-12-31 22:00:00,1,1,12,22,0,1,1,1,0.2727,0.56,0.1343,61,31,1,0,0


In [5]:
# Chronological hold-out: rows strictly before the cutoff are used for modelling,
# rows on or after it are set aside as unseen data for predictions.
cutoff_date = pd.to_datetime('2012-09-01')

# .copy() makes each split an independent frame rather than a slice of raw_data,
# so later in-place edits to `data` do not raise SettingWithCopyWarning.
data = raw_data.loc[raw_data["dteday"] < cutoff_date].copy()
data_unseen = raw_data.loc[raw_data["dteday"] >= cutoff_date].copy()

print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions {data_unseen.shape}')

Data for Modeling: (14469, 17)
Unseen Data For Predictions (2888, 17)


In [6]:
# Import only the PyCaret regression helpers this cell uses (explicit names
# instead of `import *`, so it is clear where each function comes from).
from pycaret.regression import (
    compare_models,
    ensemble_model,
    evaluate_model,
    finalize_model,
    predict_model,
    save_model,
    setup,
    tune_model,
)

# Build the modelling frame without the raw timestamp column. A new frame is
# created (no `inplace=True`) so that `data` keeps `dteday` for the plots below
# and re-running this cell does not fail with a KeyError on the second drop.
train_data = data.drop(columns="dteday")

# Set up the PyCaret regression environment with 5 time-series CV folds.
# NOTE(review): PyCaret documents `preprocess=False` as skipping its built-in
# transformations, so `remove_outliers` / `remove_multicollinearity` may have no
# effect here -- confirm which behaviour is intended.
regression_setup = setup(
    train_data,
    target='cnt',
    use_gpu=True,
    numeric_features=numerical_features,
    fold_strategy='timeseries',
    fold=5,
    preprocess=False,
    remove_outliers=True,
    remove_multicollinearity=True,
    session_id=42,  # fixed seed so repeated runs are comparable
)

# Compare all candidate models and keep the best one.
best_model = compare_models()

# Tune the hyperparameters of the best model (tune_model's default search is a
# random search, not a grid search).
tuned_model = tune_model(best_model)

# Create an ensemble of the tuned model.
ensembled_model = ensemble_model(tuned_model)

# Open PyCaret's interactive evaluation widget for the ensembled model.
evaluate_model(ensembled_model)

# Finalize the model for deployment.
# NOTE(review): this finalizes `best_model`, not the tuned/ensembled model that
# was evaluated just above -- confirm which model is meant to be shipped.
final_model = finalize_model(best_model)

# Persist the finalized model under the name 'bikeRentalsModel'.
save_model(final_model, 'bikeRentalsModel')

# Predict on the held-out period; the result is the input frame plus a
# prediction column.
predictions = predict_model(final_model, data=data_unseen)

Unnamed: 0_level_0,MAE,MSE,RMSE,R2,RMSLE,MAPE
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,36.0785,2883.1102,53.6946,0.9059,0.6099,0.9089
1,28.3047,1964.0892,44.318,0.936,0.4733,0.5342
2,25.0535,1538.7546,39.227,0.946,0.4868,0.5526
3,24.5799,1392.0666,37.3104,0.9519,0.4257,0.429
4,23.6027,1481.9348,38.4959,0.9458,0.4014,0.401
Mean,27.5239,1851.9911,42.6092,0.9371,0.4794,0.5651
Std,4.5593,551.7902,6.0373,0.0164,0.0722,0.1815


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Hyperparameters', 'param…

Transformation Pipeline and Model Successfully Saved


Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE
0,CatBoost Regressor,50.7281,5184.7211,72.005,0.8906,0.6347,0.8432


In [10]:
# Inspect the prediction frame: the unseen rows plus the model output column
# (`Label` in the rendered output below).
predictions

Unnamed: 0,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,atemp,hum,windspeed,cnt,day,time_of_day,comfortable_temp,comfortable_humidity,Label
14469,2012-09-01 00:00:00,3,1,9,0,0,6,0,1,0.6818,0.62,0.1045,168,1,3,0,0,155.553635
14470,2012-09-01 01:00:00,3,1,9,1,0,6,0,1,0.6970,0.74,0.1343,79,1,3,0,0,102.919740
14471,2012-09-01 02:00:00,3,1,9,2,0,6,0,1,0.6515,0.70,0.1642,69,1,3,0,0,72.243571
14472,2012-09-01 03:00:00,3,1,9,3,0,6,0,1,0.6515,0.70,0.1045,35,1,3,0,0,42.394468
14473,2012-09-01 04:00:00,3,1,9,4,0,6,0,1,0.6515,0.70,0.0000,12,1,3,0,0,28.100533
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17352,2012-12-31 19:00:00,1,1,12,19,0,1,1,2,0.2576,0.60,0.1642,119,31,1,0,0,317.307176
17353,2012-12-31 20:00:00,1,1,12,20,0,1,1,2,0.2576,0.60,0.1642,89,31,1,0,0,217.282169
17354,2012-12-31 21:00:00,1,1,12,21,0,1,1,1,0.2576,0.60,0.1642,90,31,1,0,0,175.613937
17355,2012-12-31 22:00:00,1,1,12,22,0,1,1,1,0.2727,0.56,0.1343,61,31,1,0,0,128.593634


In [7]:
# Predicted rentals for the held-out period, drawn with Plotly Express.
fig = px.line(predictions, x='dteday', y=['Label'], title='Actual vs Predicted Bike Rentals')

# Actual rentals over the full date range. Both x and y come from raw_data so
# they have the same length and stay row-aligned; taking y from the training
# frame paired a shorter series with the full-length x axis and left no actual
# values over the predicted period.
actual_trace = go.Scatter(x=raw_data['dteday'], y=raw_data['cnt'], name='Actual')

# Overlay the actual series on the prediction line.
fig.add_trace(actual_trace)

# Display the chart.
fig.show()

In [8]:
# Full actual series (modelling + unseen rows). It is taken from raw_data, which
# is exactly the union of the two splits, so every row is guaranteed to carry
# its `dteday` value even if the modelling frame had that column dropped.
all_data = raw_data.copy()

# Predicted rentals for the held-out period, drawn with Plotly Express.
fig = px.line(predictions, x='dteday', y=['Label'], title='Actual vs Predicted Bike Rentals (Zoomed-In)')

# Actual rentals as a second trace.
actual_trace = go.Scatter(x=all_data['dteday'], y=all_data['cnt'], name='Actual')

# Overlay the actual series on the prediction line.
fig.add_trace(actual_trace)

# Restrict the x-axis to December 2012 for a zoomed-in view.
fig.update_xaxes(range=['2012-12-01 00:00:00', '2012-12-31 00:00:00'])

# Display the chart.
fig.show()

In [9]:
import pickle

# Serialize the finalized model with the stdlib pickle module. The `with` block
# guarantees the file handle is flushed and closed even if dump() raises.
# NOTE(review): the earlier save_model(final_model, 'bikeRentalsModel') call
# presumably writes to this same file name, in which case this dump overwrites
# that artifact -- confirm that this is intended.
with open('bikeRentalsModel.pkl', 'wb') as model_file:
    pickle.dump(final_model, model_file)