## This is a continuation of the Bike Rental Demand Analysis project where we will use the saved models to perform predictions. 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from datetime import datetime
from datetime import date
import calendar
import pickle

### Loading the saved models

In [2]:
model1 = open(r'C:\Users\KIIT\Documents\LGM-Soc contributions\Ola Bike Ride Request Demand Forecast\Model/random_forest_model.pkl','rb')
rf_model = pickle.load(model1)

In [3]:
model2 = open(r'C:\Users\KIIT\Documents\LGM-Soc contributions\Ola Bike Ride Request Demand Forecast\Model/tuned_xgb_model.pkl','rb')
xgb_model = pickle.load(model2)

### Loading the test data

In [4]:
test_df = pd.read_csv(r'C:\Users\KIIT\Documents\LGM-Soc contributions\Ola Bike Ride Request Demand Forecast\Dataset/test.csv')

In [5]:
test_df

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered
0,20-01-2011 00:00,1,0,1,1,10.66,11.365,56,26.0027,3,13
1,20-01-2011 01:00,1,0,1,1,10.66,13.635,56,0.0000,8,32
2,20-01-2011 02:00,1,0,1,1,10.66,13.635,56,0.0000,5,27
3,20-01-2011 03:00,1,0,1,1,10.66,12.880,56,11.0014,3,10
4,20-01-2011 04:00,1,0,1,1,10.66,12.880,56,11.0014,0,1
...,...,...,...,...,...,...,...,...,...,...,...
6488,31-12-2012 19:00,1,0,1,2,10.66,12.880,60,11.0014,38,137
6489,31-12-2012 20:00,1,0,1,2,10.66,12.880,60,11.0014,44,165
6490,31-12-2012 21:00,1,0,1,1,10.66,12.880,60,11.0014,34,254
6491,31-12-2012 22:00,1,0,1,1,10.66,13.635,56,8.9981,58,554


In [6]:
test_df.shape

(6493, 11)

In [7]:
test_df.isnull().sum()

datetime      0
season        0
holiday       0
workingday    0
weather       0
temp          0
atemp         0
humidity      0
windspeed     0
casual        0
registered    0
dtype: int64

In [8]:
def change_into_datetime(col):
    test_df[col] = pd.to_datetime(test_df[col])

In [9]:
for i in ['datetime']:
    change_into_datetime(i)

In [10]:
test_df['year'] = test_df['datetime'].dt.year
test_df['month'] = test_df['datetime'].dt.month
test_df['date'] = test_df['datetime'].dt.date
test_df['hour'] = test_df['datetime'].dt.hour
test_df['day of the week'] = test_df['datetime'].dt.dayofweek

In [11]:
dict = {2011 :'1', 2012 :'2'}

In [12]:
test_df['year'] = test_df['year'].map(dict)
test_df['year'].unique()

array(['1', '2'], dtype=object)

In [13]:
test_df.drop('datetime',axis=1,inplace=True)

In [14]:
test_df["year"] = pd.to_numeric(test_df["year"])

In [15]:
season = pd.get_dummies(test_df['season'],prefix='season',drop_first=True)
weather = pd.get_dummies(test_df['weather'],prefix='weather',drop_first=True)
holiday = pd.get_dummies(test_df['holiday'],prefix='holiday',drop_first=True)
month = pd.get_dummies(test_df['month'],prefix='month',drop_first=True)
hour = pd.get_dummies(test_df['hour'],prefix='hour',drop_first=True)
test_df = pd.concat([test_df,season,weather,holiday,month,hour],axis=1)
test_df.drop(['season','weather','holiday','month','hour'], axis=1,inplace=True)

In [16]:
test_df.drop(['date','day of the week','year','windspeed','workingday'], axis=1, inplace=True)

In [17]:
test_df

Unnamed: 0,temp,atemp,humidity,casual,registered,season_2,season_3,season_4,weather_2,weather_3,...,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23
0,10.66,11.365,56,3,13,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,10.66,13.635,56,8,32,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,10.66,13.635,56,5,27,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,10.66,12.880,56,3,10,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,10.66,12.880,56,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6488,10.66,12.880,60,38,137,0,0,0,1,0,...,0,0,0,0,0,1,0,0,0,0
6489,10.66,12.880,60,44,165,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,0
6490,10.66,12.880,60,34,254,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
6491,10.66,13.635,56,58,554,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [18]:
test_df.dtypes

temp          float64
atemp         float64
humidity        int64
casual          int64
registered      int64
season_2        uint8
season_3        uint8
season_4        uint8
weather_2       uint8
weather_3       uint8
weather_4       uint8
holiday_1       uint8
month_2         uint8
month_3         uint8
month_4         uint8
month_5         uint8
month_6         uint8
month_7         uint8
month_8         uint8
month_9         uint8
month_10        uint8
month_11        uint8
month_12        uint8
hour_1          uint8
hour_2          uint8
hour_3          uint8
hour_4          uint8
hour_5          uint8
hour_6          uint8
hour_7          uint8
hour_8          uint8
hour_9          uint8
hour_10         uint8
hour_11         uint8
hour_12         uint8
hour_13         uint8
hour_14         uint8
hour_15         uint8
hour_16         uint8
hour_17         uint8
hour_18         uint8
hour_19         uint8
hour_20         uint8
hour_21         uint8
hour_22         uint8
hour_23   

In [19]:
test_df.shape

(6493, 46)

## Getting the predictions using the pre trained hypertuned Random Forest model

In [20]:
rf_preds = rf_model.predict(test_df)

In [21]:
rf_preds

array([2.78487935, 3.69970354, 3.46455951, ..., 5.66716113, 6.41217655,
       6.29324368])

 ## Getting the predictions using the pre trained hypertuned Random Forest model

In [22]:
xgb_preds = xgb_model.predict(test_df)

In [23]:
xgb_preds

array([2.8048124, 3.707602 , 3.4816105, ..., 5.649366 , 6.366452 ,
       6.2456656], dtype=float32)

### Saving the predictions in a dataset

In [24]:
test_df = pd.read_csv(r'C:\Users\KIIT\Documents\LGM-Soc contributions\Ola Bike Ride Request Demand Forecast\Dataset/test.csv')

In [25]:
timestamp = test_df['datetime']

In [28]:
rf_predictions = pd.DataFrame()
rf_predictions['timestamp'] = timestamp
rf_predictions['Ola Bike Demands'] = rf_preds

In [29]:
rf_predictions

Unnamed: 0,timestamp,Ola Bike Demands
0,20-01-2011 00:00,2.784879
1,20-01-2011 01:00,3.699704
2,20-01-2011 02:00,3.464560
3,20-01-2011 03:00,2.564250
4,20-01-2011 04:00,0.000000
...,...,...
6488,31-12-2012 19:00,5.172612
6489,31-12-2012 20:00,5.340365
6490,31-12-2012 21:00,5.667161
6491,31-12-2012 22:00,6.412177


In [30]:
rf_predictions.to_csv(r'C:\Users\KIIT\Documents\LGM-Soc contributions\Ola Bike Ride Request Demand Forecast\Dataset/Random_forest prediction forecast.csv',header=True,index=False)

In [31]:
xgb_predictions = pd.DataFrame()
xgb_predictions['timestamp'] = timestamp
xgb_predictions['Ola Bike Demands'] = xgb_preds

In [32]:
xgb_predictions

Unnamed: 0,timestamp,Ola Bike Demands
0,20-01-2011 00:00,2.804812
1,20-01-2011 01:00,3.707602
2,20-01-2011 02:00,3.481611
3,20-01-2011 03:00,2.570308
4,20-01-2011 04:00,-0.001067
...,...,...
6488,31-12-2012 19:00,5.158451
6489,31-12-2012 20:00,5.340024
6490,31-12-2012 21:00,5.649366
6491,31-12-2012 22:00,6.366452


In [33]:
xgb_predictions.to_csv(r'C:\Users\KIIT\Documents\LGM-Soc contributions\Ola Bike Ride Request Demand Forecast\Dataset/XGBoost prediction forecast.csv',header=True,index=False)