<a href="https://colab.research.google.com/github/spatidar2109/ML_Learing/blob/main/Stacking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
##### This code is for stacking of different models on train dataset
## All the predictions from above are them concatenated 
## next level model like RF is applied on predictions 
## this model is then fit on test dataset

import pandas as pd
import numpy as np
df = pd.read_csv("/content/train.csv",parse_dates=['datetime'])

### Pre-processing train data till scaling
df['year'] = df['datetime'].dt.year
df['month'] = df['datetime'].dt.month
df['day'] = df['datetime'].dt.day
df['hour'] = df['datetime'].dt.hour
df['weekday']=df['datetime'].dt.weekday

df['season'] = df['season'].astype('category')
df['weather'] = df['weather'].astype('category')
df.drop(columns=['datetime','casual', 'registered',],inplace=True)

dum_df = pd.get_dummies(df, drop_first=True)
X = dum_df.drop('count',axis=1)
y = dum_df['count']

In [2]:
#### Model-1 linear regression  ####

from sklearn.linear_model import LinearRegression

model_lr = LinearRegression()
model_lr.fit(X,y)
pred_lr = model_lr.predict(X)

In [3]:
pred_lr

array([-97.42281772, -94.17825274, -86.50897409, ..., 311.03391605,
       318.1289534 , 311.38615663])

In [4]:
pred_lr [pred_lr <0] = 0

In [5]:
pred_lr

array([  0.        ,   0.        ,   0.        , ..., 311.03391605,
       318.1289534 , 311.38615663])

In [6]:
#### Model-2 SVR 'linear'  ######
from sklearn.svm import SVR
model_svrl = SVR(kernel='linear')
model_svrl.fit(X,y)
pred_svrl=model_svrl.predict(X)
pred_svrl[pred_svrl <0] = 0

In [9]:
pred_svrl

array([  0.        ,   0.        ,   0.        , ..., 237.55229925,
       244.99497964, 236.6829686 ])

In [10]:
#### Model-3 SVR 'radial' ######
model_svrr = SVR(kernel='rbf')
model_svrr.fit(X,y)
pred_svrr=model_svrr.predict(X)
pred_svrr[pred_svrr <0] = 0

#### Model-4 Decision Tree Regressor ######
from sklearn.tree import DecisionTreeRegressor
model_dtr= DecisionTreeRegressor()
model_dtr.fit(X,y)
pred_dtr=model_dtr.predict(X)
pred_dtr[pred_dtr<0]=0

In [11]:
pred_svrr , pred_dtr

(array([143.57889329, 143.60139608, 143.61851007, ..., 144.83573295,
        144.84056688, 144.67251916]),
 array([ 16.,  40.,  32., ..., 168., 129.,  88.]))

In [12]:
###### Combining all the predictions #####
pred_lr=pd.Series(pred_lr)
pred_svrl=pd.Series(pred_svrl)
pred_svrr=pd.Series(pred_svrr)
pred_dtr=pd.Series(pred_dtr)
comb_pred=pd.concat([pred_lr,pred_svrl,pred_svrr,pred_dtr],axis=1)
#(pred_lr,pred_svrl,pred_svrr,pred_svrs,pred_dt
comb_pred.columns=['pred_lr','pred_svrl','pred_svrr','pred_dtr']

In [13]:
comb_pred.head()

Unnamed: 0,pred_lr,pred_svrl,pred_svrr,pred_dtr
0,0.0,0.0,143.578893,16.0
1,0.0,0.0,143.601396,40.0
2,0.0,0.0,143.61851,32.0
3,0.0,0.0,143.834334,13.0
4,0.0,0.0,143.851451,1.0


In [14]:
###### Now level 2 model RF ############################################################
from sklearn.ensemble import RandomForestRegressor
model=RandomForestRegressor(random_state=1200)
model.fit(comb_pred,y)

RandomForestRegressor(random_state=1200)

In [15]:
## preprocessing test data ############
df_test = pd.read_csv("/content/test.csv",parse_dates=['datetime'])
df_test['year'] = df_test['datetime'].dt.year
df_test['month'] = df_test['datetime'].dt.month
df_test['day'] = df_test['datetime'].dt.day
df_test['hour'] = df_test['datetime'].dt.hour
df_test['weekday']=df_test['datetime'].dt.weekday

df_test['season'] = df_test['season'].astype('category')
df_test['weather'] = df_test['weather'].astype('category')
#df_test.drop(columns=['datetime','casual', 'registered'],inplace=True)
df_test.drop(columns=['datetime'],inplace=True)
dum_df_test = pd.get_dummies(df_test, drop_first=True)

In [16]:
dum_df_test.head()

Unnamed: 0,holiday,workingday,temp,atemp,humidity,windspeed,year,month,day,hour,weekday,season_2,season_3,season_4,weather_2,weather_3,weather_4
0,0,1,10.66,11.365,56,26.0027,2011,1,20,0,3,0,0,0,0,0,0
1,0,1,10.66,13.635,56,0.0,2011,1,20,1,3,0,0,0,0,0,0
2,0,1,10.66,13.635,56,0.0,2011,1,20,2,3,0,0,0,0,0,0
3,0,1,10.66,12.88,56,11.0014,2011,1,20,3,3,0,0,0,0,0,0
4,0,1,10.66,12.88,56,11.0014,2011,1,20,4,3,0,0,0,0,0,0


In [17]:
############################################################################

pred_lr = model_lr.predict(dum_df_test)
pred_lr [pred_lr <0] = 0

pred_svrl=model_svrl.predict(dum_df_test)
pred_svrl[pred_svrl <0] = 0

pred_svrr=model_svrr.predict(dum_df_test)
pred_svrr[pred_svrr <0] = 0

pred_dtr=model_dtr.predict(dum_df_test)
pred_dtr[pred_dtr<0]=0

###### Combining all the predictions for test set #####
pred_lr=pd.Series(pred_lr)
pred_svrl=pd.Series(pred_svrl)
pred_svrr=pd.Series(pred_svrr)
pred_dtr=pd.Series(pred_dtr)
comb_pred_test=pd.concat([pred_lr,pred_svrl,pred_svrr,pred_dtr],axis=1)
#(pred_lr,pred_svrl,pred_svrr,pred_svrs,pred_dt
comb_pred_test.columns=['pred_lr','pred_svrl','pred_svrr','pred_dtr']

In [18]:
comb_pred_test.head()

Unnamed: 0,pred_lr,pred_svrl,pred_svrr,pred_dtr
0,0.0,0.0,144.512929,17.0
1,0.0,0.0,144.454733,10.0
2,0.0,0.0,144.471852,3.0
3,0.0,0.0,144.524706,7.0
4,0.0,0.0,144.541827,5.0


In [19]:
pred_testdata=model.predict(comb_pred_test)
test = pd.read_csv("/content/test.csv")
submit=pd.DataFrame({'datetime':test['datetime'],'count':pred_testdata})