In [None]:
import pandas as pd
import tensorflow as tf
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt
import warnings 
warnings.filterwarnings('ignore')

Reading data

In [None]:
# Load the labelled training frame and the unlabelled test frame from the
# competition input directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/bike-sharing-demand/train.csv',
        '../input/bike-sharing-demand/test.csv',
    )
)

In [None]:
# Preview the first rows of the raw training frame.
train.head(n=5)

Adding features from datetime

In [None]:
from datetime import datetime,date


def _add_datetime_features(frame):
    """Return a new frame with calendar features in place of ``datetime``.

    The ``datetime`` column is parsed once with the fixed format
    ``'%Y-%m-%d %H:%M:%S'`` and replaced by the integer columns ``hour``,
    ``day``, ``month``, ``year`` and ``dayofweek`` (appended in that order).
    ``dayofweek`` is the real weekday, Monday=0 ... Sunday=6.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain a ``datetime`` column of strings in the format above.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``frame`` itself is not modified.
    """
    stamps = pd.to_datetime(frame['datetime'], format='%Y-%m-%d %H:%M:%S')
    return frame.assign(
        hour=stamps.dt.hour,
        day=stamps.dt.day,
        month=stamps.dt.month,
        year=stamps.dt.year,
        # Actual weekday; the previous `day % 5` was not a day of the week.
        dayofweek=stamps.dt.dayofweek,
    ).drop(columns='datetime')


# Keep the raw test timestamps: they are written to the submission file later,
# after the column has been dropped from `test`.
datetimetest = test['datetime']
train = _add_datetime_features(train)
test = _add_datetime_features(test)

In [None]:
# Only the datetime-derived columns need an integer cast. Casting the whole
# frame would truncate the continuous weather readings (e.g. temp 9.84 -> 9)
# before they are scaled.
datetime_cols = ['hour', 'day', 'month', 'year', 'dayofweek']
train = train.astype({name: int for name in datetime_cols})
test = test.astype({name: int for name in datetime_cols})

Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Columns rescaled to the [0, 1] range observed in the training data.
col=['temp','atemp','humidity','windspeed','year']

# Fit once on train and reuse the same scaler for test, so both frames share
# one mapping. Test values outside the training range will fall outside [0, 1].
# (The previous per-column loop refit and re-applied the scaler to all columns
# on every iteration, which was redundant.)
scaler = MinMaxScaler()
train[col] = scaler.fit_transform(train[col])
test[col] = scaler.transform(test[col])

In [None]:
# Preview the training frame again after feature engineering and scaling.
train.head(n=5)

Splitting into train/validation datasets

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Features are every column except the target `count` and the `casual` /
# `registered` columns (presumably not available at prediction time - confirm
# against the test file).
col = train.columns.tolist()
for excluded in ("count", "casual", "registered"):
    col.remove(excluded)

X = train[col]
y = train["count"]

# Hold out 20% of the shuffled rows for validation; the seed is fixed so the
# split is repeatable.
train_X, val_X, train_y, val_y = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
    shuffle=True,
)

Building model

In [None]:
def model(model):
    """Fit an estimator on the training split and score it on validation.

    Uses the notebook-level ``train_X`` / ``train_y`` for fitting and
    ``val_X`` / ``val_y`` for scoring.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is fitted
        in place.

    Returns
    -------
    tuple
        ``(mae, predictions)``. ``predictions`` is the raw output of
        ``model.predict(val_X)``. ``mae`` is the mean absolute error between
        ``val_y`` and the predictions after taking their absolute value and
        truncating to integers.
    """
    model.fit(train_X, train_y)
    val_predictions = model.predict(val_X)
    # Score the predictions as non-negative whole counts.
    as_counts = abs(val_predictions).astype(int)
    mae = mean_absolute_error(as_counts, val_y)
    return mae, val_predictions

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import lightgbm as lgb
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
# Import the class directly so that binding the estimator to `svm` below does
# not rebind an imported module name.
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Seed every stochastic estimator so the scores printed below, and the model
# ranking drawn from them, are the same on every run.
SEED = 1

ada = AdaBoostRegressor(random_state=SEED)
bag = BaggingRegressor(random_state=SEED)
gbr = GradientBoostingRegressor(random_state=SEED)
gpr = GaussianProcessRegressor(random_state=SEED)
svm = SVR()
knc = KNeighborsRegressor()
lgbm = lgb.LGBMRegressor(random_state=SEED)
mlp = MLPRegressor(hidden_layer_sizes=(10,4,), random_state=SEED)
rf = RandomForestRegressor(random_state=SEED)
lr = LinearRegression()
dt = DecisionTreeRegressor(random_state=SEED)

# `modelpreds[k]` holds the validation predictions of `models[k]`; keep the two
# lists aligned because later cells look predictions up by position.
models=[knc,lgbm,mlp,rf,lr,dt,svm,gpr,ada,bag,gbr]
modelpreds=[]
for estimator in models:
    # model() fits the estimator and returns (validation MAE, raw predictions).
    mae, modelpred = model(estimator)
    modelpreds.append(modelpred)
    # Label each score so it can be matched to its model.
    print(f"{type(estimator).__name__}: MAE = {mae}")

We see that the two best models are LightGBM and random forest.

In [None]:
# Average the validation predictions of LightGBM and random forest. Look the
# models up by object rather than by a hard-coded position: index 4 of `models`
# is the linear regression, not the random forest.
modelpredss = (modelpreds[models.index(lgbm)] + modelpreds[models.index(rf)]) / 2

In [None]:
# Score the ensemble with the same post-processing model() applies to the
# individual models (absolute value, truncated to int), so the MAE values are
# directly comparable.
mean_absolute_error(abs(modelpredss).astype(int), val_y)

The MAE of the averaged LightGBM and random forest predictions is lower than that of either model alone, so let's use the average of both predictions.

Making predictions on test dataset

In [None]:
# Predict with both fitted models, average them, then make the result a
# non-negative whole count (absolute value, truncated to int).
predslbm = lgbm.predict(test)
predsrf = rf.predict(test)
ensemble_mean = (predsrf + predslbm) / 2
preds = np.abs(ensemble_mean).astype(int)

Submitting csv

In [None]:
# Pair each original test timestamp with its predicted count and write the
# result without the index column.
submission_columns = {
    "datetime": datetimetest,
    "count": preds,
}
submission = pd.DataFrame(submission_columns)
submission.to_csv('bike_prediction_output.csv', index=False)