In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
# Load the training split; 'datetime' is parsed into real timestamps so the
# `.dt` accessor can be used on it later.
train_csv_path = '../input/bike-sharing-demand/train.csv'
df = pd.read_csv(train_csv_path, parse_dates=['datetime'])
df.head()

In [None]:
# Features: everything except the target and the 'casual'/'registered' columns
# (presumably components of 'count' that would leak the target - TODO confirm).
x = df.drop(columns=['casual', 'registered', 'count'])
# Target: the 'count' column.
y = df['count']

In [None]:
from sklearn.model_selection import train_test_split
# Hold out a validation split; the fixed seed keeps the split identical
# across kernel restarts.
split_parts = train_test_split(x, y, random_state=42)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Sanity check: shapes of the four pieces produced by the split.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

In [None]:
from sklearn.linear_model import PoissonRegressor
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.metrics import mean_squared_log_error,mean_squared_error,mean_absolute_error
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import KBinsDiscretizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor

In [None]:
def extract(df):
    """Derive calendar features from the ``datetime`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime64 column named ``datetime``.

    Returns
    -------
    pandas.DataFrame
        A new frame, indexed like ``df``, holding the numeric ``day`` column
        followed by one-hot (dummy) columns for year, month, weekday and hour,
        in that order.

    Notes
    -----
    The input frame is left untouched: the previous version wrote helper
    columns into ``df``, which mutated caller data and could trigger
    SettingWithCopy warnings on the slice handed over by ColumnTransformer.

    ``pd.get_dummies`` only creates columns for values that actually occur in
    ``df``, so every frame passed through this function must contain the same
    set of years/months/weekdays/hours, otherwise the column layout differs.
    """
    stamps = df['datetime'].dt

    # 'day' is kept as a plain numeric feature.
    day = pd.DataFrame({'day': stamps.day}, index=df.index)

    # The remaining parts are treated as categories and one-hot encoded.
    calendar_parts = pd.DataFrame(
        {
            'year': stamps.year,
            'month': stamps.month,
            'weekday': stamps.weekday,
            'hour': stamps.hour,
        },
        index=df.index,
    )
    dummies = pd.get_dummies(
        calendar_parts, columns=['year', 'month', 'weekday', 'hour']
    )

    return pd.concat([day, dummies], axis=1)

In [None]:
# Dense one-hot encoder for 'season' and 'weather'.
# * scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the
#   old keyword, so the new spelling is tried first and the old one is the
#   fallback for older installations.
# * handle_unknown='ignore' encodes a category that was not present when the
#   encoder was fitted as all zeros instead of raising during transform().
try:
    season_weather_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    season_weather_encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

# Column-wise preprocessing:
#   - holiday / workingday are passed through unchanged,
#   - datetime is expanded into calendar features by `extract`,
#   - season / weather are one-hot encoded,
#   - atemp / humidity / windspeed are cut into 7 quantile bins, one-hot encoded.
feature_transformer = ColumnTransformer([
    ('do_nothing', 'passthrough', ['holiday', 'workingday']),
    ('time_extact', FunctionTransformer(extract), ['datetime']),
    ('one_hot_encoding', season_weather_encoder, ['season', 'weather']),
    ('bins', KBinsDiscretizer(n_bins=7, encode='onehot-dense', strategy='quantile'), ['atemp', 'humidity', 'windspeed']),
])

In [None]:
# Learn the preprocessing on the training split only, then apply the fitted
# transformer to both splits.
feature_transformer.fit(x_train)
x_train_trans = feature_transformer.transform(x_train)
x_test_trans = feature_transformer.transform(x_test)

In [None]:
# Baseline: ordinary least squares on the raw counts.
lr = LinearRegression()
lr.fit(x_train_trans, y_train)

# score() on the training split first, then on the held-out split.
for features, target in ((x_train_trans, y_train), (x_test_trans, y_test)):
    print(lr.score(features, target))

In [None]:
import numpy as np
# Linear regression on the log1p-transformed target.
ytrainlog = np.log1p(y_train)
ytestlog = np.log1p(y_test)
lr.fit(x_train_trans, ytrainlog)
y_pred_log = lr.predict(x_test_trans)

# Undo log1p with its exact inverse (expm1). A linear model can predict a
# negative log-value, which maps to a negative count; counts cannot be
# negative and mean_squared_log_error rejects negative inputs, so clip at 0.
y_pred = np.clip(np.expm1(y_pred_log), 0, None)

# score() in log space: training split, then held-out split.
print(lr.score(x_train_trans, ytrainlog))
print(lr.score(x_test_trans, ytestlog))

# Error metrics on the original count scale.
print(mean_squared_error(y_test, y_pred))
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_log_error(y_test, y_pred))


In [None]:
# Poisson regression without polynomial expansion
po = PoissonRegressor(max_iter=300).fit(x_train_trans, y_train)

# Built-in score on the training split, then on the held-out split.
for features, target in ((x_train_trans, y_train), (x_test_trans, y_test)):
    print(po.score(features, target))

y_pred = po.predict(x_test_trans)

# Error metrics on the held-out split, in the order MSE, MAE, MSLE.
for metric in (mean_squared_error, mean_absolute_error, mean_squared_log_error):
    print(metric(y_test, y_pred))


In [None]:
# Poisson regression with polynomial expansion (interaction terms only, no bias column)
poly = PolynomialFeatures(include_bias=False, interaction_only=True)
x_train_trans_p = poly.fit_transform(x_train_trans)
x_test_trans_p = poly.transform(x_test_trans)

# More iterations than the baseline Poisson model are allowed here.
po = PoissonRegressor(max_iter=2000).fit(x_train_trans_p, y_train)

# Built-in score on the training split, then on the held-out split.
for features, target in ((x_train_trans_p, y_train), (x_test_trans_p, y_test)):
    print(po.score(features, target))

y_pred = po.predict(x_test_trans_p)

# Error metrics on the held-out split, in the order MSE, MAE, MSLE.
for metric in (mean_squared_error, mean_absolute_error, mean_squared_log_error):
    print(metric(y_test, y_pred))

In [None]:
# Random forest on the encoded features.
# random_state is fixed (same seed as the train/test split) so the scores
# below are reproducible after "Restart & Run All"; without it every run
# grows a different forest and prints different numbers.
rf = RandomForestRegressor(n_estimators=300, max_depth=100, random_state=42)
rf.fit(x_train_trans, y_train)

# score() on the training split, then on the held-out split.
print(rf.score(x_train_trans, y_train))
print(rf.score(x_test_trans, y_test))
y_pred = rf.predict(x_test_trans)

# Error metrics on the held-out split.
print(mean_squared_error(y_test, y_pred))
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_log_error(y_test, y_pred))


In [None]:
# Linear regression with polynomial feature expansion (interaction terms only),
# fitted on the log1p-transformed target.
poly = PolynomialFeatures(include_bias=False, interaction_only=True)
x_train_trans_p = poly.fit_transform(x_train_trans)
x_test_trans_p = poly.transform(x_test_trans)
ytrainlog = np.log1p(y_train)
ytestlog = np.log1p(y_test)
lr.fit(x_train_trans_p, ytrainlog)
y_pred_log = lr.predict(x_test_trans_p)

# Undo log1p with its exact inverse (expm1). Negative log-predictions would
# become negative counts, which are meaningless and make
# mean_squared_log_error raise, so clip at 0.
y_pred = np.clip(np.expm1(y_pred_log), 0, None)

# score() in log space: training split, then held-out split.
print(lr.score(x_train_trans_p, ytrainlog))
print(lr.score(x_test_trans_p, ytestlog))

# Error metrics on the original count scale.
print(mean_squared_error(y_test, y_pred))
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_log_error(y_test, y_pred))


In [None]:
# Ridge regression with polynomial feature expansion (interaction terms only),
# fitted on the log1p-transformed target.
ridge = Ridge()

poly = PolynomialFeatures(include_bias=False, interaction_only=True)
x_train_trans_p = poly.fit_transform(x_train_trans)
x_test_trans_p = poly.transform(x_test_trans)
ytrainlog = np.log1p(y_train)
ytestlog = np.log1p(y_test)
ridge.fit(x_train_trans_p, ytrainlog)
y_pred_log = ridge.predict(x_test_trans_p)

# Undo log1p with its exact inverse (expm1). Negative log-predictions would
# become negative counts, which are meaningless and make
# mean_squared_log_error raise, so clip at 0.
y_pred = np.clip(np.expm1(y_pred_log), 0, None)

# score() in log space: training split, then held-out split.
print(ridge.score(x_train_trans_p, ytrainlog))
print(ridge.score(x_test_trans_p, ytestlog))

# Error metrics on the original count scale.
print(mean_squared_error(y_test, y_pred))
print(mean_absolute_error(y_test, y_pred))
print(mean_squared_log_error(y_test, y_pred))


In [None]:
# Load the competition test file the same way as the training data
# ('datetime' parsed into timestamps).
test_csv_path = '../input/bike-sharing-demand/test.csv'
test_df = pd.read_csv(test_csv_path, parse_dates=['datetime'])
test_df.head()

In [None]:
# Predict with the already-fitted Ridge model on the competition test data.
# Nothing is fitted here: the test frame goes through the fitted feature
# transformer and the fitted interaction expansion, then through ridge.predict.
# The model was trained on log1p(count), so these predictions are in log space.
test_df_trans = feature_transformer.transform(test_df)
test_df_trans_p = poly.transform(test_df_trans)
test_pred_log = ridge.predict(test_df_trans_p)
test_pred_log

In [None]:
# Convert the log-space Ridge predictions back to counts. expm1 is the exact
# inverse of the log1p applied to the training target. Negative log-predictions
# would yield negative counts, which are not valid rental counts, so the
# result is clipped at 0 before it is used for the submission.
y_pred_test = np.clip(np.expm1(test_pred_log), 0, None)
y_pred_test

In [None]:
# Distinct values of the log-space and count-space Ridge predictions.
tuple(map(np.unique, (test_pred_log, y_pred_test)))

In [None]:
# Predict with the already-fitted Poisson model on the competition test data.
test_pred_poisson = po.predict(test_df_trans_p)

# The Poisson model was fitted on the raw counts, so its predictions are
# already on the count scale and need no back-transformation. The previous
# code computed this variable from `test_pred_log` (the Ridge output), which
# silently discarded the Poisson predictions.
y_pred_test_poisson = test_pred_poisson
y_pred_test_poisson

In [None]:
# Distinct values of the two Poisson prediction arrays.
tuple(map(np.unique, (test_pred_poisson, y_pred_test_poisson)))

In [None]:
# Number of predictions produced for the test set.
y_pred_test.shape[0]

In [None]:
# Attach the Ridge count predictions to the test frame as the 'count' column.
test_df = test_df.assign(**{'count': y_pred_test})
test_df

In [None]:
# Write the two submission columns to CSV without the row index.
submission_columns = ['datetime', 'count']
test_df[submission_columns].to_csv('submission.csv', index=False)

In [None]:
# Read the written submission file back in to eyeball its contents.
pd.read_csv('./submission.csv')