In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.express as px
import plotly.graph_objs as go

# Bike Sharing
In this notebook, we will try to predict the total number of rentals using machine learning algorithms. Before modelling, we will perform exploratory data analysis and feature engineering to examine the data.

* datetime - hourly date + timestamp  
* season -  1 = spring, 2 = summer, 3 = fall, 4 = winter 
* holiday - whether the day is considered a holiday
*  workingday - whether the day is neither a weekend nor holiday
* weather - weather category:
    * 1: Clear, Few clouds, Partly cloudy
    * 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
    * 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
    * 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog
*  temp - temperature in Celsius
*  atemp - "feels like" temperature in Celsius
*  humidity - relative humidity
*  windspeed - wind speed
*  casual - number of non-registered user rentals initiated
*  registered - number of registered user rentals initiated
*  count - number of total rentals

In [None]:
# Read the bike-sharing training CSV into the notebook's working frame.
df = pd.read_csv(filepath_or_buffer='../input/bike-sharing-demand/train.csv')

In [None]:
# Preview the first five records to check that the file parsed as expected.
df.head(n=5)

In [None]:
# (number of rows, number of columns) of the loaded training frame.
df.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# Column dtypes as inferred by read_csv (no explicit dtype mapping was supplied).
df.dtypes

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Discard the raw timestamp column before plotting and modelling.
# errors="ignore" keeps this cell re-runnable: on a second execution the
# column is already gone and a plain drop would raise KeyError.
df = df.drop(columns=["datetime"], errors="ignore")

In [None]:
# Distribution of the target variable.
# sns.distplot is deprecated; histplot with kde=True and stat='density'
# draws the same histogram + density-curve combination.
plt.figure(figsize=(10, 6))
sns.histplot(df['count'], kde=True, stat='density', color='r')
plt.title('Distribution of total rentals (count)')
plt.show()

In [None]:
# Histogram of the `temp` column: 80 bins on an 800x500 px canvas.
fig = px.histogram(df, x="temp", nbins=80, width=800, height=500)
fig.show()

In [None]:
# Histogram of the `atemp` ("feels like" temperature) column: 80 bins, 800x500 px.
fig = px.histogram(df, x="atemp", nbins=80, width=800, height=500)
fig.show()

In [None]:
# Histogram of the `humidity` column: 80 bins, 800x500 px.
fig = px.histogram(df, x="humidity", nbins=80, width=800, height=500)
fig.show()

In [None]:
# Histogram of the `windspeed` column: 60 bins, 800x500 px.
fig = px.histogram(df, x="windspeed", nbins=60, width=800, height=500)
fig.show()

In [None]:
# Histogram of the `casual` (non-registered rentals) column: 50 bins, 800x500 px.
fig = px.histogram(df, x="casual", nbins=50, width=800, height=500)
fig.show()

In [None]:
# Histogram of the `registered` (registered-user rentals) column: 50 bins, 800x500 px.
fig = px.histogram(df, x="registered", nbins=50, width=800, height=500)
fig.show()

In [None]:
# Share of records falling into each `season` value.
fig = px.pie(
    df,
    names="season",
    title="season",
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [None]:
# Share of records flagged as holiday vs. non-holiday.
fig = px.pie(
    df,
    names="holiday",
    title="holiday",
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [None]:
# Share of records on working days vs. non-working days.
fig = px.pie(
    df,
    names="workingday",
    title="workingday",
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [None]:
# Share of records in each `weather` category.
fig = px.pie(
    df,
    names="weather",
    title="weather",
    color_discrete_sequence=px.colors.qualitative.Set1,
)
fig.show()

In [None]:
# Total rentals plotted against temperature.
fig = px.scatter(
    data_frame=df,
    x='temp',
    y='count',
)
fig.show()

In [None]:
# Total rentals plotted against "feels like" temperature.
fig = px.scatter(
    data_frame=df,
    x='atemp',
    y='count',
)
fig.show()

In [None]:
# Total rentals plotted against relative humidity.
fig = px.scatter(
    data_frame=df,
    x='humidity',
    y='count',
)
fig.show()

In [None]:
# Total rentals plotted against wind speed.
fig = px.scatter(
    data_frame=df,
    x='windspeed',
    y='count',
)
fig.show()

In [None]:
# Total rentals plotted against rentals by non-registered users.
fig = px.scatter(
    data_frame=df,
    x='casual',
    y='count',
)
fig.show()

In [None]:
# Total rentals plotted against rentals by registered users.
fig = px.scatter(
    data_frame=df,
    x='registered',
    y='count',
)
fig.show()

In [None]:
# Average `count` for each season value.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises TypeError for positional x/y.
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.barplot(x='season', y='count', data=df, palette='hls');

In [None]:
# Average `count` on holidays vs. non-holidays.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises TypeError for positional x/y.
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.barplot(x='holiday', y='count', data=df, palette='hls');

In [None]:
# Average `count` on working days vs. non-working days.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises TypeError for positional x/y.
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.barplot(x='workingday', y='count', data=df, palette='hls');

In [None]:
# Average `count` for each weather category.
# x and y are passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally and raises TypeError for positional x/y.
# The trailing semicolon suppresses the Axes repr in the cell output.
sns.barplot(x='weather', y='count', data=df, palette='hls');

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df.
# Changes from the previous version:
#   * the bare `df.corr` statement (attribute access, never called) is removed;
#   * the cell separator width is the numeric `linewidths=0.5`, not the string ".5";
#   * the figure is 12x8 inches instead of 50x30, which is ample for this matrix;
#   * the heatmap is drawn on the explicitly created axes.
f, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, linewidths=0.5, fmt=".2f", cmap='cividis', ax=ax)
ax.set_title('Feature correlation matrix')
plt.show()

In [None]:
# One-hot encode the multi-level categorical codes.
# A bare pd.get_dummies(df) only converts object/category columns; `season`
# and `weather` are stored as integer codes (see the data description), so
# they have to be named explicitly or the call leaves the frame unchanged.
categorical_cols = [col for col in ("season", "weather") if col in df.columns]
if categorical_cols:  # empty on a re-run, when the columns are already encoded
    df = pd.get_dummies(df, columns=categorical_cols)
df.head()

In [None]:
# Target: total rentals per record.
y = df["count"]

# Features: everything except the target AND its two components.
# `count` is the sum of `casual` and `registered`, so keeping either in X leaks
# the answer into the model and makes the reported scores meaningless; neither
# value would be known at prediction time anyway.
X = df.drop(columns=["count", "casual", "registered"])

In [None]:
from sklearn.model_selection import train_test_split

# Shuffle the rows and hold out 33% for evaluation; the fixed seed makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    shuffle=True,
    random_state=44,
)

In [None]:


from sklearn.ensemble import GradientBoostingRegressor

# Gradient-boosted ensemble of 100 shallow (depth-2) regression trees.
# learning_rate scales each tree's contribution. The previous value of 1.5
# amplified every correction instead of shrinking it, which makes boosting
# overshoot and generalise poorly; 0.1 is the library default.
GBRModel = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=2,
    learning_rate=0.1,
    random_state=33,
)
GBRModel.fit(X_train, y_train)

# .score() on a regressor returns the coefficient of determination (R^2).
print('GBRModel Train Score is : ' , GBRModel.score(X_train, y_train))
print('GBRModel Test Score is : ' , GBRModel.score(X_test, y_test))
print('----------------------------------------------------')

