In [1]:
from IPython.display import display, HTML
# Inject a CSS rule so the notebook's .container element spans the full browser width.
display(
    HTML("<style>.container { width:100% !important; }</style>")
)

In [None]:
# -*- coding: utf-8 -*-
"""Bike_Sharing_Demand_Prediction_Capstone_Project(Shubhashis).ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1dsj7Gp61EvI0xCfiD2PIlrglNNYQlFlX

#<font color='red'> <b> Project : Bike Sharing Demand Prediction </b>

## <font color='Green'> <b>  Problem Description </b>

### Currently Rental bikes are introduced in many urban cities for the enhancement of mobility comfort. It is important to make the rental bike available and accessible to the public at the right time as it lessens the waiting time. Eventually, providing the city with a stable supply of rental bikes becomes a major concern. The crucial part is the prediction of bike count required at each hour for the stable supply of rental bikes.

## <font color='green'><b> Data Description </b>

### <b> The dataset contains weather information (Temperature, Humidity, Windspeed, Visibility, Dewpoint, Solar radiation, Snowfall, Rainfall), the number of bikes rented per hour and date information.</b>


### <b>Attribute Information: </b>

* ### Date : year-month-day
* ### Rented Bike count - Count of bikes rented at each hour
* ### Hour - Hour of the day
* ### Temperature-Temperature in Celsius
* ### Humidity - %
* ### Windspeed - m/s
* ### Visibility - 10m
* ### Dew point temperature - Celsius
* ### Solar radiation - MJ/m2
* ### Rainfall - mm
* ### Snowfall - cm
* ### Seasons - Winter, Spring, Summer, Autumn
* ### Holiday - Holiday/No holiday
* ### Functional Day - NoFunc(Non Functional Hours), Fun(Functional hours)

# <font color='Green'>**Loading Dataset and Importing Modules**
"""

# Commented out IPython magic to ensure Python compatibility.
#importing the modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns

from datetime import datetime
import datetime as dt

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MultiLabelBinarizer

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import RandomizedSearchCV

from sklearn import metrics
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import log_loss

import warnings
# Suppress every warning for the rest of the session to keep cell output uncluttered.
# NOTE(review): this also hides deprecation/convergence warnings from the libraries
# imported above; consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')

"""##<font color='Green'>importing the dataset"""

# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Read the Seoul bike-sharing CSV into a DataFrame.
# The 'latin' codec is passed explicitly (presumably because the degree signs in the
# column headers are not UTF-8 encoded in the file -- confirm against the raw CSV).
df = pd.read_csv(
    '/content/drive/MyDrive/1.Project/Bike Sharing Demand Prediction/SeoulBikeData.csv',
    encoding='latin',
)

"""
# <font color='Green'>**Getting insight from data**"""

# Preview the first five rows of the raw data
df.head(n=5)

# (number of rows, number of columns)
print(df.shape)

# Column labels as loaded from the CSV
df.columns

# Per-column dtype and non-null count
df.info()

# Number of distinct values in each column
df.nunique()

"""##<font color='blue'>Features description

**Date** : *The date of the observation, covering 365 days from 01/12/2017 to 30/11/2018, formatted as DD/MM/YYYY, type : str; we need to convert it into datetime format.*

**Rented Bike Count** : *Number of rented bikes per hour, which is our dependent variable (the value we need to predict), type : int*

**Hour**: *The hour of the day, starting from 0-23 it's in a digital time format, type : int, we need to convert it into category data type.*

**Temperature(°C)**: *Temperature in Celsius, type : Float*

**Humidity(%)**: *Humidity in the air in %, type : int*

**Wind speed (m/s)** : *Speed of the wind in m/s, type : Float*

**Visibility (10m)**: *Visibility in units of 10 m, type : int*

**Dew point temperature(°C)**: *Temperature to which the air must cool to become saturated with water vapour (the dew point), in Celsius, type : Float*

**Solar Radiation (MJ/m2)**: *Sun contribution, type : Float*

**Rainfall(mm)**: *Amount of raining in mm, type : Float*

**Snowfall (cm)**: *Amount of snowing in cm, type : Float*

**Seasons**: *Season of the year, type : str; there are only 4 seasons in the data.*

**Holiday**: *If the day  is holiday period or not, type: str*

**Functioning Day**: *If the day is a Functioning Day or not, type : str*

# <font color='blue'>**Preprocessing the dataset**

##<font color='green'>Missing values
"""

# Number of missing (NaN/None) entries in every column
df.isna().sum()

"""##<font color='green'>Duplicate values"""

# Count rows that are exact repeats of an earlier row
int(df.duplicated().sum())

"""##<font color='blue'>changing column name"""

# Swap the verbose CSV headers (spaces, units, symbols) for short snake_case names.
# Columns not listed here keep their original label.
df = df.rename(
    columns=dict([
        ('Rented Bike Count', 'Rented_Bike_Count'),
        ('Temperature(°C)', 'Temperature'),
        ('Humidity(%)', 'Humidity'),
        ('Wind speed (m/s)', 'Wind_speed'),
        ('Visibility (10m)', 'Visibility'),
        ('Dew point temperature(°C)', 'Dew_point_temperature'),
        ('Solar Radiation (MJ/m2)', 'Solar_Radiation'),
        ('Rainfall(mm)', 'Rainfall'),
        ('Snowfall (cm)', 'Snowfall'),
        ('Functioning Day', 'Functioning_Day'),
    ])
)

"""##<font color='orange'> Extracting (year, month, day) from the date column"""

# Derive calendar features from the "Date" column, then drop the raw column.
# Dates are written as DD/MM/YYYY; parse the whole column in one vectorised call
# instead of calling strptime row by row.
rental_dates = pd.to_datetime(df['Date'], format="%d/%m/%Y")

# Month number of each observation.
df['month'] = rental_dates.dt.month

# 1 for Saturday/Sunday, 0 for Monday-Friday. dayofweek counts Monday as 0, so
# values 5 and 6 are the weekend. Computing this straight from the parsed dates
# avoids creating temporary "day"/"year" columns that would only be dropped again.
df['weekdays_weekend'] = (rental_dates.dt.dayofweek >= 5).astype('int64')

# The raw date string is not used once the features above exist.
df = df.drop(columns=['Date'])

# Preview the frame now that the date-derived columns are in place
df.head(n=5)

# Re-check dtypes and non-null counts after the transformation
df.info()

# How many rows fall into each weekdays_weekend value
df.loc[:, 'weekdays_weekend'].value_counts()

"""*<font color='red'> ***The "Hour", "month" and "weekdays_weekend" columns are stored as integers, but they are really categorical variables, so we need to change their data type. Otherwise, later analysis (for example correlations) would treat these codes as numeric magnitudes, and the results could mislead us.***"""

# Convert the integer-coded calendar features to the pandas "category" dtype.
# astype returns a new object rather than modifying df, so the result must be
# assigned back; the earlier un-assigned expression left the dtypes untouched.
categorical_columns = ['Hour', 'month', 'weekdays_weekend']
df = df.astype({column: 'category' for column in categorical_columns})

# Confirm that the three columns now report dtype "category".
df.info()



"""# <font color='red'> **Exploratory Data Analysis**

<font color='Green'> **From the bar plot below we can clearly see that from month 5 to month 10 the demand for rented bikes is high compared to the other months. These are the warmer months around the summer season.**
"""

# Bar chart of Rented_Bike_Count grouped by month, drawn on an explicit 20x8 axes.
fig, ax = plt.subplots(figsize=(20, 8))
sns.barplot(
    x='month',
    y='Rented_Bike_Count',
    data=df,
    capsize=.2,
    ax=ax,
)
ax.set(title='Count of Rented bikes acording to Month ')

"""####<font color='blue'> **weekdays vs weekend**"""

# Bar chart of Rented_Bike_Count for weekdays (0) versus weekends (1).
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(
    x='weekdays_weekend',
    y='Rented_Bike_Count',
    data=df,
    capsize=.2,
    ax=ax,
)
ax.set(title='Count of Rented bikes acording to weekdays_weekenday ')

# Point plot of Rented_Bike_Count per hour, with one line per weekdays_weekend value.
fig, ax = plt.subplots(figsize=(20, 8))
sns.pointplot(
    x='Hour',
    y='Rented_Bike_Count',
    hue='weekdays_weekend',
    data=df,
    ax=ax,
)
ax.set(title='Count of Rented bikes acording to weekdays_weekend ')

"""<font color='Blue'>***From the point plot and bar plot above we can see that on weekdays (shown in blue) the demand for bikes is higher, likely because of office commuting.***

<font color='Blue'>***Peak times are 7 am to 9 am and 5 pm to 7 pm.***

<font color='Blue'>***The orange colour represents weekend days; it shows that the demand for rented bikes is very low, especially in the morning hours, but it increases slightly in the evening, from 4 pm to 8 pm.***   
"""

