In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
# Load the raw bike-rental CSV. The encoding is given explicitly because the
# column headers contain non-ASCII characters such as "°".
data = pd.read_csv(
    filepath_or_buffer='../dataset/SeoulBikeData.csv',
    encoding='Windows-1252',
)

In [8]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,Date,Rented Bike Count,Hour,Temperature(°C),Humidity(%),Wind speed (m/s),Visibility (10m),Dew point temperature(°C),Solar Radiation (MJ/m2),Rainfall(mm),Snowfall (cm),Seasons,Holiday,Functioning Day
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes


In [10]:
# Print column names, non-null counts and dtypes of the raw frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 14 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Date                       8760 non-null   object 
 1   Rented Bike Count          8760 non-null   int64  
 2   Hour                       8760 non-null   int64  
 3   Temperature(°C)            8760 non-null   float64
 4   Humidity(%)                8760 non-null   int64  
 5   Wind speed (m/s)           8760 non-null   float64
 6   Visibility (10m)           8760 non-null   int64  
 7   Dew point temperature(°C)  8760 non-null   float64
 8   Solar Radiation (MJ/m2)    8760 non-null   float64
 9   Rainfall(mm)               8760 non-null   float64
 10  Snowfall (cm)              8760 non-null   float64
 11  Seasons                    8760 non-null   object 
 12  Holiday                    8760 non-null   object 
 13  Functioning Day            8760 non-null   object 

In [None]:
# Work on an independent copy so the raw `data` frame is left untouched.
df = data.copy(deep=True)

In [None]:
# Summary statistics of the numeric columns, transposed so each column is a row.
df.describe().transpose()

In [None]:
# Number of rows that exactly repeat an earlier row.
int(df.duplicated().sum())

In [None]:
# Histogram of the 'Rented Bike Count' column.

sns.displot(df['Rented Bike Count'], kind='hist')

In [None]:
# Derive calendar features (month and weekday) from the date, since bike
# rentals may also depend on these temporal attributes.

df['Date'] = pd.to_datetime(df['Date'], format="%d/%m/%Y")

# Ordered categoricals keep months and weekdays in calendar order (rather than
# alphabetical order) when the columns are grouped or plotted.
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
df['month'] = pd.Categorical(df['Date'].dt.month_name(), categories=months, ordered=True)

cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df['Weekday'] = pd.Categorical(df['Date'].dt.day_name(), categories=cats, ordered=True)

In [None]:
# Show the first three rows of the working frame.
df.head(n=3)

In [None]:
# Heatmap of total bike rentals by month and weekday
# ============================================================================================
# observed=False is passed explicitly: 'month' and 'Weekday' are categoricals, and
# relying on the default raises a FutureWarning in recent pandas versions.
df_pivot = (
    df.groupby(['month', 'Weekday'], observed=False)['Rented Bike Count']
    .sum()
    .reset_index()
)

# One row per weekday, one column per month. Each (weekday, month) pair occurs
# once in df_pivot, so pivot_table's default mean leaves the sums unchanged.
month_pivot = df_pivot.pivot_table(values='Rented Bike Count',
                                   index='Weekday',
                                   columns='month',
                                   observed=False)

sns.set_theme(rc={'figure.figsize':(8,4)})

sns.heatmap(month_pivot, 
            cmap='Blues',
            linecolor='white',
            linewidth=0.5)

plt.title("Bike rentals by month and weekday")
plt.ylabel("Week Day")
plt.xlabel("Month")

plt.show()

In [None]:
# Correlation heatmap of the numeric columns only. select_dtypes drops the
# non-numeric columns (Date, Seasons, Holiday, Functioning Day, Weekday and month).
# The 'number' selector is equivalent to np.number and needs no NumPy import.

df_numeric = df.select_dtypes(include='number')
sns.heatmap(df_numeric.corr(), annot=True)
plt.title("Correlation between numeric features")
plt.show()

In [None]:
# Bike rentals by month
#===================================

# 'month' is categorical; observed=False keeps the current grouping behaviour
# explicit instead of relying on the deprecated default.
df.groupby('month', observed=False)['Rented Bike Count'].sum().plot(kind='bar')
plt.title("Number of Bike rentals per month")
plt.ylabel("Rentals")
# The x-axis shows the month names, so label it as such rather than repeating the title.
plt.xlabel("Month")

sns.despine(left=False, bottom=False)
plt.show()


In [None]:
# Total bike rentals for each hour of the day, as a bar chart
#===================================
ax = df.groupby('Hour')['Rented Bike Count'].sum().plot.bar()
ax.set(title="Number of Bike rentals per hour",
       ylabel="Rentals",
       xlabel="Time in hours")

sns.despine(ax=ax, left=False, bottom=False)
plt.show()

In [None]:
# Total bike rentals for each distinct temperature value, as a line chart
#===================================
# NOTE(review): a sum per distinct value also reflects how often that value
# occurs; confirm a sum (rather than a mean) is what is intended.

ax = df.groupby('Temperature(°C)')['Rented Bike Count'].sum().plot()
ax.set(title="Temperature",
       ylabel="Rentals",
       xlabel="Temperature(°C)")

sns.despine(ax=ax, left=False, bottom=False)
plt.show()

In [None]:
# Total bike rentals for each distinct humidity value, as a line chart
#===================================
# NOTE(review): a sum per distinct value also reflects how often that value
# occurs; confirm a sum (rather than a mean) is what is intended.

ax = df.groupby('Humidity(%)')['Rented Bike Count'].sum().plot()
ax.set(title="Humidity",
       ylabel="Rentals",
       xlabel="Humidity(%)")

sns.despine(ax=ax, left=False, bottom=False)
plt.show()

In [None]:
# Total bike rentals for each distinct dew point temperature, as a line chart
#===================================
# NOTE(review): a sum per distinct value also reflects how often that value
# occurs; confirm a sum (rather than a mean) is what is intended.

ax = df.groupby('Dew point temperature(°C)')['Rented Bike Count'].sum().plot()
ax.set(title="Dew point temperature(°C)",
       ylabel="Rentals",
       xlabel="Dew point temperature(°C)")

sns.despine(ax=ax, left=False, bottom=False)
plt.show()

In [None]:
# Total bike rentals for each distinct wind speed value, as a line chart
#===================================
# NOTE(review): a sum per distinct value also reflects how often that value
# occurs; confirm a sum (rather than a mean) is what is intended.

ax = df.groupby('Wind speed (m/s)')['Rented Bike Count'].sum().plot()
ax.set(title="Wind speed",
       ylabel="Rentals",
       xlabel="Wind speed (m/s)")

sns.despine(ax=ax, left=False, bottom=False)
plt.show()

In [None]:
# Total bike rentals per 'Holiday' category, largest total first.

(
    df.groupby('Holiday')['Rented Bike Count']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)


In [None]:
# Total bike rentals per season, largest total first, as a two-column frame.
Season_analysis = (
    df.groupby('Seasons')['Rented Bike Count']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

In [None]:
# Bar chart of total bike rentals per season.
# A single set_theme call is enough: every set_theme call resets the style,
# palette and rc settings, so only the last of several stacked calls survives.
sns.set_theme(style='white', palette='Blues_d', rc={'figure.figsize': (8, 4)})

# hue mirrors x so that the 'rainbow' palette gives each season its own colour.
sns.barplot(x='Seasons', y='Rented Bike Count', data=Season_analysis,
            palette='rainbow',
            hue="Seasons")

plt.title("Bike Rentals by Season")
plt.ylabel("Number of Rented Bike")
plt.xlabel("Seasons")

# Remove the top and right spines; the left and bottom ones are kept.
sns.despine(left=False, bottom=False)

plt.show()