## Bike Sharing Analysis with Machine Learning

In [None]:
# Install the ucimlrepo package to easily import dataset
!pip install ucimlrepo 

In [35]:
# Import packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Machine Learning
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, r2_score

### 1. Load Data

In [5]:
from ucimlrepo import fetch_ucirepo

# Fetch dataset id 275 through ucimlrepo.
hourly_dataset = fetch_ucirepo(id=275)
# Keep only the feature columns here (a pandas DataFrame, per the original
# comment); the target is read from the same object in a later cell.
features = hourly_dataset.data.features

array([16, 40, 32, ..., 90, 61, 49])

In [6]:
# Pull the 'cnt' column out of the targets table as an array,
# then echo it as the cell output for a quick sanity check.
target = hourly_dataset.data.targets['cnt'].values
target

array([16, 40, 32, ..., 90, 61, 49])

In [7]:
# Overview
# NOTE(review): despite its name, `full_data` is bound to the same
# `hourly_dataset.data.features` frame that `features` was assigned from, so it
# does not include the target. It is not referenced again in the visible cells;
# confirm whether it is still needed.
full_data = hourly_dataset.data.features

In [8]:
# Preview the first five rows of the feature table.
features.head(n=5)

Unnamed: 0,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed
0,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0
1,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0
2,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0
3,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0
4,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0


In [10]:
# Summary statistics, transposed so each feature is a row.
features.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
season,17379.0,2.50164,1.106918,1.0,2.0,3.0,3.0,4.0
yr,17379.0,0.502561,0.500008,0.0,0.0,1.0,1.0,1.0
mnth,17379.0,6.537775,3.438776,1.0,4.0,7.0,10.0,12.0
hr,17379.0,11.546752,6.914405,0.0,6.0,12.0,18.0,23.0
holiday,17379.0,0.02877,0.167165,0.0,0.0,0.0,0.0,1.0
weekday,17379.0,3.003683,2.005771,0.0,1.0,3.0,5.0,6.0
workingday,17379.0,0.682721,0.465431,0.0,0.0,1.0,1.0,1.0
weathersit,17379.0,1.425283,0.639357,1.0,1.0,1.0,2.0,4.0
temp,17379.0,0.496987,0.192556,0.02,0.34,0.5,0.66,1.0
atemp,17379.0,0.475775,0.17185,0.0,0.3333,0.4848,0.6212,1.0


In [15]:
# Drop the date column ('dteday'); the remaining columns shown in the overview
# (yr, mnth, hr, weekday, ...) are kept. The features frame has no 'instant'
# column, so there is nothing else to remove here.
# errors='ignore' keeps this cell idempotent: re-running it after 'dteday' is
# already gone is a no-op instead of raising a KeyError.
features = features.drop(columns=['dteday'], errors='ignore')

### 2. Model Building

In [28]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=42,
)


In [38]:
# Linear Regression
# Instantiate an (unfitted) linear regression model; n_jobs=1 is passed explicitly.
lr = LinearRegression(n_jobs=1)

In [39]:
# Fit the linear model on the training split.
lr.fit(X=X_train, y=y_train)

In [40]:
# Predict the target for the held-out test rows.
predictions_lr = lr.predict(X=X_test)

In [41]:
# Score the linear model on the test split: mean absolute error and R^2.
mae_lr = mean_absolute_error(y_true=y_test, y_pred=predictions_lr)
r2_lr = r2_score(y_true=y_test, y_pred=predictions_lr)

In [42]:
# Report the linear regression test metrics (MAE first, then R^2).
# The metrics cell stores them as `mae_lr` / `r2_lr`; the bare names `mae` / `r2`
# are never defined in this notebook and would raise NameError on a fresh kernel.
print(mae_lr)
print(r2_lr)

104.85283610636294
0.38716067385039765


In [37]:
# Decision tree regressor.
# random_state is fixed (same seed as the train/test split) so that re-running
# the notebook yields the same tree and therefore the same metrics.
dt = DecisionTreeRegressor(random_state=42)

In [43]:
# Fit the decision tree on the training split.
dt.fit(X=X_train, y=y_train)

In [52]:
# Predict on both splits so training and test error can be compared.
train_prediction_dt = dt.predict(X=X_train)
test_predictions_dt = dt.predict(X=X_test)

In [54]:
# Evaluation metrics for the decision tree on both splits (MAE and R^2).
# The prediction cell stores the test-set predictions as `test_predictions_dt`;
# there is no `predictions_dt` variable, so using that name raised NameError.
mae_dt = mean_absolute_error(y_test, test_predictions_dt)
train_mae_dt = mean_absolute_error(y_train, train_prediction_dt)
r2_dt = r2_score(y_test, test_predictions_dt)
train_r2_dt = r2_score(y_train, train_prediction_dt)

In [61]:
# Print one value per line: train MAE, test MAE, train R^2, test R^2.
print(train_mae_dt, mae_dt, train_r2_dt, r2_dt, sep="\n")

0.010933004521167283
35.508630609896436
0.9999936535321521
0.8787610558176839
