<a href="https://kritikseth.github.io/ipynbtagredirect" target="_parent"><img src="https://raw.githack.com/kritikseth/kritikseth/master/assets/icons/kritik_ipynbtagredirect.svg" alt="Kritik Seth"/></a>

If you are viewing this in GitHub and cannot see the plots, [Click Here](https://nbviewer.jupyter.org/github/kritikseth/ML-College-Sem-V/blob/master/ML_11_J043_051020.ipynb)

[Click Here](https://github.com/kritikseth/ML-College-Sem-V/blob/master/ML_11_J043_051020.ipynb) to go back to GitHub

In [1]:
import os
import datetime

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

import sklearn
from sklearn.metrics import r2_score
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

In [2]:
# !wget -O bike.csv https://www.openml.org/data/get_csv/22044628/dataset
# Bike Sharing Demand - 42713

bike = sklearn.datasets.fetch_openml(data_id = 42713)

In [3]:
bike.details

{'citation': 'Fanaee-T, Hadi, and Gama, Joao, Event labeling combining ensemble detectors and background knowledge, Progress in Artificial Intelligence (2013): pp. 1-15',
 'collection_date': '01-01-2013',
 'creator': 'Hadi Fanaee-T and Joao Gama',
 'default_target_attribute': 'count',
 'file_id': '22044628',
 'format': 'arff',
 'id': '42713',
 'ignore_attribute': ['casual', 'registered'],
 'language': 'English',
 'licence': 'Public',
 'md5_checksum': '843b70f38ceb4a0d868937f5e605836a',
 'name': 'Bike_Sharing_Demand',
 'original_data_url': 'http://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset',
 'processing_date': '2020-10-17 03:24:03',
 'status': 'active',
 'upload_date': '2020-10-17T03:23:51',
 'url': 'https://www.openml.org/data/v1/download/22044628/Bike_Sharing_Demand.arff',
 'version': '3',
 'version_label': '1',
 'visibility': 'public'}

In [4]:
X = pd.DataFrame(bike.data)
Y = pd.DataFrame({'count': bike.target})

X[0] = X[0].astype('int16')
X[1] = X[1].astype('int32')
X[2] = X[2].astype('int64')
X[3] = X[3].astype('int32')
X[4] = X[4].astype('int8')
X[5] = X[5].astype('int8')
X[6] = X[6].astype('int8')

In [5]:
X.dtypes

0       int16
1       int32
2       int64
3       int32
4        int8
5        int8
6        int8
7     float64
8     float64
9     float64
10    float64
11    float64
dtype: object

In [6]:
X.shape, Y.shape

((17379, 12), (17379, 1))

In [7]:
cols = {0: 'season', 1: 'year', 2: 'month', 3: 'hour', 4: 'holiday', 5: 'weekday', 6: 'workingday', 10: 'humdity'}

In [8]:
X = X.rename(columns=cols)

In [9]:
X.head()

Unnamed: 0,season,year,month,hour,holiday,weekday,workingday,7,8,9,humdity,11
0,1,2011,1,0,0,6,0,0.0,9.84,14.395,0.81,0.0
1,1,2011,1,1,0,6,0,0.0,9.02,13.635,0.8,0.0
2,1,2011,1,2,0,6,0,0.0,9.02,13.635,0.8,0.0
3,1,2011,1,3,0,6,0,0.0,9.84,14.395,0.75,0.0
4,1,2011,1,4,0,6,0,0.0,9.84,14.395,0.75,0.0


In [10]:
X['timestamp'] = X['year'].astype('str') + '/' + X['month'].astype('str')  + ' ' + X['hour'].astype('str')
# X.drop(['year', 'month', 'hour'], axis=1, inplace=True)

date_time = pd.to_datetime(X.pop('timestamp'), format='%Y/%m %H')
date_time = date_time.dt.strftime("%Y%m%d").astype(int)
# timestamp_s = date_time.map(datetime.datetime.timestamp

X['timestamp'] = date_time

In [11]:
X.head()

Unnamed: 0,season,year,month,hour,holiday,weekday,workingday,7,8,9,humdity,11,timestamp
0,1,2011,1,0,0,6,0,0.0,9.84,14.395,0.81,0.0,20110101
1,1,2011,1,1,0,6,0,0.0,9.02,13.635,0.8,0.0,20110101
2,1,2011,1,2,0,6,0,0.0,9.02,13.635,0.8,0.0,20110101
3,1,2011,1,3,0,6,0,0.0,9.84,14.395,0.75,0.0,20110101
4,1,2011,1,4,0,6,0,0.0,9.84,14.395,0.75,0.0,20110101


In [12]:
df = X.copy()
df['count'] = Y

In [13]:
scaler = StandardScaler()

# cols = ['year', 8, 9, 'count']

# for col in cols:
#     df[col] = scaler.fit_transform(df[[col]])

columns = df.columns
df = pd.DataFrame(scaler.fit_transform(df), columns=columns)

In [14]:
df.head()

Unnamed: 0,season,year,month,hour,holiday,weekday,workingday,7,8,9,humdity,11,timestamp,count
0,-0.430424,-1.005134,-1.610438,-1.670004,-0.172112,1.493891,-1.4669,-0.703784,-1.334648,-1.093281,0.947372,-1.553889,-1.11406,-0.956339
1,-0.430424,-1.005134,-1.610438,-1.525374,-0.172112,1.493891,-1.4669,-0.703784,-1.438516,-1.181732,0.895539,-1.553889,-1.11406,-0.824022
2,-0.430424,-1.005134,-1.610438,-1.380744,-0.172112,1.493891,-1.4669,-0.703784,-1.438516,-1.181732,0.895539,-1.553889,-1.11406,-0.868128
3,-0.430424,-1.005134,-1.610438,-1.236115,-0.172112,1.493891,-1.4669,-0.703784,-1.334648,-1.093281,0.63637,-1.553889,-1.11406,-0.972879
4,-0.430424,-1.005134,-1.610438,-1.091485,-0.172112,1.493891,-1.4669,-0.703784,-1.334648,-1.093281,0.63637,-1.553889,-1.11406,-1.039037


In [15]:
df = pd.get_dummies(df, columns=['season', 'month', 'hour', 'weekday']) # one hot encoding the categorical variables

In [16]:
corr = df.corr() # getting correlation between the variables

sns_colorscale = [[0.0, '#3f7f93'], [0.071, '#5890a1'], [0.143, '#72a1b0'], [0.214, '#8cb3bf'], [0.286, '#a7c5cf'], 
                  [0.357, '#c0d6dd'], [0.429, '#dae8ec'], [0.5, '#f2f2f2'], [0.571, '#f7d7d9'], [0.643, '#f2bcc0'],
                  [0.714, '#eda3a9'], [0.786, '#e8888f'], [0.857, '#e36e76'], [0.929, '#de535e'], [1.0, '#d93a46']] # colorscale used from seaborn library

In [17]:
fig = go.Figure(data=go.Heatmap(z=corr, x=corr.columns, y=corr.columns,
                                xgap=1, ygap=1, colorscale=sns_colorscale),
                layout=go.Layout(title_text='Correlation Plot', height=1000,
                                 yaxis_autorange='reversed'))

fig.show()

In [18]:
df.drop(['workingday', 11], axis=1, inplace=True)

In [19]:
column_indices = {name: i for i, name in enumerate(df.columns)}

n = len(df)
train = df[0:int(n*0.7)]
val = df[int(n*0.7):int(n*0.9)]
test = df[int(n*0.9):]

num_features = df.shape[1]

In [20]:
X_train, y_train = train.drop(['count'], axis=1), train['count']
X_val, y_val = val.drop(['count'], axis=1), val['count']
X_test, y_test = test.drop(['count'], axis=1), test['count']

In [21]:
import keras
from keras.layers import Dense, Dropout
from keras.models import Sequential
import tensorflow as tf

In [22]:
linear = Sequential()

linear.add(Dense(units=64, activation='relu', input_shape=X_train.shape))
linear.add(Dense(units=128, activation='relu'))
linear.add(Dropout(0.3))
linear.add(Dense(units=32, activation='relu'))
linear.add(Dense(units=1, activation='linear'))

linear.compile(loss='mse', optimizer='rmsprop', metrics=[tf.metrics.MeanAbsoluteError(), keras.metrics.RootMeanSquaredError()])

In [23]:
linear.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f7616011198>

In [24]:
linear.evaluate(X_test, y_test)



[0.1997118592262268, 0.3227592408657074, 0.44689133763313293]

In [25]:
y_pred = linear.predict(X_test)
r2_score(y_test, y_pred)



0.8139464251270058