In [1]:
# importing dependencies

import os
import warnings

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

warnings.filterwarnings('ignore')

### 1. Model building

In [2]:
def leaky_relu(X, alpha=0.01):
    '''
    function to pass object through leaky_relu
    arguments:
        X = array-like of pre-activation values (any shape)
        alpha = slope applied to the negative entries (default 0.01)
    return:
        new floating point array with the shape of X in which every negative
        entry is multiplied by alpha; X itself is never modified
    '''
    X = np.asarray(X)
    # integer input is promoted so that the scaled negatives are not truncated
    if not np.issubdtype(X.dtype, np.floating):
        X = X.astype(float)
    # np.where builds a fresh array, so the caller's buffer stays untouched
    return np.where(X < 0, X * alpha, X)

def sigmoid(X):
    '''
    logistic function applied element-wise: 1 / (1 + e^(-X))
    '''
    exp_neg = np.exp(-X)
    denominator = 1+exp_neg
    return 1/denominator

In [3]:
class nn():
    '''
    Neural Network (NN) class: fully connected network with leaky_relu hidden
    layers and a linear output layer, updated one sample at a time
    arguments:
        arch_nn = architecture of NN in form of list whose elements are the
                    number of neurons in the different layers (input layer first,
                    output layer last); defaults to [2,3,1]
    '''

    def __init__(self, arch_nn=None):
        '''
        constructor of nn class
        raises:
            ValueError = when arch_nn has fewer than two layers
        '''
        # None sentinel: a list literal as default would be created once and
        # shared by every instance constructed without an argument
        if arch_nn is None:
            arch_nn = [2,3,1]
        arch_nn = list(arch_nn)
        if len(arch_nn) < 2:
            raise ValueError('arch_nn needs at least an input and an output layer')

        self.arch_nn = arch_nn
        self.W = self.init_W(arch_nn)
        self.b = self.init_b(arch_nn)
        self.z = self.init_z(arch_nn)
        self.a = self.init_a(arch_nn)
        # gradient buffers: same shapes as the parameters/activations; every
        # entry is overwritten by back_prop before it is read
        self.W_grad = self.init_W(arch_nn)
        self.b_grad = self.init_b(arch_nn)
        self.z_grad = self.init_z(arch_nn)
        self.a_grad = self.init_a(arch_nn)

    def init_W(self, arch_nn):
        '''
        random initialisation of different weight matrix (W)
        size(W[layer]) = [no. of neurons in (layer) X no. of neurons in (layer-1)]
        return:
            dict keyed by layer index (1 .. number of layers - 1)
        '''
        W = dict()
        for i in range(len(arch_nn)-1):
            W[i+1] = np.random.rand(arch_nn[i+1],arch_nn[i])
        return W

    def init_b(self, arch_nn):
        '''
        random initialisation of different bias vector (b)
        size(b[layer]) = [no. of neurons in (layer) X 1]
        '''
        b = dict()
        for i in range(1, len(arch_nn)):
            b[i] = np.random.rand(arch_nn[i],1)
        return b

    def init_z(self, arch_nn):
        '''
        random initialisation of different input vector (z) to NN layers
        (placeholder values, replaced by feed_forward)
        size(z[layer]) = [no. of neurons in (layer) X 1]
        '''
        z = dict()
        for i in range(1, len(arch_nn)):
            z[i] = np.random.rand(arch_nn[i],1)
        return z

    def init_a(self, arch_nn):
        '''
        random initialisation of different output vector (a) from NN layers
        (placeholder values, replaced by feed_forward)
        size(a[layer]) = [no. of neurons in (layer) X 1]
        '''
        a = dict()
        # iterate over the argument, like the other init_* helpers
        for i in range(1, len(arch_nn)):
            a[i] = np.random.rand(arch_nn[i],1)
        return a

    def feed_forward(self, data_):
        '''
        function to feedforward the input data and calculate different a & z vectors with
        the help of W, b & input data
        argument:
            data_ = input data vector, shape [no. of input neurons X 1]
        return:
            a[last_layer] = output of the (linear) last layer
        '''
        last = len(self.arch_nn)-1
        for i in range(1, last+1):
            self.z[i] = self.W[i]@data_ + self.b[i]

            if i < last:
                # hand leaky_relu a copy so z keeps the pre-activation values
                # even when the activation works in place
                self.a[i] = leaky_relu(self.z[i].copy())
            else:
                # output layer is linear
                self.a[i] = self.z[i]
            data_ = self.a[i]
        return self.a[last]

    def back_prop(self, y_, data_, lr):
        '''
        function to calculate the gradient w.r.t. true_value and back propagate the same
        through the entire NN and eventually update the weights, biases of NN;
        feed_forward must have been called with the same data_ beforehand
        argument:
            y_ = ground truth value
            data_ = input data vector
            lr = learning rate for gradient update
        '''
        last = len(self.arch_nn)-1

        # (pred - y) is the derivative of 0.5*(pred - y)**2; the output layer is
        # linear, so the gradient w.r.t. a and z of that layer coincide
        grad = self.a[last] - y_
        self.a_grad[last] = grad
        self.z_grad[last] = grad

        # walk from the output layer back to the first layer
        for i in range(last, 0, -1):
            if i < last:
                self.a_grad[i] = self.W[i+1].T@self.z_grad[i+1]
                # derivative of leaky_relu: 1 for z >= 0, 0.01 otherwise
                self.z_grad[i] = self.a_grad[i]*np.where(self.z[i] >= 0, 1.0, 0.01)

            # the first layer is fed by the input vector itself; this also covers
            # architectures without any hidden layer, where a[0] does not exist
            layer_input = self.a[i-1] if i > 1 else data_

            # (n X 1) * (1 X m) broadcasts to the (n X m) outer product
            self.W_grad[i] = self.z_grad[i]*layer_input.T
            self.b_grad[i] = self.z_grad[i]

        # update the network parameters with the gradient descent algorithm
        # (done after all gradients are known so they use the old weights)
        for i in range(1, last+1):
            self.W[i] = self.W[i] - (lr*self.W_grad[i])
            self.b[i] = self.b[i] - (lr*self.b_grad[i])

    def predict(self, data_point):
        '''
        function to predict the value utilising the network parameters and
        input; like feed_forward it overwrites the stored z & a vectors
        argument:
            data_point = input data point
        return:
            a[last_layer] = prediction
        '''
        return self.feed_forward(data_point)

In [4]:
def main(data=None, y=None, data_test=None,
         y_tst=None, dim=None, epochs=10, lr=0.01):
    '''
    function to initialise the Neural Network class object and get the prediction
    argument:
        data = train dataset, one sample per column (default: random 3 X 3)
        y = train ground truth, shape [1 X no. of samples] (default: random 1 X 3)
        data_test = test dataset, one sample per column (default: random 3 X 3)
        y_tst = test ground truth, shape [1 X no. of samples] (default: random 1 X 3)
        dim = list defining network architecture (default: [3,2,3,1])
        epochs = no. of epochs to train the NN
        lr = learning rate passed to every gradient update (default 0.01)
    return:
        act = test ground truth
        pre = predicted values
    '''
    # defaults are created per call: expressions in the signature would be
    # evaluated only once, when the function is defined
    if data is None:
        data = np.random.rand(3,3)
    if y is None:
        y = np.random.rand(1,3)
    if data_test is None:
        data_test = np.random.rand(3,3)
    if y_tst is None:
        y_tst = np.random.rand(1,3)
    if dim is None:
        dim = [3,2,3,1]

    # initialize the nn object
    model = nn(arch_nn=dim)

    for epoch in range(epochs):

        print('Epoch====>',epoch+1)

        # one gradient step per training sample (columns are samples)
        for i in range(data.shape[1]):
            sample = data[:,i].reshape(-1,1)
            target = y[:,i].reshape(-1,1)
            model.feed_forward(sample)
            model.back_prop(target, sample, lr=lr)

    # predict returns a 2-D array; [0][0] takes its first (scalar) entry
    pred_ = [model.predict(data_test[:,i].reshape(-1,1))[0][0]
             for i in range(data_test.shape[1])]

    act, pre = y_tst.tolist()[0], pred_
    return act, pre

### 2. Data preparation

In [5]:
# importing the data
# the CSV location can be overridden through the BIKE_DATA_CSV environment
# variable; the fallback is the path the notebook was originally run with
DATA_PATH = os.environ.get('BIKE_DATA_CSV', '/home/kevin/Downloads/bike data.csv')
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,Date,Season,Hour,Holiday,Day of the Week,Working Day,Weather Type,Temperature F,Temperature Feels F,Humidity,Wind Speed,Casual Users,Registered Users,Total Users
0,1/1/2011,4,0,0,6,0,1,36.6,37.4,81,0,3,13,16
1,1/1/2011,4,1,0,6,0,1,34.9,35.6,80,0,8,32,40
2,1/1/2011,4,2,0,6,0,1,34.9,35.6,80,0,5,27,32
3,1/1/2011,4,3,0,6,0,1,36.6,37.4,75,0,3,10,13
4,1/1/2011,4,4,0,6,0,1,36.6,37.4,75,0,0,1,1


In [6]:
df[['Temperature Feels F','Humidity','Wind Speed','Registered Users']].describe()

Unnamed: 0,Temperature Feels F,Humidity,Wind Speed,Registered Users
count,17379.0,17379.0,17379.0,17379.0
mean,59.722009,62.722884,12.736233,153.786869
std,20.415345,19.292983,8.196891,151.357286
min,3.2,0.0,0.0,0.0
25%,42.8,48.0,7.0,34.0
50%,60.8,63.0,13.0,115.0
75%,77.0,78.0,17.0,220.0
max,122.0,100.0,57.0,886.0


In [7]:
# feature matrix: the three weather columns used as model inputs
X = df.loc[:, ['Temperature Feels F','Humidity','Wind Speed']]

In [8]:
# regression target
y = df.loc[:, 'Registered Users']

In [9]:
# min-max scaling the data: (value - min) / (max - min), column by column
# NOTE(review): min and max are taken over the full dataset, i.e. before the
# train/test split

X = X.sub(X.min()).div(X.max().sub(X.min()))
y = y.sub(y.min()).div(y.max() - y.min())

In [10]:
# fixed random_state so the 80/20 split is the same on every run
X_train,X_test,y_train,y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 3. Prediction and evaluation

In [11]:
# the network weights are drawn with np.random.rand, so seed numpy's global
# generator to make the training run repeatable
np.random.seed(42)

# main expects one sample per column, hence the transposes / (1, n) reshapes
act, pred = main(data=X_train.to_numpy().T,
     y=y_train.to_numpy().reshape(1,-1),
     data_test=X_test.to_numpy().T,
     y_tst=y_test.to_numpy().reshape(1,-1),
     dim=[3,2,1],
     epochs=10)

Epoch====> 1
Epoch====> 2
Epoch====> 3
Epoch====> 4
Epoch====> 5
Epoch====> 6
Epoch====> 7
Epoch====> 8
Epoch====> 9
Epoch====> 10


In [12]:
# coefficient of determination of the network on the test set
r2_score(y_true=act, y_pred=pred)

0.18135031652768008

### 4. Comparing the obtained results with Linear Regression from SKLearn

In [13]:
# baseline: linear regression fitted and scored on the same train/test split
# (LinearRegression is imported in the dependency cell at the top)
lrm = LinearRegression()

lrm.fit(X_train, y_train)

r2_score(y_test, lrm.predict(X_test))

0.18287537080065297