In [None]:
#@title [Candidature form]
# Colab form cell: every `#@param` annotation below exposes the assignment as an
# input field, so students fill in their details without editing the code.

Name = '' #@param {type: "string"}
Matriculation_number = '' #@param {type:"string"}
Faculty = ''  #@param {type: "string"}
Course = '' #@param {type:"string"}
Current_semester = "" #@param ["1", "2", "3", "4", "5", "6", "7"] {allow-input: true}

# Deep Neural Network with Backpropagation
## Machine Learning Project - 04 
This project is created and adapted as a Jupyter notebook for [**MME 26849: "Fundamentals of Deep Learning and TinyML"**](https://felix.hs-furtwangen.de/url/RepositoryEntry/4020862983) by the course instructors [Marcus Rüb](https://linkedin.com/in/marcus-rüb-3b07071b2) and [Ajay Krishna](https://linkedin.com/in/ajay-krishna-2031a5119).

## Resources
Slides from the lecture:
[04 - Neural Networks](https://github.com/r1marcus/Fundamentals-of-Deep-Learning-HFU/blob/main/Slides/04%20-%20neural%20networks/Neuralnetworks.pptx)

## Goal of the project
Implementation of a neural network with backpropagation to extract, examine, analyze, train, and predict on the Wheat-Seeds dataset.


## Import libraries 

In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

## Functions: 

In [48]:
#activation function (sigmoid)
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    NumPy scalar or ndarray with the same shape as ``x``; values lie in [0, 1].
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it can never overflow, whereas the
    # naive exp(-x) overflows (RuntimeWarning) for large negative x.
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- the same value
    # written so that only the non-overflowing exponential is needed.
    return np.where(x >= 0, 1.0, e) / (1.0 + e)

def sigmoid_prime(x):
    """Derivative of the sigmoid at ``x``, computed as ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s*(1.0 - s)

In [49]:
class NeuralNetwork(object):
    """Fully connected feed-forward network with sigmoid activations.

    Training processes one example at a time: forward pass, cost, backward
    pass, gradient-descent step.

    Per-layer quantities are stored in ``self.parameters`` under string keys:
    ``'W<l>'`` (weights), ``'b<l>'`` (biases), ``'z<l>'`` (pre-activations) and
    ``'a<l>'`` (activations) for ``l = 1..L``, plus ``'a0'`` (current input) and
    ``'C'`` (cost of the most recent example). Gradients are stored in
    ``self.derivatives`` under ``'dz<l>'``, ``'dW<l>'`` and ``'db<l>'``.
    """

    def __init__(self, architecture, alpha=0.01, seed=None):
        """Build the network and initialise its parameters.

        Parameters
        ----------
        architecture : array-like of int
            The ith element is the number of neurons in the ith layer
            (index 0 is the input layer). Needs at least two entries.
        alpha : float, optional
            Learning rate used by ``update_parameters`` (default 0.01).
        seed : int or None, optional
            When given, the weights are drawn from a private generator seeded
            with this value so the initialisation is reproducible. When None,
            NumPy's global random state is used.

        Raises
        ------
        ValueError
            If ``architecture`` has fewer than two layers.
        """
        #Initialize the network architecture
        self.n = np.asarray(architecture) #n stores the number of neurons in each layer
        if self.n.size < 2:
            raise ValueError("architecture must contain at least an input and an output layer")
        self.L = self.n.size - 1 #The index of the last layer L
        self.input_size = self.n[0] #input_size is the number of neurons in the first layer
        self.output_size = self.n[self.L] #output_size is the number of neurons in the last layer

        #Parameters will store the network parameters, i.e. the weights and biases
        self.parameters = {}

        # Same randn interface either way; only the source of randomness differs.
        randn = np.random.randn if seed is None else np.random.RandomState(seed).randn

        #Initialize the network weights and biases:
        for l in range(1, self.L + 1):
            #Initialize weights to small random values
            self.parameters['W' + str(l)] = randn(self.n[l], self.n[l - 1]) * 0.01

            #Initialize rest of the parameters to 1
            self.parameters['b' + str(l)] = np.ones((self.n[l], 1))
            self.parameters['z' + str(l)] = np.ones((self.n[l], 1))
            self.parameters['a' + str(l)] = np.ones((self.n[l], 1))

        #The loop starts at 1, so a0 is initialised separately. It must have
        #the input layer's shape (not the shape of the last layer visited).
        self.parameters['a0'] = np.ones((self.n[0], 1))

        #Initialize the cost:
        self.parameters['C'] = 1

        #Create a dictionary for storing the derivatives:
        self.derivatives = {}

        #Learning rate
        self.alpha = alpha

    def forward_propagate(self, X):
        """Run one forward pass and cache every layer's ``z`` and ``a``.

        ``X`` is a single training example; it is multiplied by ``W1`` of shape
        ``(n[1], n[0])``, so it has to be a column vector of shape ``(n[0], 1)``.
        """
        self.parameters['a0'] = X

        #Calculate the activations for every layer l
        for l in range(1, self.L + 1):
            W = self.parameters['W' + str(l)]
            b = self.parameters['b' + str(l)]
            a_prev = self.parameters['a' + str(l - 1)]
            z = np.add(np.dot(W, a_prev), b)
            self.parameters['z' + str(l)] = z
            self.parameters['a' + str(l)] = sigmoid(z)

    def compute_cost(self, y):
        """Store the binary cross-entropy of the last forward pass in ``parameters['C']``.

        ``y`` is the target label of the example that was just propagated.
        """
        # Keep the activation strictly inside (0, 1): a saturated output of
        # exactly 0 or 1 would otherwise give log(0) = -inf and 0 * -inf = nan.
        eps = np.finfo(float).eps
        a_L = np.clip(self.parameters['a' + str(self.L)], eps, 1 - eps)
        self.parameters['C'] = -(y*np.log(a_L) + (1 - y)*np.log(1 - a_L))

    def compute_derivatives(self, y):
        """Backpropagate the error of the last forward pass into ``self.derivatives``.

        Must be called after ``forward_propagate``; ``y`` is the target label.
        """
        last = str(self.L)

        #Partial derivatives of the cost function with respect to z[L], W[L] and b[L]:
        dz = self.parameters['a' + last] - y
        self.derivatives['dz' + last] = dz
        self.derivatives['dW' + last] = np.dot(dz, np.transpose(self.parameters['a' + str(self.L - 1)]))
        self.derivatives['db' + last] = dz

        #Partial derivatives of the cost function with respect to z[l], W[l] and b[l],
        #walking from the last hidden layer back to the first layer:
        for l in range(self.L - 1, 0, -1):
            W_next = self.parameters['W' + str(l + 1)]
            dz_next = self.derivatives['dz' + str(l + 1)]
            dz = np.dot(np.transpose(W_next), dz_next)*sigmoid_prime(self.parameters['z' + str(l)])
            self.derivatives['dz' + str(l)] = dz
            self.derivatives['dW' + str(l)] = np.dot(dz, np.transpose(self.parameters['a' + str(l - 1)]))
            self.derivatives['db' + str(l)] = dz

    def update_parameters(self):
        """Apply one gradient-descent step (step size ``self.alpha``) to all weights and biases."""
        for l in range(1, self.L + 1):
            self.parameters['W' + str(l)] -= self.alpha*self.derivatives['dW' + str(l)]
            self.parameters['b' + str(l)] -= self.alpha*self.derivatives['db' + str(l)]

    def predict(self, x):
        """Forward-propagate the column vector ``x`` and return the last layer's activation."""
        self.forward_propagate(x)
        return self.parameters['a' + str(self.L)]

    def fit(self, X, Y, num_iter):
        """Train for ``num_iter`` passes over the data, one example at a time.

        Parameters
        ----------
        X : ndarray
            One training example per row.
        Y : indexable
            ``Y[i]`` is the label belonging to ``X[i]``.
        num_iter : int
            Number of passes over the whole training set.

        After every pass the mean cost and the training accuracy (in percent)
        are printed. Nothing is returned.
        """
        for epoch in range(0, num_iter):
            c = 0
            n_c = 0
            for i in range(0, X.shape[0]):
                x = X[i].reshape((X[i].size, 1))
                y = Y[i]
                self.forward_propagate(x)
                self.compute_cost(y)
                c += self.parameters['C']
                # The forward pass above already produced the output activation,
                # so it is reused here instead of propagating x a second time.
                y_pred = (self.parameters['a' + str(self.L)] > 0.5)
                if y_pred == y:
                    n_c += 1
                self.compute_derivatives(y)
                self.update_parameters()

            c = c/X.shape[0]
            acc = (n_c/X.shape[0])*100
            print('Iteration: ', epoch)
            print("Cost: ", c)
            print("Accuracy:", acc)

## Load data

The Wheat-Seeds data for our exercise can be found at: https://drive.google.com/file/d/1UGujQZnz65UL2GHucPaNUwRoH4y9oEdO/view?usp=sharing


Steps for loading data:

    1. Open the link above: "https://drive.google.com/file/d/1UGujQZnz65UL2GHucPaNUwRoH4y9oEdO/view?usp=sharing"
    2. Download the data and, if necessary, unzip/extract it on your local machine (it should be a .csv file).
    3. Click the folder icon on the left of the Colab window and upload "wheat-seeds-binary.csv" from your local machine.

In [50]:
# Read the uploaded CSV; the relative path means the file must sit in the
# notebook's current working directory.
dataset = pd.read_csv('wheat-seeds-binary.csv')
# Bare expression so the frame is rendered as the cell's output.
dataset

Unnamed: 0,Area,Perimeter,Compactness,Length of Kernel,Width of Kernel,Asymmetry Coefficient,Length of Kernel Groove,Class
0,15.26,14.84,0.8710,5.763,3.312,2.221,5.220,1
1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,1
2,14.29,14.09,0.9050,5.291,3.337,2.699,4.825,1
3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,1
4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,1
...,...,...,...,...,...,...,...,...
135,15.38,14.66,0.8990,5.477,3.465,3.600,5.439,2
136,17.36,15.76,0.8785,6.145,3.574,3.526,5.971,2
137,15.57,15.15,0.8527,5.920,3.231,2.640,5.879,2
138,15.60,15.11,0.8580,5.832,3.286,2.725,5.752,2


Shuffling the rows and remapping the class labels from {1, 2} to {0, 1}:

In [35]:
# Shuffle all rows (frac=1 keeps every row) and renumber the index from 0.
# A fixed random_state makes the shuffle identical on every
# "Restart & Run All", so the results below are reproducible.
shuffled_dataset = (
    dataset
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
# Shift the class labels down by one (1/2 -> 0/1) so they match the range of
# the network's sigmoid output.
shuffled_dataset['Class'] = shuffled_dataset['Class'] - 1

shuffled_dataset

Unnamed: 0,Area,Perimeter,Compactness,Length of Kernel,Width of Kernel,Asymmetry Coefficient,Length of Kernel Groove,Class
0,14.11,14.18,0.8820,5.541,3.221,2.754,5.038,0
1,13.54,13.85,0.8871,5.348,3.156,2.587,5.178,0
2,17.12,15.55,0.8892,5.850,3.566,2.858,5.746,1
3,19.57,16.74,0.8779,6.384,3.772,1.472,6.273,1
4,13.02,13.76,0.8641,5.395,3.026,3.373,4.825,0
...,...,...,...,...,...,...,...,...
135,18.95,16.42,0.8829,6.248,3.755,3.368,6.148,1
136,17.63,15.86,0.8800,6.033,3.573,3.747,5.929,1
137,14.11,14.26,0.8722,5.520,3.168,2.688,5.219,0
138,13.74,14.05,0.8744,5.482,3.114,2.932,4.825,0


In [51]:
# Feature matrix: every column except the last one, as a NumPy array.
X = shuffled_dataset.iloc[:, 0:-1].values
print(X)

[[14.11   14.18    0.882   5.541   3.221   2.754   5.038 ]
 [13.54   13.85    0.8871  5.348   3.156   2.587   5.178 ]
 [17.12   15.55    0.8892  5.85    3.566   2.858   5.746 ]
 [19.57   16.74    0.8779  6.384   3.772   1.472   6.273 ]
 [13.02   13.76    0.8641  5.395   3.026   3.373   4.825 ]
 [16.87   15.65    0.8648  6.139   3.463   3.696   5.967 ]
 [20.2    16.89    0.8894  6.285   3.864   5.173   6.187 ]
 [12.74   13.67    0.8564  5.395   2.956   2.504   4.869 ]
 [15.36   14.76    0.8861  5.701   3.393   1.367   5.132 ]
 [18.89   16.23    0.9008  6.227   3.769   3.639   5.966 ]
 [14.28   14.17    0.8944  5.397   3.298   6.685   5.001 ]
 [18.76   16.2     0.8984  6.172   3.796   3.12    6.053 ]
 [14.09   14.41    0.8529  5.717   3.186   3.92    5.299 ]
 [14.16   14.4     0.8584  5.658   3.129   3.072   5.176 ]
 [17.36   15.76    0.8785  6.145   3.574   3.526   5.971 ]
 [11.42   12.86    0.8683  5.008   2.85    2.7     4.607 ]
 [20.24   16.91    0.8897  6.315   3.962   5.901   6.188

In [52]:
# Label vector: the last column of the frame, as a NumPy array.
y = shuffled_dataset.iloc[:, -1].values
print(y)

[0 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0 1 1 0 0 0 1 1 1 0 0 1 1 1 0 1 0 0 0 1 0 1
 0 1 0 0 1 1 0 0 0 0 1 1 0 0 1 0 1 0 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 0 1 1
 1 0 0 0 1 0 1 0 1 1 0 1 1 1 0 1 0 0 1 1 0 1 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0
 1 1 0 0 1 1 0 1 0 1 1 1 0 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 1]


Normalization (standardizing each feature to zero mean and unit variance):

In [53]:
# Standardize every feature column with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full feature matrix, i.e. before the
# train/test split performed further down, so the test rows contribute to the
# scaling statistics -- consider fitting on the training split only.
sc_X = StandardScaler()
# X is overwritten with its standardized version.
X = sc_X.fit_transform(X)

print(X)

[[-9.27669545e-01 -9.45141858e-01  1.30405478e-02 -7.08250331e-01
  -8.52122634e-01 -3.17251824e-01 -9.68330367e-01]
 [-1.16538840e+00 -1.24649144e+00  3.35218788e-01 -1.18445906e+00
  -1.08288524e+00 -4.49012248e-01 -7.05557787e-01]
 [ 3.27652848e-01  3.05915481e-01  4.67880416e-01  5.41771144e-02
   3.72694294e-01 -2.35197428e-01  3.60548113e-01]
 [ 1.34942689e+00  1.39260032e+00 -2.45965488e-01  1.37177018e+00
   1.10403426e+00 -1.32873005e+00  1.34969919e+00]
 [-1.38225473e+00 -1.32867768e+00 -1.11774190e+00 -1.06849113e+00
  -1.54441046e+00  1.71129627e-01 -1.36812008e+00]
 [ 2.23390190e-01  3.97233534e-01 -1.07352136e+00  7.67256505e-01
   7.02431240e-03  4.25971644e-01  7.75353402e-01]
 [ 1.61216878e+00  1.52957740e+00  4.80514857e-01  1.12749730e+00
   1.43065210e+00  1.59130186e+00  1.18828174e+00]
 [-1.49902891e+00 -1.41086393e+00 -1.60416787e+00 -1.06849113e+00
  -1.79292404e+00 -5.14497967e-01 -1.28553441e+00]
 [-4.06356259e-01 -4.15497145e-01  2.72046584e-01 -3.13465893e-0

In [54]:
# Hold out 30 % of the examples for testing. The fixed random_state makes the
# split identical on every run, so the reported accuracies are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [55]:
# Neurons per layer: 7 inputs (one per feature column), 2 hidden, 1 output.
architecture = np.array([7, 2, 1])

In [56]:
# Build the network; its weights are randomly initialised in the constructor.
classifier = NeuralNetwork(architecture)

Training:

In [60]:
# Train for 10 passes over the training set; cost and accuracy are printed per pass.
# Re-running this cell continues training the same classifier object.
classifier.fit(X_train, y_train, 10)

Iteration:  0
Cost:  [[0.58094787]]
Accuracy: 84.6938775510204
Iteration:  1
Cost:  [[0.56044304]]
Accuracy: 86.73469387755102
Iteration:  2
Cost:  [[0.53968355]]
Accuracy: 86.73469387755102
Iteration:  3
Cost:  [[0.51912502]]
Accuracy: 86.73469387755102
Iteration:  4
Cost:  [[0.49911184]]
Accuracy: 88.77551020408163
Iteration:  5
Cost:  [[0.47987608]]
Accuracy: 88.77551020408163
Iteration:  6
Cost:  [[0.4615548]]
Accuracy: 89.79591836734694
Iteration:  7
Cost:  [[0.44421334]]
Accuracy: 90.81632653061224
Iteration:  8
Cost:  [[0.42786712]]
Accuracy: 90.81632653061224
Iteration:  9
Cost:  [[0.41249922]]
Accuracy: 90.81632653061224


Testing:

In [61]:
# Evaluate the trained classifier on the held-out test set.
n_c = 0
for i in range(0, X_test.shape[0]):
    # Dedicated per-example names (x_i, y_i): assigning to `y` here would replace
    # the full label array defined earlier and break a re-run of the split cell.
    x_i = X_test[i].reshape((X_test[i].size, 1))
    y_i = y_test[i]
    # Threshold the sigmoid output at 0.5 to obtain the predicted class.
    y_pred = (classifier.predict(x_i) > 0.5)
    if y_pred == y_i:
        n_c += 1

acc = (n_c/X_test.shape[0])*100
print("Test Accuracy", acc)

Test Accuracy 95.23809523809523


## Questionnaire: 

In [None]:
#@markdown 1. What is the objective of the backpropagation algorithm?
#@markdown - A) to develop a learning algorithm for multilayer feedforward neural networks
#@markdown - B) to develop a learning algorithm for single-layer feedforward neural networks
#@markdown - C) to develop a learning algorithm for multilayer feedforward neural networks, so that the network can be trained to capture the mapping implicitly
Ans = "" #@param ["A", "B", "C"] {allow-input: true}

In [None]:
#@markdown 2. The backpropagation law is also known as the generalized delta rule. Is this true?
#@markdown - A) Yes
#@markdown - B) No
Ans = "" #@param ["A", "B"] {allow-input: true}

In [None]:
#@markdown 3. Is there feedback in the final stage of the backpropagation algorithm?
#@markdown - A) Yes
#@markdown - B) No
Ans = "" #@param ["A", "B"] {allow-input: true}

In [None]:
#@markdown 4. What are the general tasks that are performed with the backpropagation algorithm?
#@markdown - A) Pattern mapping
#@markdown - B) Function approximation
#@markdown - C) Prediction
#@markdown - D) All of the above
Ans = "" #@param ["A", "B", "C", "D"] {allow-input: true}

In [None]:
#@markdown 5. Is backpropagation learning based on gradient descent along the error surface?
#@markdown - A) Yes
#@markdown - B) No
#@markdown - C) It depends on gradient descent but not on the error surface
Ans = "" #@param ["A", "B", "C"] {allow-input: true}