### **Task 1: Building a three-layer feed forward neural network from scratch by initializing random weights and biases.**

### Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [None]:
data = pd.read_csv("iris.csv")

In [None]:
data.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
data.shape

(150, 6)

In [None]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             150 non-null    int64  
 1   SepalLengthCm  150 non-null    float64
 2   SepalWidthCm   150 non-null    float64
 3   PetalLengthCm  150 non-null    float64
 4   PetalWidthCm   150 non-null    float64
 5   Species        150 non-null    object 
dtypes: float64(4), int64(1), object(1)
memory usage: 7.2+ KB


In [None]:
data.describe()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0,150.0
mean,75.5,5.843333,3.054,3.758667,1.198667
std,43.445368,0.828066,0.433594,1.76442,0.763161
min,1.0,4.3,2.0,1.0,0.1
25%,38.25,5.1,2.8,1.6,0.3
50%,75.5,5.8,3.0,4.35,1.3
75%,112.75,6.4,3.3,5.1,1.8
max,150.0,7.9,4.4,6.9,2.5


In [None]:
data.Species.value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: Species, dtype: int64

In [None]:
data.Species.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

### One Hot Encoding on Target Variable

In [None]:
# to convert categorical to dummy representation
Species = pd.get_dummies(data["Species"])

In [None]:
Species.head(5)

Unnamed: 0,Iris-setosa,Iris-versicolor,Iris-virginica
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0


In [None]:
# dropping species column from og dataset
data.drop(["Species", "Id"], axis=1, inplace=True)

In [None]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [None]:
std_scaler = StandardScaler()

In [None]:
data.shape

(150, 4)

In [None]:
data_scaled = std_scaler.fit_transform(data)

In [None]:
data_scaled.shape

(150, 4)

In [None]:
data_transformed = pd.DataFrame(data_scaled, columns=["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"])

In [None]:
iris_df = pd.concat([data_transformed, Species], axis=1)

In [None]:
iris_df.shape

(150, 7)

initializing weights and biases

In [None]:
np.random.seed(42)

w_i_h1 = np.random.rand(4,1)
w_i_h2 = np.random.rand(4,1)
w_h_o1 = np.random.rand(2,1)
w_h_o2 = np.random.rand(2,1)
w_h_o3 = np.random.rand(2,1)

bias1 = np.random.rand(1)
bias2 = np.random.rand(1)

In [None]:
print(w_i_h1,"\n")
print(w_i_h2,"\n")
print(w_h_o1,"\n")
print(w_h_o2,"\n")
print(w_h_o3,"\n")
print(bias1,"\n")
print(bias2)

[[0.37454012]
 [0.95071431]
 [0.73199394]
 [0.59865848]] 

[[0.15601864]
 [0.15599452]
 [0.05808361]
 [0.86617615]] 

[[0.60111501]
 [0.70807258]] 

[[0.02058449]
 [0.96990985]] 

[[0.83244264]
 [0.21233911]] 

[0.18182497] 

[0.18340451]


function for sigmoid

In [None]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

In [None]:
# input = data
input = iris_df.iloc[:, :4]

In [None]:
print(np.dot(input, w_i_h1))

[[-1.12397760e+00]
 [-2.31473260e+00]
 [-2.00712619e+00]
 [-2.18925583e+00]
 [-9.49361715e-01]
 [ 1.74452398e-01]
 [-1.49217944e+00]
 [-1.34773258e+00]
 [-2.76164185e+00]
 [-2.13181655e+00]
 [-5.06209341e-01]
 [-1.39687169e+00]
 [-2.43882224e+00]
 [-2.79060943e+00]
 [ 2.10437850e-01]
 [ 1.32733893e+00]
 [ 7.95099471e-03]
 [-1.04527019e+00]
 [ 1.18935653e-02]
 [-3.43650508e-01]
 [-1.08295297e+00]
 [-4.84941208e-01]
 [-1.29739203e+00]
 [-1.20297554e+00]
 [-1.27199564e+00]
 [-2.18609967e+00]
 [-1.14869241e+00]
 [-1.03697002e+00]
 [-1.29859348e+00]
 [-1.88225014e+00]
 [-2.05686602e+00]
 [-1.00878885e+00]
 [ 2.04311231e-01]
 [ 5.97538085e-01]
 [-2.13181655e+00]
 [-1.91260485e+00]
 [-9.84074033e-01]
 [-2.13181655e+00]
 [-2.58326909e+00]
 [-1.30235035e+00]
 [-1.13227777e+00]
 [-3.99916622e+00]
 [-2.14327288e+00]
 [-7.71279487e-01]
 [-9.84416957e-02]
 [-2.28140742e+00]
 [-3.80732566e-01]
 [-2.01088307e+00]
 [-5.51591569e-01]
 [-1.60935604e+00]
 [ 1.39641590e+00]
 [ 1.11957924e+00]
 [ 1.2929936

feetforward step 1 - input

In [None]:
z2_1=np.dot(input, w_i_h1) + bias1
z2_2=np.dot(input, w_i_h1) + bias2

In [None]:
print(z2_1)

[[-0.94215263]
 [-2.13290763]
 [-1.82530122]
 [-2.00743086]
 [-0.76753675]
 [ 0.35627737]
 [-1.31035447]
 [-1.16590762]
 [-2.57981689]
 [-1.94999158]
 [-0.32438437]
 [-1.21504672]
 [-2.25699727]
 [-2.60878447]
 [ 0.39226282]
 [ 1.5091639 ]
 [ 0.18977596]
 [-0.86344522]
 [ 0.19371853]
 [-0.16182554]
 [-0.901128  ]
 [-0.30311624]
 [-1.11556707]
 [-1.02115057]
 [-1.09017067]
 [-2.0042747 ]
 [-0.96686745]
 [-0.85514505]
 [-1.11676851]
 [-1.70042517]
 [-1.87504105]
 [-0.82696388]
 [ 0.3861362 ]
 [ 0.77936305]
 [-1.94999158]
 [-1.73077989]
 [-0.80224907]
 [-1.94999158]
 [-2.40144413]
 [-1.12052539]
 [-0.9504528 ]
 [-3.81734126]
 [-1.96144791]
 [-0.58945452]
 [ 0.08338327]
 [-2.09958245]
 [-0.1989076 ]
 [-1.8290581 ]
 [-0.3697666 ]
 [-1.42753108]
 [ 1.57824087]
 [ 1.30140421]
 [ 1.47481864]
 [-1.45256041]
 [ 0.50841935]
 [-0.05367865]
 [ 1.6379782 ]
 [-2.03235535]
 [ 0.61638487]
 [-0.6716326 ]
 [-2.78371486]
 [ 0.50962079]
 [-1.6817696 ]
 [ 0.50980648]
 [-0.25369093]
 [ 1.09722002]
 [ 0.49835

In [None]:
z2_1.shape

(150, 1)

feetforward step 2

In [None]:
h2_1 = sigmoid(z2_1)
h2_2 = sigmoid(z2_2)

In [None]:
print(h2_1.shape)
print(h2_2.shape)

(150, 1)
(150, 1)


each output neuron gets input from both hidden

In [None]:
h2 = np.append(h2_1, h2_2, axis=1)

In [None]:
h2

array([[0.28046573, 0.2807846 ],
       [0.10593928, 0.10608898],
       [0.13879898, 0.1389879 ],
       [0.11842493, 0.11858994],
       [0.3170122 , 0.31735429],
       [0.58813899, 0.58852155],
       [0.21242753, 0.21269191],
       [0.2375955 , 0.23788174],
       [0.07044872, 0.07055223],
       [0.12455428, 0.12472661],
       [0.41960762, 0.41999234],
       [0.22880931, 0.22908815],
       [0.0947476 , 0.09488317],
       [0.0685752 , 0.06867616],
       [0.59682731, 0.59720733],
       [0.81893726, 0.81917136],
       [0.54730211, 0.54769343],
       [0.29662004, 0.2969497 ],
       [0.54827875, 0.54866992],
       [0.45963167, 0.46002401],
       [0.28881875, 0.2891433 ],
       [0.42479587, 0.42518187],
       [0.24683447, 0.24712824],
       [0.26480334, 0.26511097],
       [0.25158614, 0.25188367],
       [0.11875484, 0.11892024],
       [0.27550533, 0.27582072],
       [0.29835468, 0.29868545],
       [0.24661118, 0.24690477],
       [0.15440974, 0.15461609],
       [0.

feetforward step 3 - input from hidden layer to output

In [None]:
z3_1 = np.dot(h2, w_h_o1)
z3_2 = np.dot(h2, w_h_o2)
z3_3 = np.dot(h2, w_h_o3)

generating outputs

In [None]:
o1 = sigmoid(z3_1)
o2 = sigmoid(z3_2)
o3 = sigmoid(z3_3)

In [None]:
o1.shape

(150, 1)

In [None]:
o1

array([[0.59083252],
       [0.53464449],
       [0.54533705],
       [0.53871169],
       [0.60235101],
       [0.68357609],
       [0.5691281 ],
       [0.57719272],
       [0.5230596 ],
       [0.54070644],
       [0.63404684],
       [0.57438183],
       [0.5309948 ],
       [0.52244722],
       [0.68603088],
       [0.74503563],
       [0.67190157],
       [0.59593713],
       [0.67218336],
       [0.64611874],
       [0.59347453],
       [0.63562166],
       [0.58014305],
       [0.58586444],
       [0.58165819],
       [0.53881909],
       [0.58926106],
       [0.59648404],
       [0.58007181],
       [0.55040258],
       [0.54343969],
       [0.59835273],
       [0.68561484],
       [0.71048592],
       [0.54070644],
       [0.54913092],
       [0.60000624],
       [0.54070644],
       [0.52718063],
       [0.57984931],
       [0.59030305],
       [0.50704662],
       [0.54030175],
       [0.61474984],
       [0.66421798],
       [0.53568675],
       [0.64336116],
       [0.545

### **Task 2: Compute the total squared error.**

In [None]:
y_pred = np.concatenate((o1,o2,o3), axis=1)

In [None]:
y_pred.shape

(150, 3)

In [None]:
Species.shape

(150, 3)

In [None]:
# for regression problems:
def total_squared_error(y_true, y_pred):
  tse = np.sum((y_true - y_pred) ** 2)
  return tse

In [None]:
# for classification:
def categorical_cross_entropy_loss(y_true, y_pred):
  loss = -np.sum(y_true * np.log(y_pred)) / len(y_true)
  return loss

In [None]:
normalized_y_pred = y_pred / np.sum(y_pred, axis=1, keepdims=True)

In [None]:
print(categorical_cross_entropy_loss(Species,normalized_y_pred))

Iris-setosa        0.359118
Iris-versicolor    0.373159
Iris-virginica     0.371374
dtype: float64


### **Task 3: Change the initial weights and biases and compute the error again.**

new random values for weights and biases

In [None]:
np.random.seed(25)

w_i_h1=np.random.rand(4,1)
w_i_h2=np.random.rand(4,1)
w_h_o1=np.random.rand(2,1)
w_h_o2=np.random.rand(2,1)
w_h_o3=np.random.rand(2,1)

bias1=np.random.rand(1)
bias2=np.random.rand(1)

In [None]:
print(w_i_h1,"\n")
print(w_i_h2,"\n")
print(w_h_o1,"\n")
print(w_h_o2,"\n")
print(w_h_o3,"\n")
print(bias1,"\n")
print(bias2)

[[0.87012414]
 [0.58227693]
 [0.27883894]
 [0.18591123]] 

[[0.41110013]
 [0.11737555]
 [0.68496874]
 [0.43761106]] 

[[0.55622933]
 [0.36708032]] 

[[0.40236573]
 [0.1130407 ]] 

[[0.44703085]
 [0.58544512]] 

[0.1619851] 

[0.52071879]


In [None]:
z2_1=np.dot(input,w_i_h1)+bias1
z2_2=np.dot(input,w_i_h2)+bias2

In [None]:
h2_1=sigmoid(z2_1)
h2_2=sigmoid(z2_2)

In [None]:
h2=np.append(h2_1,h2_2,axis=1)
h2

array([[0.3455015 , 0.22763463],
       [0.17895588, 0.18890432],
       [0.18532551, 0.17633203],
       [0.15588979, 0.17646704],
       [0.35215894, 0.22367677],
       [0.57757762, 0.32481881],
       [0.21817626, 0.19148286],
       [0.29667379, 0.22101903],
       [0.10107529, 0.1501574 ],
       [0.19824764, 0.19022218],
       [0.49069071, 0.27308902],
       [0.25765484, 0.21074995],
       [0.16066197, 0.17300308],
       [0.09726099, 0.12670566],
       [0.67724413, 0.30679878],
       [0.78093453, 0.37182992],
       [0.56203168, 0.29162219],
       [0.35104932, 0.23790825],
       [0.61537132, 0.33917513],
       [0.45156441, 0.26042756],
       [0.39897194, 0.27237879],
       [0.42442112, 0.26631997],
       [0.2507319 , 0.16806264],
       [0.31271485, 0.27158768],
       [0.26685769, 0.23084302],
       [0.20000011, 0.20925353],
       [0.31035789, 0.24866994],
       [0.37341715, 0.24361663],
       [0.33890408, 0.23164162],
       [0.19261523, 0.19395005],
       [0.

In [None]:
z3_1=np.dot(h2,w_h_o1)
z3_2=np.dot(h2,w_h_o2)
z3_3=np.dot(h2,w_h_o3)

In [None]:
o1=sigmoid(z3_1)
o2=sigmoid(z3_2)
o3=sigmoid(z3_3)

In [None]:
y_pred = np.concatenate((o1,o2,o3), axis=1)

In [None]:
normalized_y_pred = y_pred / np.sum(y_pred, axis=1, keepdims=True)

In [None]:
print(categorical_cross_entropy_loss(Species,normalized_y_pred))

Iris-setosa        0.361725
Iris-versicolor    0.392083
Iris-virginica     0.345960
dtype: float64


To have the values with gradient descent:

In [None]:
input_size = 4 # no. of features in the input
hidden_size = 2 # no. of neurons in the hidden layer
output_size = 3 # no. of classes for the output

In [None]:
w_1 = np.random.rand(input_size, hidden_size)
b_1 = np.zeros((1, hidden_size))
w_2 = np.random.rand(hidden_size, output_size)
b_2 = np.zeros((1, output_size))

forward pass function

In [None]:
def forward(X):
  # input to hidden layer
  h = X.dot(w_1) + b_1
  a = 1 / (1 + np.exp(-h)) # sigmoid activation for hidden layer

  # hidden layer to output
  hat_y = a.dot(w_2) + b_2
  y_hat = 1 / (1 + np.exp(-hat_y)) # sigmoid activation for output layer

  return y_hat, a, h

loss function

In [None]:
def cross_entropy_loss(y_pred, y_true):
  loss = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) / len(y_true)

  return loss

backpropagation

In [None]:
def backward(X, y_hat, y_true, a, h):
  # to calculate gradients
  delta = (y_hat - y_true) / len(y_true)
  grad_w2 = a.T.dot(delta)
  grad_b2 = np.sum(delta, axis=0, keepdims=True)
  grad_a = delta.dot(w_2.T)
  grad_h = grad_a * a * (1 - a) # gradient for sigmoid activation
  grad_w1 = X.T.dot(grad_h)
  grad_b1 = np.sum(grad_h, axis=0, keepdims=True)

  return grad_w1, grad_b1, grad_w2, grad_b2

training data and targets

In [None]:
x = np.random.rand(100, input_size) # 100 samples, 4 features

In [None]:
y_true = np.random.rand(100, output_size) # 100 samples, one hot encoded targets

gradient descent

In [None]:
learning_rate = 0.01
for epoch in range(100):
  # Forward pass
  y_hat, a, h = forward(x)
  # Calculate the loss
  loss = cross_entropy_loss(y_hat, y_true)
  # Backward pass
  grad_w1, grad_b1, grad_w2, grad_b2 = backward(x, y_hat, y_true, a, h)
  # Update the weights
  w_1 -= learning_rate * grad_w1
  b_1 -= learning_rate * grad_b1
  w_2 -= learning_rate * grad_w2
  b_2 -= learning_rate * grad_b2
  # Print the loss every few epochs
  if epoch % 10 == 0:
    print(f'Epoch {epoch}, Loss: {loss}')

Epoch 0, Loss: 2.2182918231375877
Epoch 10, Loss: 2.204100811083034
Epoch 20, Loss: 2.1912171810740233
Epoch 30, Loss: 2.1795307570138376
Epoch 40, Loss: 2.1689387598158802
Epoch 50, Loss: 2.1593456592889115
Epoch 60, Loss: 2.1506629558324932
Epoch 70, Loss: 2.142808908199113
Epoch 80, Loss: 2.135708221751922
Epoch 90, Loss: 2.1292917097418673
