<a href="https://colab.research.google.com/github/sauvatu/HS-algorithm/blob/master/Chp5_Backpropagation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Python Implementation of Multiplication and addition layers**

In [22]:
class MulLayer:
  def __init__(self):# Initialize variables to hold inputs
    self.x = None
    self.y = None
  def forward(self, x, y): # Store inputs for later use in backward pass
    self.x = x
    self.y = y
    out = x * y # Compute forward pass: multiply inputs
    return out
  def backward(self, dout):  # Compute gradients using the chain rule
    dx = dout * self.y # Derivative of the forward pass w.r.t x
    dy = dout * self.x # Derivative of the forward pass w.r.t y
    return dx, dy

In [23]:
class AddLayer:
  def _init__(self): # Constructor method, no initialization needed
    pass
  def forward(self, x, y):# Compute the forward pass: addition of inputs
    out = x + y
    return out
  def backward(self, dout):# Compute gradients using the chain rule
    dx = dout * 1 # Derivative of the forward pass w.r.t x
    dy= dout * 1  # Derivative of the forward pass w.r.t y
    return dx, dy

# **Python Implementation of the ReLU layer**

In [24]:
class Relu:
  def _init__(self):# Constructor method initializes mask to None
    self.mask = None
# Compute the forward pass: Rectified Linear Unit (ReLU) activation function
  def forward(self, x):
    self.mask = (x <= 0)# Create a mask to identify negative values
    out = x.copy()# Make a copy of input array
    out[self.mask] = 0 # Set negative values to 0
    return out
  def backward(self, dout): # Compute gradients using the chain rule
    dout[self.mask] = 0 # Set gradients corresponding to negative values to 0
    dx = dout # Output gradient is the same as input gradient for positive values
    return dx

# **Python Implementation of the Sigmoid Layer**

In [25]:
class Sigmoid:
  def __init__(self):#Creating constructor and intializing it to none
    self.out = None
  def forward(self, x):# Compute the forward pass: Sigmoid activation function
    out = sigmoid(x)# Invoking sigmoid function
    self.out = out#Storing the output for later use
    return out
  def backward(self, dout):#Calculating gradient using the chain rule
    dx = dout (1.0 - self.out) * self.out
    return dx

# **Python Implementation of Batch-Based Affine Layer**

In [26]:
import numpy as np

class Affine:
#Creating constructor and intializing parameters and variables
  def __init__(self, W, b):
    self.W =W #Weight Matrix
    self.b = b#Bias Vector
    self.x = None#Input Data
    self.original_x_shape = None
    self.dw = None #Gradient of Weight
    self.db = None #Gradient of Bias
# Compute the forward pass through the affine layer
  def forward(self, x):
    self.original_x_shape = x.shape
    x = x.reshape(x.shape[0], -1)
    self.x = x
    out = np.dot(self.x, self.W) + self.b
    return out
# Compute gradients using the chain rule
  def backward(self, dout):
    dx = np.dot(dout, self.W.T)
    self.dw = np.dot(self.x.T, dout)
    self.db = np.sum(dout, axis=0)
    dx = dx.reshape(*self.original_x_shape)
    return dx

# **Python Implementation of the Softmax with Loss Layer**

In [33]:
import numpy as np

class SoftmaxWithLoss:
  def __init__(self):#Constructor method intializing variables.
    self.loss = None #Loss Value
    self.y = None #Predicated probabilities (Softmax Output)
    self.t = None #Ground truth labels
# Compute the forward pass: Softmax activation followed by cross-entropy loss
  def forward(self, x, t):
    self.t = t
    self.y = softmax(x)# Compute softmax probabilities
    self.loss = cross_entropy_error(self.y, self.t)# Compute cross-entropy loss
    return self.loss
# Compute gradients using the chain rule
  def backward(self, dout=1):
    batch_size = self.t.shape[0]
    if self.t.size == self.y.size:
      dx = (self.yself.t) / batch_size # Derivative of the forward pass w.r.t. input data
    else:
      dx = self.y.copy()
      dx[np.arange(batch_size), self.t] -= 1
      dx = dx/batch_size
    return dx

# **Implementing a neural network with Backpropagation**

In [28]:
# Import necessary modules
import sys, os
# Append parent directory to the system path
sys.path.append(os.pardir)
# Import required libraries
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
  def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        # Initializing Weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Generate Layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()

  def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    # x:Input Data, t: Teacher Data
  def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

  def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x:Input Data, t: Teacher Data
  def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

  def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # Setting
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads

# **Verifying the slope obtained by Backpropagation**

In [34]:
# Import necessary modules
import sys, os
sys.path.append(os.pardir)# Append parent directory to the system path
# Import required libraries
import numpy as np
# Import load_mnist function from dataset.mnist module
from dataset.mnist import load_mnist
# Import TwoLayerNet class from common.two_layer_net module
from common.two_layer_net import TwoLayerNet

# Load the MNIST dataset and preprocess it
(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label=True)
# Create an instance of the TwoLayerNet class with specified input, hidden, and output sizes
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# Select a small batch of training data for gradient computation
x_batch = x_train[:3]
t_batch = t_train[:3]

# Compute gradients using numerical differentiation and backpropagation
grad_numerical = network.numerical_gradient(x_batch, t_batch)# Numerical gradients
grad_backprop = network.gradient(x_batch, t_batch)# Gradients computed using backpropagation

# Compare the gradients computed by both methods
for key in grad_numerical.keys():
# Compute the absolute difference between numerical and backpropagated gradients
  diff = np.average(np.abs(grad_backprop[key] - grad_numerical [key]))
  print(key + ":" + str(diff))

W1:2.0233361544731985e-10
b1:1.0308814243925211e-09
W2:7.203301550350039e-08
b2:1.4366163690188173e-07


# **Implementing Learning Using Backpropagation**

In [37]:
#import necessary modules
import sys, os
sys.path.append(os.pardir)# Append parent directory to the system path
# Import required libraries
import numpy as np
from dataset.mnist import load_mnist
from common.two_layer_net import TwoLayerNet

#Loading Data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
# Set up hyperparameters
iters_num = 10000 # Number of iterations for training
train_size = x_train.shape[0]# Size of the training dataset
batch_size = 100 # Batch size for stochastic gradient descent
learning_rate = 0.1 # Learning rate for updating parameters

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Calculate the number of iterations per epoch
iter_per_epoch = max(train_size, batch_size, 1)

# Main training loop
for i in range(iters_num):
# Create a random batch of training data
  batch_mask = np.random.choice(train_size, batch_size)
  x_batch = x_train [batch_mask]
  t_batch =  t_train [batch_mask]
  grad =  network.gradient(x_batch, t_batch)

# Compute gradients and update parameters using stochastic gradient descent
  for key in ('W1', 'b1', 'W2', 'b2'):
      network.params[key] -= learning_rate * grad[key]
# Compute training loss and store it
  loss = network.loss(x_batch, t_batch)
  train_loss_list.append(loss)

# Calculate and store training and test accuracy at the end of each epoch
  if i % iter_per_epoch == 0:
    train_acc = network.accuracy(x_train, t_train)
    test_acc = network.accuracy(x_test, t_test)
    train_acc_list.append(train_acc)
    test_acc_list.append(test_acc)
# Print training and test accuracy
    print('Printing the training and test accuracy: =', + train_acc, test_acc)

Printing the training and test accuracy: = 0.10441666666666667 0.1028
