<a href="https://colab.research.google.com/github/ngoda/Conversations/blob/master/ch5Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
# Importing the numpy library.
import numpy as np

# Definition of the SoftmaxWithLoss class.
class SoftmaxWithLoss:
    def __init__(self):
        # Initializing instance variables to store loss, predicted probabilities, and true labels.
        self.loss = None
        self.y = None
        self.t = None

    # Forward propagation method.
    def forward(self, x, t):
        # Storing the true labels and computing the softmax activation for predicted probabilities.
        self.t = t
        self.y = softmax(x)
        # Computing the cross-entropy loss between predicted and true labels.
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    # Backward propagation method.
    def backward(self, dout=1):
        # Calculating the batch size.
        batch_size = self.t.shape[0]

        # Handling the case where the shape of true labels and predicted probabilities are the same.
        if self.t.size == self.y.size:
            # Computing the gradient of the loss with respect to predicted probabilities.
            dx = (self.y - self.t) / batch_size
        else:
            # Creating a copy of predicted probabilities and adjusting the gradient for true labels.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
        return dx




In [None]:
!pip install common

Collecting common
  Downloading common-0.1.2.tar.gz (3.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: common
  Building wheel for common (setup.py) ... [?25l[?25hdone
  Created wheel for common: filename=common-0.1.2-py3-none-any.whl size=3707 sha256=61c289291d440d75465e66d91fd72ec4fed4448153fd32b82ab8dff47f41b55a
  Stored in directory: /root/.cache/pip/wheels/c0/74/13/fe3274a9137054148c69e3f0424bd2fcf068ed25998047ecb5
Successfully built common
Installing collected packages: common
Successfully installed common-0.1.2


**Implementing a neural network with Backpropagation**

In [7]:
# Importing necessary modules and packages.
import sys
import os
import common  # Assuming 'common' module is in the parent directory.
from common.layers import *  # Importing layers from the 'common' module.
from common.gradient import numerical_gradient  # Importing numerical_gradient function.
from collections import OrderedDict  # Importing OrderedDict for defining layer sequence.

# Definition of the TwoLayersNet class.
class TwoLayersNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initializing parameters for the neural network.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        # Defining layers for the neural network using OrderedDict to maintain sequence.
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        # Initializing the last layer as SoftmaxWithLoss.
        self.lastLayer = SoftmaxWithLoss()

    # Method to make predictions given input data.
    def predict(self, x):
        # Forward propagation through all layers.
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    # Method to compute the loss given input data and true labels.
    def loss(self, x, t):
        # Making predictions for the input data.
        y = self.predict(x)
        # Computing the loss using the last layer.
        return self.lastLayer.forward(y, t)






In [8]:
# Method to compute the accuracy of the model given input data and true labels.
def accuracy(self, x, t):
    # Making predictions for the input data.
    y = self.predict(x)
    # Extracting the indices of the maximum value along axis 1 (column-wise) to get predicted labels.
    y = np.argmax(y, axis=1)
    # If true labels are not one-dimensional, extract the indices of the maximum value along axis 1.
    if t.ndim != 1:
        t = np.argmax(t, axis=1)
    # Computing accuracy by comparing predicted labels with true labels and calculating the ratio.
    accuracy = np.sum(y == t) / float(x.shape[0])
    return accuracy

# Method to compute the numerical gradient of the loss function with respect to parameters.
def numerical_gradient(self, x, t):
    # Defining a lambda function to compute the loss with respect to parameters.
    loss_W = lambda W: self.loss(x, t)
    # Initializing an empty dictionary to store gradients.
    grads = {}
    # Computing gradients for each parameter using numerical differentiation.
    grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
    grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
    grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
    grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
    # Returning the computed gradients.
    return grads

# Method to compute the gradient of the loss function with respect to parameters using backpropagation.
def gradient(self, x, t):
    # Forward pass to compute the loss.
    self.loss(x, t)
    # Initializing gradient of the loss from the last layer.
    dout = 1
    # Backward pass to compute gradients using backpropagation.
    dout = self.lastLayer.backward(dout)
    layers = list(self.layers.values())
    layers.reverse()
    for layer in layers:
        dout = layer.backward(dout)
    # Extracting gradients for each parameter from the layers.
    grads = {}
    grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
    grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
    # Returning the computed gradients.
    return grads


**Verifying the slope obtained by Backpropagation**

In [9]:
import sys
import os
import numpy as np
from dataset.mnist import load_mnist
from common.two_layer_net import TwoLayerNet

# Appending the parent directory to the system path to access necessary modules.
sys.path.append(os.pardir)

# Loading MNIST dataset with normalization and one-hot label encoding.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Creating a TwoLayerNet instance with specified input, hidden, and output sizes.
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Selecting a batch of input and target data.
x_batch = x_train[:3]
t_batch = t_train[:3]

# Computing gradients using both numerical differentiation and backpropagation.
grad_numerical = network.numerical_gradient(x_batch, t_batch)
grad_backprop = network.gradient(x_batch, t_batch)

# Comparing the gradients computed using numerical differentiation and backpropagation.
for key in grad_numerical.keys():
    # Calculating the average absolute difference between gradients.
    diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
    # Printing the key and the average absolute difference.
    print(key + ":" + str(diff))



W1:1.9230875415305199e-10
b1:1.0187862807553312e-09
W2:7.099420853265425e-08
b2:1.411706451820427e-07


**Implementing Learning Using Backpropagation**

In [12]:
# Import necessary libraries
import sys, os

# Add parent directory to the path (assuming the dataset and common folders are there)
sys.path.append(os.pardir)

# Import libraries
import numpy as np
from dataset.mnist import load_mnist
from common.two_layer_net import TwoLayerNet

# Load the MNIST dataset (normalized and one-hot encoded labels)
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Define the neural network architecture
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyperparameters
iters_num = 10000  # Number of training iterations
train_size = x_train.shape[0]  # Size of the training data
batch_size = 100     # Batch size for mini-batch training
learning_rate = 0.1  # Learning rate

# Lists to store training loss and accuracy
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch (considering train_size, batch_size, and minimum of 1)
iter_per_epoch = max(train_size, batch_size, 1)

# Training loop
for i in range(iters_num):
    # Create a random mask for selecting a batch of data
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]  # Select a batch of training data
    t_batch = t_train[batch_mask]  # Select corresponding labels

    # Calculate gradients (commented out numerical gradient for efficiency)
    # grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)  # Use more efficient gradient calculation

    # Update network weights with gradient descent
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Calculate loss for the current batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Evaluate and record training and test accuracy after each epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("Training Accuracy:", train_acc, "Test Accuracy:", test_acc)


Training Accuracy: 0.10218333333333333 Test Accuracy: 0.101
