**Q1.** 
Implement a simple 3-layer neural network to be trained on the MNIST dataset, but now you also have to write the complete backward pass yourself. The model class should have both `forward` and `backward` methods.

In [1]:
import torch
import torchvision
import torch.utils.data
import torch.nn as nn
import torchvision.transforms as transforms
import numpy as np

In [2]:
# Notebook outline:
#   1. prepare the data (dataset, dataloader)
#   2. design the model (3-layer neural net)
#   3. define the loss and back-propagation
#   4. run the training loop (forward, loss, backward, update)

# Pick the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# HYPERPARAMETERS
batch_size = 100        # samples per mini-batch handed out by the data loaders
num_epochs = 10         # number of passes over the training loader
learning_rate = 0.001   # step size used in the weight update
input_size = 28 * 28    # length of one flattened input image
hidden_size = 120       # width of the hidden layer
num_classes = 10        # number of output classes

In [3]:
# `root` is given as a keyword argument holding a plain path. Writing it as the
# quoted string 'root=./MNIST_DATA' would make the whole text (including
# "root=") the positional root argument, i.e. the directory name itself.
train_dataset = torchvision.datasets.MNIST(root='./MNIST_DATA', train=True,
                                           transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./MNIST_DATA', train=False,
                                          transform=transforms.ToTensor(), download=True)

# Only the training loader shuffles; the test loader iterates in dataset order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size)

100%|██████████| 9.91M/9.91M [00:18<00:00, 522kB/s] 
100%|██████████| 28.9k/28.9k [00:00<00:00, 77.8kB/s]
100%|██████████| 1.65M/1.65M [00:06<00:00, 255kB/s] 
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.80MB/s]


In [4]:
def relu(x):
  """Element-wise rectifier: negative entries of x become 0, all others pass through."""
  # np.maximum compares element by element against 0, so the result keeps the
  # shape of x (np.max, by contrast, would reduce the array to a single value).
  rectified = np.maximum(0, x)
  return rectified

def softmax(x):
  """Numerically stable softmax taken down axis 0 (one distribution per column).

  Args:
    x: array of scores; for a 2-D input each column is treated as one sample.

  Returns:
    Array of the same shape as x whose entries along axis 0 sum to 1.
  """
  # Subtracting the per-column maximum leaves the result mathematically
  # unchanged but keeps every exponent <= 0, so np.exp cannot overflow to inf
  # (which would otherwise turn the output into NaN for large scores).
  shifted = x - np.max(x, axis=0, keepdims=True)
  exps = np.exp(shifted)  # computed once and reused for numerator and denominator
  return exps / np.sum(exps, axis=0, keepdims=True)

def derivative_relu(x):
  """Gradient mask of ReLU: True where x is strictly positive, False elsewhere."""
  # x is an array, so a scalar `if x > 0: ... else: ...` branch cannot be used;
  # the comparison is evaluated element by element instead.
  positive_mask = x > 0
  return positive_mask

In [5]:
class NeuralNet3_model(nn.Module):
  """Input -> hidden (ReLU) -> output (softmax) network with hand-written backprop.

  All tensors are NumPy arrays laid out with one sample per column. The weights
  are plain NumPy arrays stored on the instance (they are not torch parameters),
  and the gradient step is applied directly inside `backward`.
  """

  def __init__(self, input_size=input_size, hidden_size=hidden_size, num_classes=num_classes, batch_size=batch_size, learning_rate=learning_rate) -> None:
    """Create the two weight matrices.

    Args:
      input_size: number of input features per sample.
      hidden_size: number of hidden units.
      num_classes: number of output classes.
      batch_size: nominal mini-batch size; stored for reference only, the
        gradient is normalised by the size of the batch actually forwarded.
      learning_rate: step size of the gradient-descent update.
    """
    super().__init__()
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    # NOTE(review): the weights are drawn from an unscaled standard normal. The
    # first-epoch loss recorded in this notebook (~16) suggests very large
    # logits; consider scaling by sqrt(2 / fan_in) if training is too slow.
    self.w1 = np.random.randn(input_size, hidden_size)
    self.w2 = np.random.randn(hidden_size, num_classes)

  def forward(self, x):
    """Run the forward pass and cache the intermediates needed by `backward`.

    Args:
      x: array of shape (input_size, n_samples).

    Returns:
      Softmax output of shape (num_classes, n_samples).
    """
    self.x = x
    self.z1 = self.w1.T.dot(x)           # hidden pre-activation
    self.a1 = relu(self.z1)              # hidden activation
    self.z2 = self.w2.T.dot(self.a1)     # output scores
    self.output = softmax(self.z2)
    return self.output

  def backward(self, actual_label):
    """Back-propagate the softmax + cross-entropy loss and update both weights.

    Must be called after `forward`, because it reads the values cached there.

    Args:
      actual_label: one-hot targets of shape (num_classes, n_samples), matching
        the batch last passed to `forward`.
    """
    # Average over the samples actually present in the forwarded batch, so a
    # batch smaller than the configured batch_size is still scaled correctly.
    n_samples = self.x.shape[1]

    # For softmax followed by cross-entropy the gradient w.r.t. the output
    # scores reduces to (prediction - one-hot target).
    dl_dz2 = self.output - actual_label
    dl_dw2 = (1 / n_samples) * self.a1.dot(dl_dz2.T)
    dl_da1 = self.w2.dot(dl_dz2)
    dl_dz1 = dl_da1 * derivative_relu(self.z1)
    dl_dw1 = (1 / n_samples) * self.x.dot(dl_dz1.T)

    # Plain gradient-descent step, applied in place.
    alpha = self.learning_rate
    self.w1 -= alpha * dl_dw1
    self.w2 -= alpha * dl_dw2

  def backward_weight_upd(self, actual_label):
    """Original name of the backward/update step; delegates to `backward`."""
    self.backward(actual_label)

In [6]:
def image_convert(x):
  """Flatten a batch of image tensors into a (features, samples) NumPy array.

  Args:
    x: torch tensor whose first dimension indexes the samples of the batch.

  Returns:
    NumPy array with one flattened sample per column.
  """
  # Take the sample count from the tensor itself rather than from the global
  # batch_size, so a batch of any size keeps one column per sample instead of
  # being silently mis-shaped or failing.
  n_samples = x.shape[0]
  flat = x.reshape(n_samples, -1)
  return flat.T.numpy()
def convert_one_hot(y, num_classes=10):
  """One-hot encode integer labels into a (num_classes, n_samples) array.

  Args:
    y: 1-D array-like of integer class labels (NumPy array, list or CPU tensor).
    num_classes: number of rows of the encoding; defaults to 10.

  Returns:
    Float array in which column j holds a single 1 at row y[j].

  Raises:
    IndexError: if any label lies outside [0, num_classes).
  """
  labels = np.asarray(y)
  n_samples = labels.shape[0]
  # Negative labels would otherwise wrap around silently under NumPy indexing.
  if n_samples and (labels.min() < 0 or labels.max() >= num_classes):
    raise IndexError(f"labels must lie in [0, {num_classes})")
  one_hot = np.zeros((num_classes, n_samples))
  one_hot[labels, np.arange(n_samples)] = 1
  return one_hot


In [7]:
# Build the network using the constructor's default hyperparameters.
model = NeuralNet3_model()
# Pull one mini-batch from the training loader.
# NOTE(review): img/label are not referenced by the cells visible here;
# presumably kept for inspecting shapes.
img, label = next(iter(train_loader))


In [8]:
def cross_entropy_loss(y_pred, y_true):
    """Cross-entropy between predicted probabilities and one-hot targets.

    The element-wise products y_true * log(y_pred) are summed over the whole
    array and divided by the number of columns of y_true.
    """
    epsilon = 1e-12  # keeps log() finite when a predicted probability is 0
    n_samples = y_true.shape[1]
    log_probs = np.log(y_pred + epsilon)
    summed = np.sum(y_true * log_probs)
    return -summed / n_samples

# TRAINING LOOP
# Every epoch makes one pass over train_loader; each mini-batch goes through
# forward -> loss -> backward/update, in that order.
for epoch in range(num_epochs):
    total_loss = 0  # running sum of the per-batch losses of this epoch
    for img_batch, label_batch in train_loader:
        # Convert the torch batch into the NumPy inputs/targets the model consumes.
        x = image_convert(img_batch)
        y = convert_one_hot(label_batch.numpy())
        prediction = model.forward(x)
        loss = cross_entropy_loss(prediction, y)
        total_loss += loss
        # Must come after forward(): the update reads the values cached on the model.
        model.backward_weight_upd(y)

    # Mean of the batch losses (len(train_loader) is the number of batches).
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {avg_loss:.4f}")

Epoch [1/10] - Loss: 16.6037
Epoch [2/10] - Loss: 11.2343
Epoch [3/10] - Loss: 8.9051
Epoch [4/10] - Loss: 7.5706
Epoch [5/10] - Loss: 6.6979
Epoch [6/10] - Loss: 6.0712
Epoch [7/10] - Loss: 5.5896
Epoch [8/10] - Loss: 5.2020
Epoch [9/10] - Loss: 4.8759
Epoch [10/10] - Loss: 4.6067


In [9]:
def accuracy(predictions, targets):
    """Percentage of columns whose arg-max row agrees between the two arrays.

    The class index of each sample is taken as the arg-max along axis 0, for
    both the predictions and the targets.
    """
    hits = np.argmax(predictions, axis=0) == np.argmax(targets, axis=0)
    return np.mean(hits) * 100

# Evaluate on the test set. Loss and accuracy are accumulated weighted by the
# number of samples in each batch, so the final averages are per-sample values
# even if the batches do not all have the same size.
test_loss = 0
test_accuracy = 0
num_test_batches = 0
num_test_samples = 0

for images, labels in test_loader:
    x = image_convert(images)
    y = convert_one_hot(labels.numpy())  # same conversion as in the training loop
    predictions = model.forward(x)       # forward pass only; no weight update

    n_samples = y.shape[1]
    loss = cross_entropy_loss(predictions, y)
    batch_accuracy = accuracy(predictions, y)

    test_loss += loss * n_samples
    test_accuracy += batch_accuracy * n_samples
    num_test_batches += 1
    num_test_samples += n_samples

avg_test_loss = test_loss / num_test_samples
avg_test_accuracy = test_accuracy / num_test_samples

print(f"Test Accuracy: {avg_test_accuracy:.2f}%")
# The test loss was accumulated before but never reported; print it as well.
print(f"Test Loss: {avg_test_loss:.4f}")

Test Accuracy: 75.85%


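Results from a separate run (the weights are randomly initialised without a fixed seed, so the numbers vary from run to run):<br/>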
Epoch [1/10] - Loss: 19.1906

Epoch [2/10] - Loss: 11.6504

Epoch [3/10] - Loss: 8.7364

Epoch [4/10] - Loss: 7.2344

Epoch [5/10] - Loss: 6.2795

Epoch [6/10] - Loss: 5.6161

Epoch [7/10] - Loss: 5.1319

Epoch [8/10] - Loss: 4.7632

Epoch [9/10] - Loss: 4.4675

Epoch [10/10] - Loss: 4.2235


Test Accuracy: 76.43%