## Lab 2_3 Double layer NN without nn.Linear (fully connected module) for MNIST dataset

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

trainset = torchvision.datasets.MNIST("./", train=True, transform=transforms.ToTensor(), download=True)
testset = torchvision.datasets.MNIST("./", train=False, transform=transforms.ToTensor(), download=True)

In [2]:
n_classes = len(trainset.classes)
x_train = torch.flatten(trainset.data, start_dim=1)
y_train = trainset.targets
x_test = torch.flatten(testset.data, start_dim=1)
y_test = testset.targets

In [3]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(x_train)
X_train = torch.tensor(scaler.fit_transform(x_train), dtype=torch.float32)
X_test = torch.tensor(scaler.fit_transform(x_test), dtype=torch.float32)

In [4]:
input_dim = X_train.shape[1]
hidden1_dim = 128
hidden2_dim = 32
output_dim = n_classes

In [5]:
w1 = torch.randn(input_dim, hidden1_dim, requires_grad=True)
w1.retain_grad()
b1 = torch.zeros(hidden1_dim, requires_grad=True)
b1.retain_grad()
w2 = torch.randn(hidden1_dim, hidden2_dim, requires_grad=True)
w2.retain_grad()
b2 = torch.zeros(hidden2_dim, requires_grad=True)
b2.retain_grad()
w3 = torch.randn(hidden2_dim, output_dim, requires_grad=True)
w3.retain_grad()
b3 = torch.zeros(output_dim, requires_grad=True)
b3.retain_grad()

In [6]:
def relu(x):
  return torch.max(x, torch.zeros_like(x))

In [7]:
def softmax(x):
  ex = torch.exp(x - torch.max(x, dim=1, keepdim=True)[0])
  return ex / ex.sum(dim=1, keepdim=True)

In [8]:
def forward(X):
  z1 = torch.matmul(X, w1) + b1
  a1 = relu(z1)

  z2 = torch.matmul(a1, w2) + b2
  a2 = relu(z2)

  z3 = torch.matmul(a2, w3) + b3
  a3 = softmax(z3)
  return a3

In [9]:
import torch.nn.functional as F
err = 1e-10

def cross_entropy(pred, truth):
  truth_onehot = F.one_hot(truth, num_classes=output_dim).float()
  loss = -torch.sum(truth_onehot * torch.log(pred+err), dim=1)
  return torch.mean(loss)

In [10]:
learning_rate = 0.01
epochs = 2000

for i in range(epochs):
  y_pred = forward(X_train)
  loss = cross_entropy(y_pred, y_train)
  loss.backward()
  with torch.no_grad():
    w1 -= learning_rate * w1.grad
    b1 -= learning_rate * b1.grad
    w2 -= learning_rate * w2.grad
    b2 -= learning_rate * b2.grad
    w3 -= learning_rate * w3.grad
    b3 -= learning_rate * b3.grad
  w1.grad.zero_()
  b1.grad.zero_()
  w2.grad.zero_()
  b2.grad.zero_()
  w3.grad.zero_()
  b3.grad.zero_()

  if (i + 1) % 250 == 0:
    print(f"Epoch [{i+1}/{epochs}], \tLoss: {loss.item()}")

Epoch [250/2000], 	Loss: 17.790040969848633
Epoch [500/2000], 	Loss: 15.85260009765625
Epoch [750/2000], 	Loss: 14.826966285705566
Epoch [1000/2000], 	Loss: 14.255081176757812
Epoch [1250/2000], 	Loss: 13.806451797485352
Epoch [1500/2000], 	Loss: 13.53281307220459
Epoch [1750/2000], 	Loss: 13.31104564666748
Epoch [2000/2000], 	Loss: 13.1246919631958


In [11]:
y_pred_test = forward(X_test)
_, predicted_labels = torch.max(y_pred_test, 1)

correct_predictions = (predicted_labels == y_test).sum().item()
accuracy = correct_predictions / len(y_test)
print(f"Accuracy on the test set: {accuracy * 100:.2f}%")

Accuracy on the test set: 42.17%
