In [5]:
from src.activation import ReLU, Softmax
from src.nn import LinearLayer, SGDWithMomentum
from src.loss import CrossEntropyLoss
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

In [2]:
# Preprocessing shared by both splits: ToTensor is the only transform applied
# (no normalisation or augmentation).
transform = transforms.Compose([transforms.ToTensor()])

# MNIST train / test splits, downloaded into ./data when not already present.
_mnist_common = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.MNIST(train=True, **_mnist_common)
test_dataset = datasets.MNIST(train=False, **_mnist_common)

# Mini-batch iterators: training batches are reshuffled every epoch, the test
# set is read in its stored order.
batch_size = 64  # tunable
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [3]:
# Model: fully connected 784 -> 256 -> 128 -> 10 network with ReLU after the
# first two linear layers and a softmax on the output, trained with
# cross-entropy loss.
input_layer = LinearLayer(784, 256)
relu1 = ReLU()
hidden_layer = LinearLayer(256, 128)
relu2 = ReLU()
output_layer = LinearLayer(128, 10)
softmax = Softmax()
criterion = CrossEntropyLoss()

# Layers that own trainable arrays, in the order the optimizer sees them.
trainable_layers = (input_layer, hidden_layer, output_layer)


def collect_parameters():
    """Return [W, b, W, b, ...] read from the layers' *current* attributes."""
    return [arr for layer in trainable_layers
            for arr in (layer.weights, layer.bias)]


def collect_gradients():
    """Return [dW, db, dW, db, ...] read from the layers' *current* attributes.

    This must be called after ``backward``: a list built once before training
    only holds references to whatever arrays the layers had at that moment.
    If ``backward`` assigns fresh arrays to ``weights_grad`` / ``bias_grad``
    (rather than writing into the existing ones), such a snapshot never sees
    the real gradients and the optimizer keeps applying the stale initial
    values -- which shows up as a loss that never moves.
    """
    return [arr for layer in trainable_layers
            for arr in (layer.weights_grad, layer.bias_grad)]


parameters = collect_parameters()
gradients = collect_gradients()

# Initialize optimizer
optimizer = SGDWithMomentum(parameters, learning_rate=0.01, momentum=0.9)

# Training loop
num_epochs = 60

for epoch in range(num_epochs):
    epoch_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # Flatten each image to a 784-vector and hand NumPy arrays to the
        # NumPy-based layers.
        data = data.view(-1, 28*28).numpy()
        target = target.numpy()

        # Forward pass
        out = input_layer.forward(data)
        out = relu1.forward(out)
        out = hidden_layer.forward(out)
        out = relu2.forward(out)
        out = output_layer.forward(out)
        probs = softmax.forward(out)  # softmax output, i.e. not raw logits
        loss = criterion.forward(probs, target)

        # Backward pass (reverse order of the forward pass)
        grad_loss = criterion.backward()
        grad_softmax = softmax.backward(grad_loss)
        grad_output = output_layer.backward(grad_softmax)
        grad_relu2 = relu2.backward(grad_output)
        grad_hidden = hidden_layer.backward(grad_relu2)
        grad_relu1 = relu1.backward(grad_hidden)
        grad_input = input_layer.backward(grad_relu1)

        # Update parameters using the gradients the layers hold *now*, not
        # the arrays they held before training started.
        gradients = collect_gradients()
        optimizer.step(parameters, gradients)
        optimizer.zero_grad(gradients)

        # Push the optimizer's view of the parameters back onto the layers.
        # This is a no-op when step() updates the arrays in place, and makes
        # the update visible to the layers if step() replaces list entries.
        # NOTE(review): src.nn is not visible from this notebook -- confirm
        # which of the two SGDWithMomentum.step actually does.
        for layer, weights, bias in zip(trainable_layers,
                                        parameters[0::2], parameters[1::2]):
            layer.weights, layer.bias = weights, bias

        epoch_loss += loss

    # Print average loss for the epoch
    # NOTE(review): if this still sits near the initial value after the
    # aliasing fixes above, check the backward implementations in
    # src.activation / src.loss next.
    avg_loss = epoch_loss / len(train_loader)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

Epoch [1/60], Loss: 2.4576
Epoch [2/60], Loss: 2.4576
Epoch [3/60], Loss: 2.4577
Epoch [4/60], Loss: 2.4575
Epoch [5/60], Loss: 2.4576
Epoch [6/60], Loss: 2.4576
Epoch [7/60], Loss: 2.4575
Epoch [8/60], Loss: 2.4576
Epoch [9/60], Loss: 2.4576
Epoch [10/60], Loss: 2.4576
Epoch [11/60], Loss: 2.4577
Epoch [12/60], Loss: 2.4576
Epoch [13/60], Loss: 2.4575
Epoch [14/60], Loss: 2.4575
Epoch [15/60], Loss: 2.4576
Epoch [16/60], Loss: 2.4575
Epoch [17/60], Loss: 2.4575
Epoch [18/60], Loss: 2.4576
Epoch [19/60], Loss: 2.4576
Epoch [20/60], Loss: 2.4575
Epoch [21/60], Loss: 2.4577
Epoch [22/60], Loss: 2.4576
Epoch [23/60], Loss: 2.4575
Epoch [24/60], Loss: 2.4575
Epoch [25/60], Loss: 2.4576
Epoch [26/60], Loss: 2.4576
Epoch [27/60], Loss: 2.4575
Epoch [28/60], Loss: 2.4576
Epoch [29/60], Loss: 2.4577
Epoch [30/60], Loss: 2.4575
Epoch [31/60], Loss: 2.4576
Epoch [32/60], Loss: 2.4577
Epoch [33/60], Loss: 2.4576
Epoch [34/60], Loss: 2.4576
Epoch [35/60], Loss: 2.4576
Epoch [36/60], Loss: 2.4576
E

KeyboardInterrupt: 

In [6]:
# Measure classification accuracy on the test split: forward pass only, then
# compare the arg-max class of the softmax output with the true label.
correct, total = 0, 0

# The network as an ordered pipeline of stages, each exposing .forward().
forward_stages = (input_layer, relu1, hidden_layer, relu2, output_layer, softmax)

for images, labels in test_loader:
    # Flatten each image to a 784-vector and convert to NumPy.
    activations = images.view(-1, 28*28).numpy()
    labels = labels.numpy()

    # Forward pass
    for stage in forward_stages:
        activations = stage.forward(activations)

    # Predicted class = index of the largest output per sample.
    predicted = np.argmax(activations, axis=1)
    correct += (predicted == labels).sum()
    total += labels.size

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 8.53%
