# Using PyTorch functions

In [6]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Data pipeline for the PyTorch baseline (the network itself is defined in
# the following cell).


# Preprocessing: convert each image to a tensor, then normalise every RGB
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split, served in shuffled mini-batches of 4.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

# Test split, same batch size but iterated in a fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=4,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names (presumably in label-index order; not used by
# the cells visible here).
classes = (
    'plane', 'car', 'bird', 'cat',
    'deer', 'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [7]:


class Net(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Number of values in one flattened input sample.
            Defaults to 3 * 32 * 32 (3 channels, 32x32 image).
        hidden_features: Width of the hidden layer. Defaults to 128.
        num_classes: Number of output logits. Defaults to 10 (CIFAR-10).
    """

    def __init__(self, in_features=3 * 32 * 32, hidden_features=128, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, in_features)`` and return the raw logits.

        No softmax is applied; the output of the second linear layer is
        returned as-is.
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. a
        # channels-last batch that was permuted to channels-first.
        x = x.reshape(-1, self.fc1.in_features)
        x = F.relu(self.fc1(x))
        return self.fc2(x)
# Model, loss and optimiser for the PyTorch baseline.
net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)

# Two passes over the training loader.
for epoch in range(2):

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss of each completed block of 2000 mini-batches.
        if (i + 1) % 2000 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# Count top-1 hits over the test loader with gradient tracking disabled.
correct = 0
total = 0
with torch.no_grad():
    for images, labels in testloader:
        outputs = net(images)
        # Index of the largest logit in each row is the predicted class.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


[1,  2000] loss: 2.094
[1,  4000] loss: 1.896
[1,  6000] loss: 1.809
[1,  8000] loss: 1.755
[1, 10000] loss: 1.711
[1, 12000] loss: 1.709
[2,  2000] loss: 1.652
[2,  4000] loss: 1.628
[2,  6000] loss: 1.628
[2,  8000] loss: 1.607
[2, 10000] loss: 1.589
[2, 12000] loss: 1.588
Finished Training
Accuracy of the network on the 10000 test images: 44 %


# Self-coded version of a 2-layer NN (without regularisation)

In [8]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

def convert_to_numpy(data_loader):
    """Gather every batch of ``data_loader`` into two NumPy arrays.

    Each item yielded by ``data_loader`` is unpacked as ``(inputs, targets)``
    and both parts are converted with ``.numpy()``.

    Returns:
        (images, labels): the input batches stacked with ``np.vstack`` and
        the target batches joined with ``np.hstack``.
    """
    batches = [(inputs.numpy(), targets.numpy()) for inputs, targets in data_loader]
    images = np.vstack([image_batch for image_batch, _ in batches])
    labels = np.hstack([label_batch for _, label_batch in batches])
    return images, labels

# Preprocessing applied to every image: tensor conversion followed by
# per-channel normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize each channel with mean 0.5 and std deviation 0.5
])

# CIFAR-10 training split and its loader (batches of 4, shuffled).
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

# CIFAR-10 test split and its loader (batches of 4, fixed order).
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)

# Layer sizes: flattened 32x32x3 image -> 128 hidden units -> 10 outputs.
input_size = 32 * 32 * 3
hidden_size = 128
output_size = 10

# Fixed seed so the initial weights are identical on every run.
np.random.seed(42)
# Weights are standard-normal draws scaled by 0.01; biases start at zero.
# W1 is drawn before W2 -- swapping the two lines would change the seeded
# values each matrix receives.
W1 = np.random.randn(input_size, hidden_size) * 0.01
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size) * 0.01
b2 = np.zeros((1, output_size))

def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``, computed stably.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs cannot overflow ``np.exp`` (the naive form overflows
    there and emits a RuntimeWarning).

    Args:
        z: Scalar or array-like of real values.

    Returns:
        ndarray with the same shape as ``z`` and values in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) lies in [0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same function.
    return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

def softmax(z):
    """Softmax along axis 1 (one distribution per row of ``z``).

    The per-row maximum is subtracted before exponentiating, so the largest
    exponent in every row is 0.
    """
    row_max = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_max)
    normaliser = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / normaliser

def forward(X, W1, b1, W2, b2):
    """Run the two-layer network on ``X``.

    Hidden layer: ``sigmoid(X . W1 + b1)``.
    Output layer: ``softmax(A1 . W2 + b2)``.

    Returns:
        (A1, A2): the hidden activations and the softmax outputs.
    """
    # Hidden layer pre-activation and activation.
    hidden_pre = np.dot(X, W1) + b1
    A1 = sigmoid(hidden_pre)

    # Output layer scores turned into per-row probabilities.
    output_pre = np.dot(A1, W2) + b2
    A2 = softmax(output_pre)

    return A1, A2

# Cross-entropy loss
def compute_loss(Y, Y_pred, eps=1e-12):
    """Mean cross-entropy between one-hot targets and predicted probabilities.

    Args:
        Y: One-hot target matrix; its first dimension is the sample count.
        Y_pred: Predicted probabilities, same shape as ``Y``.
        eps: Lower bound applied to ``Y_pred`` before taking the log.

    Returns:
        The summed cross-entropy divided by the number of samples.
    """
    m = Y.shape[0]
    # A probability of exactly 0 would give log(0) = -inf, and 0 * -inf = nan
    # for the non-target classes; clipping keeps the loss finite.
    safe_pred = np.clip(Y_pred, eps, 1.0)
    loss = -np.sum(Y * np.log(safe_pred)) / m
    return loss

def backward(X, Y, A1, A2, W1, W2, b1, b2, learning_rate):
    """Apply one gradient-descent step to the two-layer network.

    Gradients are averaged over the ``X.shape[0]`` samples. The four
    parameter arrays are updated in place and also returned.

    Returns:
        (W1, b1, W2, b2) after the update.
    """
    n_samples = X.shape[0]

    # Error signal at the output layer and, propagated through the sigmoid,
    # at the hidden layer (computed with W2 as it was before the update).
    delta_out = A2 - Y
    delta_hidden = np.dot(delta_out, W2.T) * A1 * (1 - A1)

    # Sample-averaged gradients for the output layer ...
    grad_W2 = np.dot(A1.T, delta_out) / n_samples
    grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / n_samples

    # ... and for the hidden layer.
    grad_W1 = np.dot(X.T, delta_hidden) / n_samples
    grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / n_samples

    # Gradient-descent step on every parameter.
    W1 -= learning_rate * grad_W1
    b1 -= learning_rate * grad_b1
    W2 -= learning_rate * grad_W2
    b2 -= learning_rate * grad_b2

    return W1, b1, W2, b2

# Materialise both splits as NumPy arrays with one flattened image per row.
train_images, train_labels = convert_to_numpy(trainloader)
train_images = train_images.reshape(-1, input_size)
test_images, test_labels = convert_to_numpy(testloader)
test_images = test_images.reshape(-1, input_size)

learning_rate = 0.001
epochs = 30

# The one-hot targets never change between epochs, so build them once
# instead of twice per epoch.
train_targets = np.eye(output_size)[train_labels]

# Each epoch is a single update computed on the whole training set.
# NOTE(review): in the recorded output the loss only moves from about 2.30354
# to 2.30316 over 25 epochs, i.e. the network barely learns with this
# learning rate -- confirm whether that is the intended baseline.
for epoch in range(epochs):
    # Forward pass
    A1, A2 = forward(train_images, W1, b1, W2, b2)

    # Compute loss
    loss = compute_loss(train_targets, A2)

    # Backward pass
    W1, b1, W2, b2 = backward(train_images, train_targets, A1, A2, W1, W2, b1, b2, learning_rate)

    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss}')

# Test accuracy: predicted class is the arg-max of the output probabilities.
_, test_pred = forward(test_images, W1, b1, W2, b2)
predictions = np.argmax(test_pred, axis=1)
accuracy = np.mean(predictions == test_labels) * 100
print(f'Test Accuracy: {accuracy}%')


Files already downloaded and verified
Files already downloaded and verified
Epoch 1/30, Loss: 2.303537521439136
Epoch 2/30, Loss: 2.303521147232998
Epoch 3/30, Loss: 2.3035048255147474
Epoch 4/30, Loss: 2.3034885559478173
Epoch 5/30, Loss: 2.3034723381976905
Epoch 6/30, Loss: 2.3034561719318947
Epoch 7/30, Loss: 2.3034400568199875
Epoch 8/30, Loss: 2.3034239925335442
Epoch 9/30, Loss: 2.3034079787461477
Epoch 10/30, Loss: 2.3033920151333716
Epoch 11/30, Loss: 2.3033761013727747
Epoch 12/30, Loss: 2.3033602371438815
Epoch 13/30, Loss: 2.3033444221281782
Epoch 14/30, Loss: 2.3033286560090964
Epoch 15/30, Loss: 2.303312938472001
Epoch 16/30, Loss: 2.30329726920418
Epoch 17/30, Loss: 2.303281647894835
Epoch 18/30, Loss: 2.3032660742350637
Epoch 19/30, Loss: 2.3032505479178553
Epoch 20/30, Loss: 2.3032350686380743
Epoch 21/30, Loss: 2.3032196360924497
Epoch 22/30, Loss: 2.303204249979569
Epoch 23/30, Loss: 2.303188909999856
Epoch 24/30, Loss: 2.303173615855576
Epoch 25/30, Loss: 2.303158367

# Improved 2-Layer NN with hyperparameter tuning and L2 Regularisation

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np

def convert_to_numpy(data_loader):
    """Drain ``data_loader`` and return its contents as NumPy arrays.

    Every ``(inputs, targets)`` pair yielded by the loader is converted with
    ``.numpy()``; inputs are then stacked with ``np.vstack`` and targets
    joined with ``np.hstack``.

    Returns:
        (images, labels) as NumPy arrays.
    """
    image_batches, label_batches = [], []
    for batch_inputs, batch_targets in data_loader:
        image_batches.append(batch_inputs.numpy())
        label_batches.append(batch_targets.numpy())
    return np.vstack(image_batches), np.hstack(label_batches)

def sigmoid(z):
    """Numerically stable element-wise logistic function.

    Mathematically equal to ``1 / (1 + exp(-z))``, but ``np.exp`` is only
    called on non-positive values, so very negative inputs do not overflow.

    Args:
        z: Scalar or array-like of real values.

    Returns:
        ndarray with the same shape as ``z`` and values in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) is always within [0, 1].
    decay = np.exp(-np.abs(z))
    # Pick the algebraically equivalent branch that matches the sign of z.
    return np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))

def softmax(z):
    """Convert each row of ``z`` into values that sum to 1 (softmax on axis 1).

    Rows are shifted by their own maximum before ``np.exp`` is applied.
    """
    shifted = z - np.max(z, axis=1, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=1, keepdims=True)

# Forward pass of the two-layer network
def forward(X, W1, b1, W2, b2):
    """Compute hidden and output activations for the inputs ``X``.

    ``A1 = sigmoid(X . W1 + b1)`` and ``A2 = softmax(A1 . W2 + b2)``.

    Returns:
        (A1, A2)
    """
    # Input -> hidden
    A1 = sigmoid(np.dot(X, W1) + b1)

    # Hidden -> output
    A2 = softmax(np.dot(A1, W2) + b2)

    return A1, A2

def compute_loss(Y, Y_pred, eps=1e-12):
    """Mean cross-entropy of ``Y_pred`` against the one-hot targets ``Y``.

    Args:
        Y: One-hot target matrix; its first dimension is the sample count.
        Y_pred: Predicted probabilities, same shape as ``Y``.
        eps: Smallest probability passed to the logarithm.

    Returns:
        The summed cross-entropy divided by the number of samples.
    """
    m = Y.shape[0]
    # Without clipping, a predicted probability of exactly 0 makes the loss
    # nan (0 * log(0)) or inf; bound it away from zero first.
    safe_pred = np.clip(Y_pred, eps, 1.0)
    loss = -np.sum(Y * np.log(safe_pred)) / m
    return loss

# Backward pass with L2 regularization
def backward_with_regularization(X, Y, A1, A2, W1, W2, b1, b2, learning_rate, reg_coeff):
    """Apply one gradient-descent step with an L2 penalty on the weights.

    Each weight gradient is the data term plus ``reg_coeff`` times the weight
    matrix, all divided by the sample count ``X.shape[0]``. Bias gradients
    carry no penalty term. The four parameter arrays are updated in place
    and also returned.

    Returns:
        (W1, b1, W2, b2) after the update.
    """
    n_samples = X.shape[0]

    # Error signals at the output and hidden layers (W2 is still the
    # pre-update value here).
    delta_out = A2 - Y
    delta_hidden = np.dot(delta_out, W2.T) * A1 * (1 - A1)

    # Output-layer gradients; only the weights are regularised.
    grad_W2 = (np.dot(A1.T, delta_out) + reg_coeff * W2) / n_samples
    grad_b2 = np.sum(delta_out, axis=0, keepdims=True) / n_samples

    # Hidden-layer gradients; only the weights are regularised.
    grad_W1 = (np.dot(X.T, delta_hidden) + reg_coeff * W1) / n_samples
    grad_b1 = np.sum(delta_hidden, axis=0, keepdims=True) / n_samples

    # Gradient-descent step on every parameter.
    W1 -= learning_rate * grad_W1
    b1 -= learning_rate * grad_b1
    W2 -= learning_rate * grad_W2
    b2 -= learning_rate * grad_b2

    return W1, b1, W2, b2


def convert_to_numpy(data_loader):
    """Return all ``(inputs, targets)`` batches of ``data_loader`` as arrays.

    NOTE(review): this cell defines ``convert_to_numpy`` twice with the same
    body; this later definition is the one in effect after the cell runs.

    Returns:
        (images, labels): inputs stacked with ``np.vstack`` and targets
        joined with ``np.hstack``.
    """
    collected_inputs = []
    collected_targets = []
    for pair in data_loader:
        batch_inputs, batch_targets = pair
        collected_inputs += [batch_inputs.numpy()]
        collected_targets += [batch_targets.numpy()]
    images = np.vstack(collected_inputs)
    labels = np.hstack(collected_targets)
    return images, labels

# Preprocessing: tensor conversion, then per-channel normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # Normalize each channel with mean 0.5 and std deviation 0.5
])

# Training split and loader (batches of 4, shuffled).
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2,
)

# Test split and loader (batches of 4, fixed order).
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2,
)

# Flattened image length (32 * 32 * 3 = 3072) and number of classes.
input_size = 32 * 32 * 3
output_size = 10

# Pull both splits into NumPy and flatten every image into one row.
train_images, train_labels = convert_to_numpy(trainloader)
train_images = train_images.reshape(-1, input_size)
test_images, test_labels = convert_to_numpy(testloader)
test_images = test_images.reshape(-1, input_size)

Files already downloaded and verified
Files already downloaded and verified


In [2]:
# Hyperparameter grid for the L2-regularised two-layer network.
hidden_sizes = [64, 128]
regularization_coeffs = [0.00001, 0.0001, 0.001]
learning_rates = [1, 1.1, 1.25]
epochs_list = [30, 50]

best_accuracy = 0
best_params = {}

# NOTE(review): this cell relies on forward, compute_loss,
# backward_with_regularization, train_images/train_labels,
# test_images/test_labels, input_size and output_size from the cell that
# defines them. The recorded execution counts (In [2] here, In [3] there) are
# out of order -- run that cell first on a fresh kernel.

# The one-hot targets and the sample count are identical for every
# configuration and epoch, so compute them once up front.
train_targets = np.eye(output_size)[train_labels]
num_train = train_images.shape[0]

# Training loop with L2 regularization
for hidden_size in hidden_sizes:
    for reg_coeff in regularization_coeffs:
        for learning_rate in learning_rates:
            for epochs in epochs_list:
                print(f"Training with hidden_size={hidden_size}, reg_coeff={reg_coeff}, learning_rate={learning_rate}, epochs={epochs}...")

                # Initialize weights and biases (re-seeded so every
                # configuration starts from the same random draws)
                np.random.seed(42)
                W1 = np.random.randn(input_size, hidden_size) * 0.01
                b1 = np.zeros((1, hidden_size))
                W2 = np.random.randn(hidden_size, output_size) * 0.01
                b2 = np.zeros((1, output_size))

                # Training loop: one update on the whole training set per epoch
                for epoch in range(epochs):
                    # Forward pass
                    A1, A2 = forward(train_images, W1, b1, W2, b2)

                    # Compute train loss with L2 regularization
                    l2_penalty = (reg_coeff / (2 * num_train)) * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
                    train_loss = compute_loss(train_targets, A2) + l2_penalty

                    # Backward pass with L2 regularization
                    W1, b1, W2, b2 = backward_with_regularization(train_images, train_targets, A1, A2, W1, W2, b1, b2, learning_rate, reg_coeff)

                    # Print train loss every epoch
                    print(f'Epoch {epoch+1}/{epochs}, Train Loss: {train_loss}')

                # Evaluate on the test split after training.
                # NOTE(review): the best configuration is selected on the
                # test set itself, so the reported best accuracy is an
                # optimistic estimate; consider a held-out validation split.
                _, test_pred = forward(test_images, W1, b1, W2, b2)
                predictions = np.argmax(test_pred, axis=1)
                accuracy = np.mean(predictions == test_labels) * 100
                print(f'Test Accuracy: {accuracy}%')

                # Update best accuracy and parameters if current accuracy is better
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_params = {
                        'hidden_size': hidden_size,
                        'reg_coeff': reg_coeff,
                        'learning_rate': learning_rate,
                        'epochs': epochs
                    }
                    print("Best parameters updated:", best_params)

                print("Training complete.\n")

print("Grid search complete.")
print("Best parameters found:", best_params)
print("Best test accuracy:", best_accuracy)


Training with hidden_size=64, reg_coeff=1e-05, learning_rate=1, epochs=30...
Epoch 1/30, Train Loss: 2.304810586826439
Epoch 2/30, Train Loss: 2.3000220413287753
Epoch 3/30, Train Loss: 2.2947722977383385
Epoch 4/30, Train Loss: 2.2866488938876315
Epoch 5/30, Train Loss: 2.2734199165548414
Epoch 6/30, Train Loss: 2.2538450530469896
Epoch 7/30, Train Loss: 2.22910617478603
Epoch 8/30, Train Loss: 2.202659833616347
Epoch 9/30, Train Loss: 2.177582726316821
Epoch 10/30, Train Loss: 2.1547538310636045
Epoch 11/30, Train Loss: 2.1336818120452996
Epoch 12/30, Train Loss: 2.113937271507474
Epoch 13/30, Train Loss: 2.0955154674667194
Epoch 14/30, Train Loss: 2.078528026635388
Epoch 15/30, Train Loss: 2.062952525177478
Epoch 16/30, Train Loss: 2.048684355215331
Epoch 17/30, Train Loss: 2.035612911338649
Epoch 18/30, Train Loss: 2.023636278933894
Epoch 19/30, Train Loss: 2.0126518353737093
Epoch 20/30, Train Loss: 2.0025548329115965
Epoch 21/30, Train Loss: 1.9932405465982548
Epoch 22/30, Train 