In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import time

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

def to_numpy(dataset):
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=len(dataset), shuffle=False)
    data = next(iter(data_loader))
    images, labels = data
    images = images.numpy()
    labels = labels.numpy()
    images = images.reshape(images.shape[0], -1)
    return images, labels

x_train, y_train = to_numpy(trainset)
x_test, y_test = to_numpy(testset)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 39520815.65it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:

def initialize_parameters(input_size, num_classes):
    weights = np.random.randn(input_size, num_classes) * 0.0001
    bias = np.zeros((1, num_classes))
    return weights, bias

def linear_forward(X, weights, bias):
    return np.dot(X, weights) + bias

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def cross_entropy_loss(y_pred, y_true):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), y_true])
    loss = np.sum(log_likelihood) / m
    return loss

def linear_backward(X, y_true, y_pred, weights):
    m = y_true.shape[0]
    grad_softmax = y_pred
    grad_softmax[range(m), y_true] -= 1
    grad_softmax /= m
    grad_weights = np.dot(X.T, grad_softmax)
    grad_bias = np.sum(grad_softmax, axis=0, keepdims=True)
    return grad_weights, grad_bias

def update_parameters(weights, bias, grad_weights, grad_bias, learning_rate):
    # Update parameters using gradients
    weights -= learning_rate * grad_weights
    bias -= learning_rate * grad_bias
    return weights, bias

def train(x_train, y_train, weights, bias, learning_rate, epochs):
    for epoch in range(epochs):
        # Forward pass
        output = linear_forward(x_train, weights, bias)
        y_pred = softmax(output)

        # Loss
        loss = cross_entropy_loss(y_pred, y_train)

        # Backward pass
        grad_weights, grad_bias = linear_backward(x_train, y_train, y_pred, weights)

        # Update parameters
        weights, bias = update_parameters(weights, bias, grad_weights, grad_bias, learning_rate)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}, Loss: {loss}')

    return weights, bias

def test(x_test, y_test, weights, bias):
    # Forward pass
    output = linear_forward(x_test, weights, bias)
    y_pred = softmax(output)

    # Convert predictions to label indexes
    predictions = np.argmax(y_pred, axis=1)

    # Calculate accuracy
    accuracy = np.mean(predictions == y_test) * 100
    return accuracy

learning_rate = 0.01
epochs = 100

input_size = 32*32*3
num_classes = 10
weights, bias = initialize_parameters(input_size, num_classes)

start_time = time.time()
weights, bias = train(x_train, y_train, weights, bias, learning_rate, epochs)
end_time = time.time()
training_time = end_time - start_time
print('Training time taken: ', training_time)
accuracy = test(x_test, y_test, weights, bias)
accuracy


Epoch 10, Loss: 2.097077834816834
Epoch 20, Loss: 2.024669673663025
Epoch 30, Loss: 1.9822496868518868
Epoch 40, Loss: 1.952951850514273
Epoch 50, Loss: 1.9310895390070293
Epoch 60, Loss: 1.9139715523094172
Epoch 70, Loss: 1.9001053034117166
Epoch 80, Loss: 1.8885785995851685
Epoch 90, Loss: 1.878796574525763
Epoch 100, Loss: 1.8703529027054155
Training time taken:  191.73924279212952


36.64

In [4]:
def initialize_parameters(input_size, num_classes):
    weights = np.random.randn(input_size, num_classes) * 0.0001
    bias = np.zeros((1, num_classes))
    return weights, bias

def linear_forward(X, weights, bias):
    return np.dot(X, weights) + bias

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def cross_entropy_loss(y_pred, y_true, weights, lambda_reg):
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), y_true])
    loss = np.sum(log_likelihood) / m
    reg_loss = 0.5 * lambda_reg * np.sum(weights ** 2)
    loss += reg_loss
    return loss

def linear_backward(X, y_true, y_pred, weights, lambda_reg):
    m = y_true.shape[0]
    grad_softmax = y_pred
    grad_softmax[range(m), y_true] -= 1
    grad_softmax /= m
    grad_weights = np.dot(X.T, grad_softmax) + lambda_reg * weights
    grad_bias = np.sum(grad_softmax, axis=0, keepdims=True)
    return grad_weights, grad_bias

def update_parameters(weights, bias, grad_weights, grad_bias, learning_rate):
    weights -= learning_rate * grad_weights
    bias -= learning_rate * grad_bias
    return weights, bias

def train(x_train, y_train, weights, bias, learning_rate, epochs, lambda_reg):
    for epoch in range(epochs):
        # Forward pass
        output = linear_forward(x_train, weights, bias)
        y_pred = softmax(output)
        # Loss
        loss = cross_entropy_loss(y_pred, y_train, weights, lambda_reg)
        # Backward pass
        grad_weights, grad_bias = linear_backward(x_train, y_train, y_pred, weights, lambda_reg)
        # Update parameters
        weights, bias = update_parameters(weights, bias, grad_weights, grad_bias, learning_rate)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}, Loss: {loss}')

    return weights, bias

def test(x_test, y_test, weights, bias):
    output = linear_forward(x_test, weights, bias)
    y_pred = softmax(output)

    predictions = np.argmax(y_pred, axis=1)

    accuracy = np.mean(predictions == y_test) * 100
    return accuracy

learning_rate = 0.01
epochs = 100
lambda_reg = 0.01  # Regularization parameter

weights, bias = initialize_parameters(input_size, num_classes)

# Training the model with regularization
start_time = time.time()
weights, bias = train(x_train, y_train, weights, bias, learning_rate, epochs, lambda_reg)
end_time = time.time()
training_time = end_time - start_time
print('Training time taken: ', training_time)
accuracy = test(x_test, y_test, weights, bias)
accuracy

Epoch 10, Loss: 2.0972238556020617
Epoch 20, Loss: 2.0250233011337837
Epoch 30, Loss: 1.9828078353432657
Epoch 40, Loss: 1.9537136518421627
Epoch 50, Loss: 1.9320505710098728
Epoch 60, Loss: 1.915125554402625
Epoch 70, Loss: 1.9014453620981697
Epoch 80, Loss: 1.8900977930988605
Epoch 90, Loss: 1.8804882961465164
Epoch 100, Loss: 1.872211005036776
Training time taken:  183.57237124443054


36.64

In [5]:

def initialize_parameters(input_size, num_classes):
    # Initialize weights and biases
    weights = np.random.randn(input_size, num_classes) * 0.0001
    bias = np.zeros((1, num_classes))
    return weights, bias

def linear_forward(X, weights, bias):
    # Linear forward computation
    return np.dot(X, weights) + bias

def softmax(z):
    # Softmax function for multi-class classification
    exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def cross_entropy_loss(y_pred, y_true, weights, lambda_reg):

    # Cross-entropy loss function with L2 regularization
    m = y_true.shape[0]
    log_likelihood = -np.log(y_pred[range(m), y_true])
    loss = np.sum(log_likelihood) / m

    # L2 regularization term
    reg_loss = 0.5 * lambda_reg * np.sum(weights ** 2)

    loss += reg_loss
    return loss

def linear_backward(X, y_true, y_pred, weights, lambda_reg):
    m = y_true.shape[0]
    grad_softmax = y_pred
    grad_softmax[range(m), y_true] -= 1
    grad_softmax /= m
    grad_weights = np.dot(X.T, grad_softmax) + lambda_reg * weights
    grad_bias = np.sum(grad_softmax, axis=0, keepdims=True)
    return grad_weights, grad_bias

def update_parameters(weights, bias, grad_weights, grad_bias, learning_rate):
    weights -= learning_rate * grad_weights
    bias -= learning_rate * grad_bias
    return weights, bias

def adjust_learning_rate(learning_rate, epoch):
    if epoch % 30 == 0:
        learning_rate *= 0.1
    return learning_rate

def train(x_train, y_train, weights, bias, learning_rate, epochs, lambda_reg, batch_size):
    for epoch in range(epochs):
        learning_rate = adjust_learning_rate(learning_rate, epoch)
        indices = np.arange(x_train.shape[0])
        np.random.shuffle(indices)
        x_train_shuffled = x_train[indices]
        y_train_shuffled = y_train[indices]

        # Mini-batch training
        for i in range(0, x_train.shape[0], batch_size):
            x_batch = x_train_shuffled[i:i+batch_size]
            y_batch = y_train_shuffled[i:i+batch_size]

            # Forward pass
            output = linear_forward(x_batch, weights, bias)
            y_pred = softmax(output)

            # Loss
            loss = cross_entropy_loss(y_pred, y_batch, weights, lambda_reg)

            # Backward pass
            grad_weights, grad_bias = linear_backward(x_batch, y_batch, y_pred, weights, lambda_reg)

            # Update parameters
            weights, bias = update_parameters(weights, bias, grad_weights, grad_bias, learning_rate)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch + 1}, Loss: {loss}, Learning Rate: {learning_rate}')

    return weights, bias

def test(x_test, y_test, weights, bias):
    output = linear_forward(x_test, weights, bias)
    y_pred = softmax(output)

    predictions = np.argmax(y_pred, axis=1)

    accuracy = np.mean(predictions == y_test) * 100
    return accuracy

learning_rates = [0.01]
epochs_list = [30, 50, 100, 150]
lambda_regs = [0.01, 0.001, 0.0001]
batch_sizes = [32, 64]

best_accuracy = 0
best_hyperparameters = {}

for learning_rate in learning_rates:
    for epochs in epochs_list:
        for lambda_reg in lambda_regs:
            for batch_size in batch_sizes:
                weights, bias = initialize_parameters(input_size, num_classes)
                start_time = time.time()
                weights, bias = train(x_train, y_train, weights, bias, learning_rate, epochs, lambda_reg, batch_size)
                end_time = time.time()
                training_time = end_time - start_time
                accuracy = test(x_test, y_test, weights, bias)
                print(f'Learning Rate: {learning_rate}, Epochs: {epochs}, Lambda_reg: {lambda_reg}, Batch Size: {batch_size}, Accuracy: {accuracy}, Training Time: {training_time:.2f} seconds')
                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    best_hyperparameters = {'learning_rate': learning_rate, 'epochs': epochs, 'lambda_reg': lambda_reg, 'batch_size': batch_size}

print("Best Hyperparameters:")
print(best_hyperparameters)
print(f"Best Accuracy: {best_accuracy}%")

Epoch 10, Loss: 1.7540510673050587, Learning Rate: 0.001
Epoch 20, Loss: 1.7252810206296785, Learning Rate: 0.001
Epoch 30, Loss: 1.9970611885793192, Learning Rate: 0.001
Learning Rate: 0.01, Epochs: 30, Lambda_reg: 0.01, Batch Size: 32, Accuracy: 41.71, Training Time: 61.26 seconds
Epoch 10, Loss: 1.9638081578174036, Learning Rate: 0.001
Epoch 20, Loss: 1.7585166276499296, Learning Rate: 0.001
Epoch 30, Loss: 1.7734362593204658, Learning Rate: 0.001
Learning Rate: 0.01, Epochs: 30, Lambda_reg: 0.01, Batch Size: 64, Accuracy: 40.94, Training Time: 50.49 seconds
Epoch 10, Loss: 1.3420674412039557, Learning Rate: 0.001
Epoch 20, Loss: 1.3988755777678303, Learning Rate: 0.001
Epoch 30, Loss: 1.4086585752406091, Learning Rate: 0.001
Learning Rate: 0.01, Epochs: 30, Lambda_reg: 0.001, Batch Size: 32, Accuracy: 41.589999999999996, Training Time: 63.10 seconds
Epoch 10, Loss: 1.7866076011985725, Learning Rate: 0.001
Epoch 20, Loss: 2.1745909025715715, Learning Rate: 0.001
Epoch 30, Loss: 1.51