<a href="https://colab.research.google.com/github/kunshksingh/ML-ImageAnalysis/blob/main/mnist_neural_net.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pickle
import gzip
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.init as init
from sklearn.model_selection import ParameterGrid

In [None]:
def load_data(path='mnist.pkl.gz'):
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    ``path`` is the gzip-compressed pickle to read; it defaults to
    ``mnist.pkl.gz`` in the current working directory.

    For the standard MNIST archive, the ``training_data`` is returned
    as a tuple with two entries.  The first entry contains the actual
    training images.  This is a numpy ndarray with 50,000 entries.
    Each entry is, in turn, a numpy ndarray with 784 values,
    representing the 28 * 28 = 784 pixels in a single MNIST image.
    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.
    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.
    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``.
    """
    # NOTE: unpickling can execute arbitrary code, so only load archives
    # that come from a trusted source.
    # The ``with`` block closes the file even when unpickling fails.
    with gzip.open(path, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding="latin1")
    return (training_data, validation_data, test_data)

In [None]:
def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    Each of the three entries is a list of 2-tuples ``(x, y)``, one
    per image, where ``x`` is the image reshaped into a ``(784, 1)``
    numpy.ndarray column vector.

    In ``training_data``, ``y`` is the ``(10, 1)`` numpy.ndarray
    produced by ``vectorized_result``, i.e. the unit vector
    corresponding to the correct digit for ``x``.

    In ``validation_data`` and ``test_data``, ``y`` is the
    corresponding classification, i.e., the digit value taken
    unchanged from ``load_data``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data.  These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    # ``zip`` alone is a one-shot iterator in Python 3; build real lists
    # so the splits can be indexed, measured and iterated repeatedly.
    training_data = list(zip(training_inputs, training_results))
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = list(zip(validation_inputs, va_d[1]))
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = list(zip(test_inputs, te_d[1]))
    return (training_data, validation_data, test_data)


In [None]:
def vectorized_result(j):
    """Build the one-hot column vector for the digit ``j``.

    The result is a ``(10, 1)`` array of zeros with a single 1.0 in
    row ``j``; it is the desired network output for that digit
    (0...9)."""
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

In [None]:
# Fetch the three splits and turn each one into a list so that it can
# be indexed and iterated over more than once.
train, dev, test = load_data_wrapper()
training_data, validation_data, test_data = list(train), list(dev), list(test)

In [None]:
# Peek at the first training example to check the (image, label) layout.
print(training_data[0])

(array([[0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.        ],
       [0.  

In [None]:
class NeuralNetworkSoftmax(nn.Module):
    """784-128-64-10 fully connected network that ends in a softmax.

    Both hidden layers use a sigmoid activation.  Unlike the other
    networks in this file, ``forward`` returns per-class probabilities
    (each row sums to 1) rather than raw scores.

    NOTE: ``nn.CrossEntropyLoss`` already applies a log-softmax to its
    input, so training this model with that loss applies softmax twice.
    The class is kept as the "with softmax" variant for comparison.
    """

    def __init__(self):
        # Zero-argument super() resolves to this class.  The previous
        # ``super(NeuralNetwork, self)`` named a class that is not a base
        # of NeuralNetworkSoftmax, so construction failed.
        super().__init__()
        self.fc1 = nn.Linear(784, 128)  # First hidden layer
        self.fc2 = nn.Linear(128, 64)   # Second hidden layer
        self.fc3 = nn.Linear(64, 10)    # Output layer

    def forward(self, x):
        """Map a ``(batch, 784)`` tensor to ``(batch, 10)`` class probabilities."""
        x = torch.sigmoid(self.fc1(x))  # Sigmoid activation function
        x = torch.sigmoid(self.fc2(x))  # Sigmoid activation function
        x = self.fc3(x)
        x = F.softmax(x, dim=1)         # Softmax activation function
        return x


In [None]:
class NeuralNetwork(nn.Module):
    """Baseline fully connected classifier: 784 -> 128 -> 64 -> 10.

    The two hidden layers are followed by a sigmoid; the output layer
    returns raw, unnormalised class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=128)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=128, out_features=64)   # hidden layer 2
        self.fc3 = nn.Linear(in_features=64, out_features=10)    # output layer

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        hidden = torch.sigmoid(self.fc1(x))
        hidden = torch.sigmoid(self.fc2(hidden))
        # No softmax here: the model works better without it.
        return self.fc3(hidden)

In [None]:
class NeuralNetworkMini(nn.Module):
    """Smaller variant of the baseline network: 784 -> 64 -> 32 -> 10.

    Sigmoid activations follow both hidden layers; the output layer
    returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=64)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=64, out_features=32)   # hidden layer 2
        self.fc3 = nn.Linear(in_features=32, out_features=10)   # output layer

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.sigmoid(hidden_layer(x))
        return self.fc3(x)


In [None]:
class NeuralNetworkMax(nn.Module):
    """Wider variant of the baseline network: 784 -> 256 -> 128 -> 10.

    Sigmoid activations follow both hidden layers; the output layer
    returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=256)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=256, out_features=128)  # hidden layer 2
        self.fc3 = nn.Linear(in_features=128, out_features=10)   # output layer

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        first = torch.sigmoid(self.fc1(x))
        second = torch.sigmoid(self.fc2(first))
        return self.fc3(second)


In [None]:
class NeuralNetworkWeighted(nn.Module):
    """Baseline 784 -> 128 -> 64 -> 10 network with explicit weight init.

    The weights of the two hidden layers are drawn with
    ``kaiming_normal_`` (``fan_in`` mode, sigmoid non-linearity) and
    the output layer's weights with ``xavier_normal_``.  Biases keep
    the ``nn.Linear`` defaults.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=128)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=128, out_features=64)   # hidden layer 2
        self.fc3 = nn.Linear(in_features=64, out_features=10)    # output layer
        # Re-draw the weights once all layers exist: hidden layers first,
        # then the output layer.
        for hidden_layer in (self.fc1, self.fc2):
            init.kaiming_normal_(hidden_layer.weight, mode='fan_in', nonlinearity='sigmoid')
        init.xavier_normal_(self.fc3.weight)

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        hidden = torch.sigmoid(self.fc1(x))
        hidden = torch.sigmoid(self.fc2(hidden))
        return self.fc3(hidden)


In [None]:
class NeuralNetworkBias(nn.Module):
    """Baseline 784 -> 128 -> 64 -> 10 network with every bias set to 0.1.

    Weights keep the ``nn.Linear`` defaults; only the bias vectors of
    the three layers are overwritten with the constant 0.1.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=128)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=128, out_features=64)   # hidden layer 2
        self.fc3 = nn.Linear(in_features=64, out_features=10)    # output layer
        for layer in (self.fc1, self.fc2, self.fc3):
            init.constant_(layer.bias, 0.1)

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.sigmoid(hidden_layer(x))
        return self.fc3(x)


In [None]:
class NeuralNetworkBiasHalf(nn.Module):
    """Baseline 784 -> 128 -> 64 -> 10 network with every bias set to 0.5.

    Weights keep the ``nn.Linear`` defaults; only the bias vectors of
    the three layers are overwritten with the constant 0.5.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=128)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=128, out_features=64)   # hidden layer 2
        self.fc3 = nn.Linear(in_features=64, out_features=10)    # output layer
        for layer in (self.fc1, self.fc2, self.fc3):
            init.constant_(layer.bias, 0.5)

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        first = torch.sigmoid(self.fc1(x))
        second = torch.sigmoid(self.fc2(first))
        return self.fc3(second)


In [None]:
class NeuralNetworkThree(nn.Module):
    """Deeper variant with three hidden layers: 784 -> 256 -> 128 -> 64 -> 10.

    Each hidden layer is followed by a sigmoid; the output layer
    returns raw class scores.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=256)  # hidden layer 1
        self.fc2 = nn.Linear(in_features=256, out_features=128)  # hidden layer 2
        self.fc3 = nn.Linear(in_features=128, out_features=64)   # hidden layer 3
        self.fc4 = nn.Linear(in_features=64, out_features=10)    # output layer

    def forward(self, x):
        """Return the ``(batch, 10)`` class scores for a ``(batch, 784)`` input."""
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = torch.sigmoid(hidden_layer(x))
        return self.fc4(x)

In [None]:
def create_batches(data, batch_size):
    """Yield ``(images, labels)`` tensor mini-batches cut from ``data``.

    Args:
        data: sliceable sequence of ``(image, label)`` pairs.  Each
            image must hold 784 values; each label is reduced to a
            class index with ``np.argmax`` (so a one-hot vector maps to
            the position of its 1).
        batch_size: number of consecutive examples per batch; the last
            batch is shorter when ``len(data)`` is not a multiple.

    Yields:
        ``(images_tensor, labels_tensor)`` where ``images_tensor`` is a
        float32 tensor of shape ``(batch, 784)`` and ``labels_tensor``
        is a long tensor of shape ``(batch,)``.  Nothing is yielded for
        empty ``data``.
    """
    for start in range(0, len(data), batch_size):
        batch = data[start:start + batch_size]
        images, labels = zip(*batch)
        class_indices = np.array([np.argmax(y) for y in labels])

        # Force float32: ``nn.Linear`` parameters are float32 by default,
        # so e.g. float64 images would otherwise fail in the forward pass.
        # ``as_tensor`` reuses the freshly built array when no cast is needed.
        images_tensor = torch.as_tensor(np.array(images), dtype=torch.float32).view(-1, 784)
        labels_tensor = torch.as_tensor(class_indices, dtype=torch.long)

        yield images_tensor, labels_tensor

In [None]:
# Hyperparameter grid: every key maps to the list of values to try.
# The trailing comments list other candidate values for each setting.
parameter_grid = dict(
    batch_size=[1],        # 1, 2, 4, 8, 16, 32, 64
    epochs=[20],           # 1, 2, 4, 5, 8, 10, 20
    learning_rate=[0.01],  # 0.1, 0.02, 0.01, 0.005, 0.001
)

In [None]:
def get_accuracy(model, type="v", dataset=None):
    """Return the classification accuracy of ``model`` as a percentage.

    Args:
        model: callable that maps a ``(1, 784)`` float32 tensor to a
            ``(1, num_classes)`` tensor of class scores.
        type: ``"t"`` scores the module-level ``test_data``; any other
            value scores ``validation_data``.  Ignored when ``dataset``
            is given.  (The name shadows the builtin but is kept so
            existing callers keep working.)
        dataset: optional iterable of ``(image, label)`` pairs to score
            instead of one of the module-level splits.

    Returns:
        Percentage of examples whose highest-scoring class equals the
        label, as a float; ``0.0`` when there are no examples.
    """
    if dataset is not None:
        all_data = dataset
    elif type == "t":
        all_data = test_data
    else:
        all_data = validation_data

    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in all_data:
            # One example at a time: reshape the image into a 1-row batch.
            data = torch.as_tensor(data, dtype=torch.float32).view(-1, 784)
            target = torch.tensor(target, dtype=torch.long)
            predicted = model(data).argmax(dim=1)
            total += 1
            correct += (predicted == target).sum().item()

    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    return 100 * correct / total

In [None]:
from typing import ParamSpecKwargs
def train_and_evaluate_model(params, model):
    """Train ``model`` with SGD and score it on the validation split.

    After every epoch the model is evaluated with ``get_accuracy`` and
    a progress line is printed.

    Args:
        params: mapping with the keys ``'learning_rate'``, ``'epochs'``
            and ``'batch_size'``.
        model: ``nn.Module`` to train; it is updated in place.

    Returns:
        ``(accuracy, model)``: the validation accuracy (in percent)
        measured after the last epoch, and the same model object.
    """
    optimizer = optim.SGD(model.parameters(), lr=params['learning_rate'])
    loss_function = nn.CrossEntropyLoss()

    accuracy = None
    for epoch in range(params['epochs']):
        # Validation below switches to eval mode, so re-enter train mode
        # at the start of every epoch.
        model.train()
        loss_sum = 0.0
        examples_seen = 0
        for data, target in create_batches(training_data, params['batch_size']):
            optimizer.zero_grad()
            output = model(data)
            loss = loss_function(output, target)
            loss.backward()
            optimizer.step()
            # The loss is a per-batch mean; weight it by the batch size so
            # the epoch figure is a true per-example average.
            loss_sum += loss.item() * target.size(0)
            examples_seen += target.size(0)

        model.eval()  # Set the model to evaluation mode
        accuracy = get_accuracy(model)
        # Report the mean loss over the whole epoch rather than the loss of
        # whichever mini-batch happened to come last.
        epoch_loss = loss_sum / examples_seen if examples_seen else float('nan')
        print(f"Epoch {epoch + 1}, Training Loss: {epoch_loss}, Validation Accuracy: {accuracy}")

    if accuracy is None:
        # Zero epochs requested: still return a score for the model as given.
        model.eval()
        accuracy = get_accuracy(model)
    return accuracy, model


In [None]:
# Grid search over the baseline network.
grid = ParameterGrid(parameter_grid)

print("----------Default----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetwork()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")


----------Default----------
Epoch 1, Training Loss: 0.2626171112060547, Validation Accuracy: 90.12
Epoch 2, Training Loss: 0.05944383889436722, Validation Accuracy: 93.5
Epoch 3, Training Loss: 0.01809985563158989, Validation Accuracy: 95.27
Epoch 4, Training Loss: 0.007947016507387161, Validation Accuracy: 96.0
Epoch 5, Training Loss: 0.004364726599305868, Validation Accuracy: 96.5
Epoch 6, Training Loss: 0.002749355509877205, Validation Accuracy: 96.71
Epoch 7, Training Loss: 0.0018368767341598868, Validation Accuracy: 96.88
Epoch 8, Training Loss: 0.0013763965107500553, Validation Accuracy: 96.98
Epoch 9, Training Loss: 0.0011747133685275912, Validation Accuracy: 97.15
Epoch 10, Training Loss: 0.0010706413304433227, Validation Accuracy: 97.16
Epoch 11, Training Loss: 0.0009589364635758102, Validation Accuracy: 97.24
Epoch 12, Training Loss: 0.0008156548719853163, Validation Accuracy: 97.33
Epoch 13, Training Loss: 0.0006692553870379925, Validation Accuracy: 97.45
Epoch 14, Training 

In [None]:
# With Softmax: same grid search as the baseline run, but using the
# network whose forward pass ends in a softmax.
grid = ParameterGrid(parameter_grid)

# Iterate over all combinations of parameters.
# The banner names this run; it previously repeated "Default", which made
# the output indistinguishable from the baseline run.
print("----------Softmax----------")
best_score = None
best_params = None
best_model = None
for params in grid:
    model = NeuralNetworkSoftmax()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score = performance_metric
        best_params = params
        best_model = curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")
# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")


----------Default----------
Epoch 1, Training Loss: 2.383850336074829, Validation Accuracy: 35.51
Epoch 2, Training Loss: 2.4277496337890625, Validation Accuracy: 63.75
Epoch 3, Training Loss: 2.4139959812164307, Validation Accuracy: 66.57
Epoch 4, Training Loss: 1.6468204259872437, Validation Accuracy: 74.91
Epoch 5, Training Loss: 1.4945931434631348, Validation Accuracy: 75.91
Epoch 6, Training Loss: 1.470283031463623, Validation Accuracy: 76.43
Epoch 7, Training Loss: 1.4653940200805664, Validation Accuracy: 76.75
Epoch 8, Training Loss: 1.46372652053833, Validation Accuracy: 76.97
Epoch 9, Training Loss: 1.4628885984420776, Validation Accuracy: 77.18
Epoch 10, Training Loss: 1.4624370336532593, Validation Accuracy: 77.38
Epoch 11, Training Loss: 1.4621838331222534, Validation Accuracy: 77.54
Epoch 12, Training Loss: 1.4620383977890015, Validation Accuracy: 77.59
Epoch 13, Training Loss: 1.461952567100525, Validation Accuracy: 77.6
Epoch 14, Training Loss: 1.4618773460388184, Valida

In [None]:
# Hyperparameter grid used for the architecture comparison runs.
# The trailing comments list other candidate values for each setting.
parameter2 = dict(
    batch_size=[1],        # 1, 2, 4, 8, 16, 32, 64
    epochs=[20],           # 1, 2, 4, 5, 8, 10, 20
    learning_rate=[0.01],  # 0.1, 0.02, 0.01, 0.005, 0.001
)

In [None]:
# Other Models: grid search over the smaller "Mini" network.
grid = ParameterGrid(parameter2)

print("----------Mini----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetworkMini()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

# Grid search over the wider "Max" network, reusing the `grid` built
# earlier in this cell.
print("----------Max----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetworkMax()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

----------Mini----------
Epoch 1, Training Loss: 0.2504449784755707, Validation Accuracy: 90.54
Epoch 2, Training Loss: 0.04383499175310135, Validation Accuracy: 93.73
Epoch 3, Training Loss: 0.01685473322868347, Validation Accuracy: 95.36
Epoch 4, Training Loss: 0.010993153788149357, Validation Accuracy: 96.08
Epoch 5, Training Loss: 0.008199954405426979, Validation Accuracy: 96.51
Epoch 6, Training Loss: 0.006256043910980225, Validation Accuracy: 96.71
Epoch 7, Training Loss: 0.0047610728070139885, Validation Accuracy: 96.73
Epoch 8, Training Loss: 0.0037365397438406944, Validation Accuracy: 96.82
Epoch 9, Training Loss: 0.0030077716801315546, Validation Accuracy: 96.82
Epoch 10, Training Loss: 0.002476722002029419, Validation Accuracy: 96.91
Epoch 11, Training Loss: 0.002099335426464677, Validation Accuracy: 97.03
Epoch 12, Training Loss: 0.0017851145239546895, Validation Accuracy: 97.1
Epoch 13, Training Loss: 0.0015034097013995051, Validation Accuracy: 97.25
Epoch 14, Training Los

NameError: ignored

In [None]:
# Grid search over the network with explicit weight initialisation,
# using whichever `grid` is currently defined.
print("----------Weighted----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetworkWeighted()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

# Grid search over the network whose biases start at 0.1, reusing the
# same `grid` as the run above.
print("----------Bias----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetworkBias()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

----------Weighted----------
Epoch 1, Training Loss: 0.1485009491443634, Validation Accuracy: 91.81
Epoch 2, Training Loss: 0.038472242653369904, Validation Accuracy: 94.05
Epoch 3, Training Loss: 0.0128360940143466, Validation Accuracy: 95.3
Epoch 4, Training Loss: 0.005533022340387106, Validation Accuracy: 96.0
Epoch 5, Training Loss: 0.003062085248529911, Validation Accuracy: 96.45
Epoch 6, Training Loss: 0.001809746609069407, Validation Accuracy: 96.75
Epoch 7, Training Loss: 0.0011564955348148942, Validation Accuracy: 96.99
Epoch 8, Training Loss: 0.0008422164828516543, Validation Accuracy: 97.07
Epoch 9, Training Loss: 0.0006858142442069948, Validation Accuracy: 97.17
Epoch 10, Training Loss: 0.0005812147865071893, Validation Accuracy: 97.2
Epoch 11, Training Loss: 0.00048744716332294047, Validation Accuracy: 97.3
Epoch 12, Training Loss: 0.0004032037395518273, Validation Accuracy: 97.31
Epoch 13, Training Loss: 0.00033158526639454067, Validation Accuracy: 97.33
Epoch 14, Trainin

In [None]:
# Grid search over the network whose biases start at 0.5, using
# whichever `grid` is currently defined.
print("----------BiasHalf----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetworkBiasHalf()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

----------BiasHalf----------
Epoch 1, Training Loss: 0.2580236792564392, Validation Accuracy: 90.06
Epoch 2, Training Loss: 0.056776389479637146, Validation Accuracy: 93.45
Epoch 3, Training Loss: 0.017757480964064598, Validation Accuracy: 95.39
Epoch 4, Training Loss: 0.008135632611811161, Validation Accuracy: 96.18
Epoch 5, Training Loss: 0.004203413613140583, Validation Accuracy: 96.54
Epoch 6, Training Loss: 0.002356254495680332, Validation Accuracy: 96.82
Epoch 7, Training Loss: 0.0013949673157185316, Validation Accuracy: 96.95
Epoch 8, Training Loss: 0.0008721124031580985, Validation Accuracy: 97.04
Epoch 9, Training Loss: 0.0005750194541178644, Validation Accuracy: 97.08
Epoch 10, Training Loss: 0.00040344204171560705, Validation Accuracy: 97.23
Epoch 11, Training Loss: 0.0002992897352669388, Validation Accuracy: 97.28
Epoch 12, Training Loss: 0.00023183519078884274, Validation Accuracy: 97.34
Epoch 13, Training Loss: 0.00018416139937471598, Validation Accuracy: 97.39
Epoch 14, 

In [None]:
# Re-run the baseline network with a longer schedule (40 epochs).
# The trailing comments list other candidate values for each setting.
parameter2 = dict(
    batch_size=[1],        # 1, 2, 4, 8, 16, 32, 64
    epochs=[40],           # 1, 2, 4, 5, 8, 10, 20
    learning_rate=[0.01],  # 0.1, 0.02, 0.01, 0.005, 0.001
)
grid = ParameterGrid(parameter2)

print("----------DefaultLong----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetwork()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

----------DefaultLong----------
Epoch 1, Training Loss: 0.37175217270851135, Validation Accuracy: 90.1
Epoch 2, Training Loss: 0.06012723967432976, Validation Accuracy: 93.59
Epoch 3, Training Loss: 0.015364030376076698, Validation Accuracy: 95.32
Epoch 4, Training Loss: 0.006601667497307062, Validation Accuracy: 96.06
Epoch 5, Training Loss: 0.0038043521344661713, Validation Accuracy: 96.55
Epoch 6, Training Loss: 0.0027267676778137684, Validation Accuracy: 96.74
Epoch 7, Training Loss: 0.0021821276750415564, Validation Accuracy: 96.9
Epoch 8, Training Loss: 0.001782139646820724, Validation Accuracy: 97.01
Epoch 9, Training Loss: 0.0014360364293679595, Validation Accuracy: 97.16
Epoch 10, Training Loss: 0.0011397063499316573, Validation Accuracy: 97.33
Epoch 11, Training Loss: 0.0008917645900510252, Validation Accuracy: 97.4
Epoch 12, Training Loss: 0.000685576000250876, Validation Accuracy: 97.48
Epoch 13, Training Loss: 0.00052426423644647, Validation Accuracy: 97.49
Epoch 14, Train

In [None]:
# Train the three-hidden-layer network with a 50-epoch schedule.
# The trailing comments list other candidate values for each setting.
parameter2 = dict(
    batch_size=[1],        # 1, 2, 4, 8, 16, 32, 64
    epochs=[50],           # 1, 2, 4, 5, 8, 10, 20
    learning_rate=[0.01],  # 0.1, 0.02, 0.01, 0.005, 0.001
)
grid = ParameterGrid(parameter2)

print("----------Three----------")
best_score = best_params = best_model = None
for params in grid:
    # Each parameter combination trains a freshly initialised network.
    model = NeuralNetworkThree()
    performance_metric, curr_model = train_and_evaluate_model(params, model)

    # Keep the combination with the highest validation accuracy so far.
    if best_score is None or performance_metric > best_score:
        best_score, best_params, best_model = performance_metric, params, curr_model

    print(f"Finished training with params {params}. Score: {performance_metric}")

# Only the winning model is scored on the test split.
test_accuracy = get_accuracy(best_model, "t")
print(f"Best params: {best_params}. Best Validation: {best_score}. Final Test Accuracy {test_accuracy}")

----------Three----------
Epoch 1, Training Loss: 2.453505039215088, Validation Accuracy: 20.03
Epoch 2, Training Loss: 0.5540778636932373, Validation Accuracy: 84.86
Epoch 3, Training Loss: 0.135878324508667, Validation Accuracy: 90.05
Epoch 4, Training Loss: 0.04672578349709511, Validation Accuracy: 93.34
Epoch 5, Training Loss: 0.02098885551095009, Validation Accuracy: 95.15
Epoch 6, Training Loss: 0.010926534421741962, Validation Accuracy: 95.81
Epoch 7, Training Loss: 0.006259716581553221, Validation Accuracy: 96.32
Epoch 8, Training Loss: 0.003958367742598057, Validation Accuracy: 96.45
Epoch 9, Training Loss: 0.0025081150233745575, Validation Accuracy: 96.58


KeyboardInterrupt: ignored