# Programming Assignment 1

Author: Mao Nishino

## Data Preparation

In [13]:
import torch
import matplotlib.pyplot as plt

# Prefer the GPU when CUDA is usable, otherwise run everything on the CPU
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using {device} device")

def load_data(path: str):
    """ Load labelled 16x16 samples from a whitespace-separated text file.

    Every non-blank line is parsed as a sequence of floats: the first value
    is converted to an int label and the remaining values are reshaped into a
    16x16 tensor. Blank (or whitespace-only) lines are skipped.

    Args:
        path: the path of the file to read the data from.
    Returns:
        data_list: a list of tuples ``(data tensor, label)``; the tensor has
            shape (16, 16) and the label is an int.
    Raises:
        ValueError: if a token on a line cannot be parsed as a float.
        RuntimeError: if a line does not carry exactly 256 values after the
            label (the reshape to 16x16 fails).
    """
    data_list = []
    with open(path) as file:
        # Stream the file line by line instead of materialising it twice.
        for line in file:
            values = [float(token) for token in line.split()]
            if not values:
                # A trailing or stray empty line carries no sample.
                continue
            image = torch.tensor(values[1:]).reshape(16, 16)
            data_list.append((image, int(values[0])))
    return data_list

# Read both splits from the text files next to this notebook
train_list = load_data('zip_train.txt')
test_list = load_data('zip_test.txt')

print(f"Loaded {len(train_list)} data for train data")
# The second message reports the test split, so label it as such
print(f"Loaded {len(test_list)} data for test data")

# Dataset adapter over an in-memory list of samples
class MyDataset(torch.utils.data.Dataset):
    """Expose an already-loaded list of samples through the Dataset protocol."""

    def __init__(self, data_list):
        # Keep a reference to the caller's list; nothing is copied.
        self.data_list = data_list

    def __getitem__(self, idx):
        """Return whatever is stored at position ``idx`` of the list."""
        samples = self.data_list
        return samples[idx]

    def __len__(self):
        """Return the number of stored samples."""
        samples = self.data_list
        return len(samples)

# Wrap the loaded sample lists so they can be fed to DataLoader
train_dataset, test_dataset = MyDataset(train_list), MyDataset(test_list)

# Hold out the last 20% (by count) of the training samples for validation;
# random_split decides which samples land in which part
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset,
    [train_size, val_size],
)

Using cuda device
Loaded 7291 data for train data
Loaded 2007 data for train data


In [68]:
import random

# Define a 4 layer ReLU neural network

class NeuralNetwork(torch.nn.Module):
    """Fully connected network: 256 inputs -> n1 -> n2 -> n3 -> 10 outputs."""

    def __init__(self, n1, n2, n3, activations):
        """ Define the layers of the neural network.

        Args:
            n1: the number of neurons in the first layer.
            n2: the number of neurons in the second layer.
            n3: the number of neurons in the third layer.
            activations: a list of at least four strings ("relu" or
                "sigmoid"); entry i names the activation applied after
                layer i+1. Entries beyond the fourth are never used.
        Raises:
            ValueError: if fewer than four activation names are supplied.
            KeyError: if a name is neither "relu" nor "sigmoid".
        """
        super().__init__()

        # Fail at construction time rather than with an IndexError deep
        # inside forward() when an activation is missing.
        names = list(activations)
        if len(names) < 4:
            raise ValueError(
                f"expected at least 4 activation names, got {len(names)}"
            )

        self.flatten = torch.nn.Flatten()
        self.activation_functions = {
            "relu": torch.nn.ReLU(),
            "sigmoid": torch.nn.Sigmoid()
        }
        self.layer1 = torch.nn.Linear(256, n1)
        self.layer2 = torch.nn.Linear(n1, n2)
        self.layer3 = torch.nn.Linear(n2, n3)
        self.layer4 = torch.nn.Linear(n3, 10)

        self.activations = torch.nn.ModuleList(
            [self.activation_functions[name] for name in names]
        )

    def forward(self, x):
        """Flatten ``x`` and pass it through the four linear layers.

        Each linear layer is followed by its configured activation,
        including the last one.
        NOTE(review): the fourth activation is applied to the 10 output
        scores; this file feeds them to CrossEntropyLoss, which is documented
        to take unnormalised logits - confirm the output activation is
        intended.
        """
        x = self.flatten(x)
        layers = (self.layer1, self.layer2, self.layer3, self.layer4)
        # zip stops after the four layers, so surplus activations are ignored.
        for layer, activation in zip(layers, self.activations):
            x = activation(layer(x))
        return x

# Define the loss function shared by training and evaluation
# (no optimizer is defined here; the search creates one per model)
loss_fn = torch.nn.CrossEntropyLoss()

# Train the model - This corresponds to 1 epoch
def train(dataloader, model, loss_fn, optimizer):
    """Run one pass of gradient updates over every batch of ``dataloader``.

    Args:
        dataloader: iterable yielding ``(X, y)`` batches of tensors.
        model: the module to optimise; called as ``model(X)``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a tensor that
            supports ``backward()``.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
    """
    # test() puts the model in eval mode, so switch back explicitly before
    # updating weights.
    model.train()
    for X, y in dataloader:
        # Move the batch to the module-level ``device``
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Test the model
def test(dataloader, model, loss_fn):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Args:
        dataloader: iterable yielding ``(X, y)`` batches; must expose
            ``dataloader.dataset`` with a length.
        model: the module to evaluate; it is switched to eval mode and left
            in that mode.
        loss_fn: unused; kept in the signature so existing callers keep
            working (the loss that used to be accumulated here was never
            returned).
    Returns:
        The fraction (0.0 to 1.0) of samples whose arg-max prediction over
        dimension 1 equals the label, or 0.0 when the dataset is empty.
    """
    size = len(dataloader.dataset)
    model.eval()
    if size == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    correct = 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            correct += (pred.argmax(1) == y).sum().item()
    return correct / size

def random_search_4layerReLU():
    """ Random search over the layer widths of the 4 layer ReLU network.

    The search runs three "coarse to fine" rounds. In every round each trial
    draws n3 from [10, max(best n3, 11)), then n2 from [n3, max(best n2, 11))
    and n1 from [n2, max(best n1, 11)), where the "best" widths start at
    1024 for all three layers. A fresh model is trained with SGD (lr=1e-2)
    for the current number of epochs and scored on the validation split; a
    trial that beats the best accuracy seen so far (across all rounds) is
    printed and remembered.

    After each round the upper bounds become the best widths found so far,
    the number of samples is halved, the epochs grow by 10 and the batch
    size is halved. The starting values are 200 samples, 5 epochs and a
    batch size of 32. Nothing is returned; progress is reported via print.
    """
    best_accuracy = 0
    best_n = [1024, 1024, 1024]
    new_best_n = best_n
    samples, epochs, batch_size = 200, 5, 32

    for _round in range(3):
        # Loaders are rebuilt every round because the batch size changes
        train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

        # The bounds are fixed for the whole round (best_n only changes below)
        upper_n1, upper_n2, upper_n3 = (max(bound, 11) for bound in best_n)

        for _trial in range(samples):
            # Draw from the last layer backwards so that n3 <= n2 <= n1
            n3 = torch.randint(10, upper_n3, (1,)).item()
            n2 = torch.randint(n3, upper_n2, (1,)).item()
            n1 = torch.randint(n2, upper_n1, (1,)).item()

            activation_options = ['relu']
            activations = [random.choice(activation_options) for _ in range(4)]

            model = NeuralNetwork(n1, n2, n3, activations).to(device)
            # Fixed learning rate for simplicity
            optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

            for _epoch in range(epochs):
                train(train_dataloader, model, loss_fn, optimizer)

            accuracy = test(val_dataloader, model, loss_fn)
            if accuracy <= best_accuracy:
                continue

            best_accuracy = accuracy
            new_best_n = [n1, n2, n3]
            print(f"Best (n1, n2, n3): {new_best_n}, Best Activations: {activations}, Accuracy: {(100*best_accuracy):>0.1f}%")
            print(f"Epochs: {epochs}, Batch size: {batch_size}")

        # Narrow the search range and spend more effort per trial next round
        best_n = new_best_n
        samples = samples // 2
        epochs = epochs + 10
        batch_size = batch_size // 2
        print(f"Now the best n is {best_n} and the number of samples is {samples} and the number of epochs is {epochs} and the batch size is {batch_size}")

# Launch the search; it prints every new best configuration as it is found
random_search_4layerReLU()

Best (n1, n2, n3): [982, 794, 564], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 53.2%
Epochs: 5, Batch size: 32
Best (n1, n2, n3): [430, 326, 259], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 61.1%
Epochs: 5, Batch size: 32
Best (n1, n2, n3): [1010, 945, 786], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 83.8%
Epochs: 5, Batch size: 32
Best (n1, n2, n3): [1017, 779, 760], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 89.4%
Epochs: 5, Batch size: 32
Best (n1, n2, n3): [1000, 994, 895], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 90.5%
Epochs: 5, Batch size: 32
Best (n1, n2, n3): [1014, 922, 374], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 91.0%
Epochs: 5, Batch size: 32
Best (n1, n2, n3): [1015, 1013, 1007], Best Activations: ['relu', 'relu', 'relu', 'relu'], Accuracy: 91.2%
Epochs: 5, Batch size: 32
Now the best n is [1015, 1013, 1007] and the number of samples is 100 and the numbe

KeyboardInterrupt: 

# Locally Connected NN Architecture

In [None]:
class LocallyConnected2D(torch.nn.Module):
    """Patch-wise linear layer built on ``unfold``.

    The input is cut into ``kernel_size`` x ``kernel_size`` patches and every
    patch is multiplied by the same flattened weight matrix, then a
    per-output-channel bias is added.

    NOTE(review): ``weights`` has no per-location axis, so the same filter is
    used at every position (this computes a convolution with the spatial
    axes flattened). A layer with unshared, per-location weights would need
    the output size at construction time - confirm which one is intended.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        """Create the parameters.

        Args:
            in_channels: number of channels of the input.
            out_channels: number of channels produced per patch.
            kernel_size: side length of the square patches.
            stride: step between neighbouring patches.
            padding: padding passed to ``unfold``.
        """
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.weights = torch.nn.Parameter(torch.randn(out_channels, in_channels, kernel_size, kernel_size))
        self.bias = torch.nn.Parameter(torch.randn(out_channels))

    def forward(self, x):
        """Apply the layer.

        Args:
            x: batched input of shape (N, in_channels, H, W).
        Returns:
            Tensor of shape (N, out_channels, L), where L is the number of
            patches extracted by ``unfold``.
        """
        # (N, in_channels * k * k, L): one column per extracted patch
        patches = torch.nn.functional.unfold(
            x, self.kernel_size, padding=self.padding, stride=self.stride
        )
        # (out_channels, in_channels * k * k) @ (N, in_channels * k * k, L)
        flat_weights = self.weights.view(self.out_channels, -1)
        out = torch.matmul(flat_weights, patches)
        # Bias belongs to the channel axis (dim 1), not to the trailing patch
        # axis that plain broadcasting of a 1-D tensor would pick.
        return out + self.bias.unsqueeze(-1)


# Convolutional NN Architecture

In [None]:
class ConvNet(torch.nn.Module):
    """Two conv + max-pool stages followed by two linear layers (10 outputs).

    ``fc1`` takes 64*4*4 features, which is what the two 2x2 poolings leave
    for 16x16 inputs (16 -> 8 -> 4 with the padded 3x3 convolutions).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = torch.nn.Linear(64*4*4, 128)
        self.fc2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 class scores for a batch of single-channel images.

        Args:
            x: tensor of shape (N, 1, 16, 16), or (N, 16, 16) without the
                channel axis (the layout the samples in this file are
                stored in); the channel axis is then inserted.
        Returns:
            Tensor of shape (N, 10).
        """
        if x.dim() == 3:
            # Samples are stored as bare 16x16 tensors; Conv2d needs an
            # explicit channel axis.
            x = x.unsqueeze(1)
        x = torch.nn.functional.relu(self.conv1(x))
        x = torch.nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        x = torch.nn.functional.relu(self.conv2(x))
        x = torch.nn.functional.max_pool2d(x, kernel_size=2, stride=2)
        # Flatten per sample so a wrongly sized input fails in fc1 instead of
        # being silently folded into the batch axis.
        x = x.flatten(1)
        x = torch.nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x