## Goals
- Create a feedforward neural network for classifying the CIFAR10 dataset
- Compare various activation functions and their pros/cons
- Use TensorBoard for visualization

In [216]:
# imports and setup
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import logging
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split

# Logging: INFO-level messages on the root logger, scalars to a TensorBoard run directory
logging.basicConfig(level=logging.INFO)
writer = SummaryWriter(log_dir='runs/cifar10_experiment_4')

# Reproducibility: seed both the NumPy and the PyTorch random number generators
np.random.seed(42)
torch.manual_seed(42)

# Prefer the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
logging.info(f"Using {device} device")

# Human-readable CIFAR10 class names, indexed by integer label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

INFO:root:Using cpu device


In [217]:
# Data loading and preprocessing
def imshow(ax, img):
    """Draw a normalized channel-first image tensor on the given axes.

    Args:
        ax: Object exposing an ``imshow`` method (e.g. a matplotlib Axes).
        img: Tensor laid out as (channel, height, width) with values in
            [-1, 1], i.e. normalized with mean 0.5 and std 0.5.
    """
    # imshow wants the color channel last: (C, H, W) -> (H, W, C)
    channels_last = img.numpy().transpose(1, 2, 0)
    # Map [-1, 1] back to [0, 1], the range imshow expects for float RGB data
    ax.imshow((channels_last + 1) / 2)

def visualize_training_examples(train_dataset, classes, num_images=6):
    """Plot a random sample of dataset images in a two-column grid.

    Args:
        train_dataset: Indexable, sized dataset whose items are
            ``(image, label)`` pairs; ``image`` is passed to ``imshow`` and
            ``label`` is used to index ``classes``.
        classes: Sequence of class names, indexed by label.
        num_images: Number of distinct images to show (default 6).

    Raises:
        ValueError: If ``num_images`` is smaller than 1, or (from
            ``np.random.choice``) larger than the dataset.
    """
    if num_images < 1:
        raise ValueError(f"num_images must be at least 1, got {num_images}")

    # Sample without replacement so no image is shown twice
    indices = np.random.choice(len(train_dataset), size=num_images, replace=False)

    # Two columns; round the row count up so an odd num_images still fits
    # (num_images // 2 rows would leave the last image without a grid cell).
    num_cols = 2
    num_rows = (num_images + num_cols - 1) // num_cols

    # Figure height scales with the number of rows (4 inches per row)
    plt.figure(figsize=(8, num_rows * 4))
    for i, idx in enumerate(indices):
        ax = plt.subplot(num_rows, num_cols, i + 1)
        image, label = train_dataset[idx]
        imshow(ax, image)
        ax.set_title(classes[label])

    plt.tight_layout()
    plt.show()


def load_data(dataset_loc='./data', batch_size=64):
    """Build train, validation and test data loaders for CIFAR10.

    The official training set is split 80/20 into train and validation
    subsets using a stratified split with a fixed random state; the official
    test set is used as-is.

    Args:
        dataset_loc: Root directory where CIFAR10 is stored (downloaded there
            if missing).
        batch_size: Batch size used by all three loaders.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # One mean/std per RGB channel; maps pixel values from [0, 1] to [-1, 1]
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # Dataset class stores features and target
    # DataLoader builds an iterator on top of Dataset class

    # Load CIFAR10 dataset
    full_train_dataset = datasets.CIFAR10(root=dataset_loc, train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root=dataset_loc, train=False, download=True, transform=transform)

    # Stratified split on sample indices rather than on the dataset itself:
    # handing the Dataset to train_test_split makes sklearn index it item by
    # item and materialize every transformed sample into a Python list.
    train_indices, validation_indices = train_test_split(
        np.arange(len(full_train_dataset)),
        test_size=0.2,
        random_state=42,
        stratify=full_train_dataset.targets,
    )
    train_dataset = torch.utils.data.Subset(full_train_dataset, train_indices.tolist())
    validation_dataset = torch.utils.data.Subset(full_train_dataset, validation_indices.tolist())

    # Log some info about the dataset type and size
    logging.info(f"Train dataset size: {len(train_dataset)}")
    logging.info(f"Test dataset size: {len(test_dataset)}")

    # visualize_training_examples(train_dataset, classes, num_images=20)

    # Data loaders: only the training data needs shuffling; evaluation
    # metrics are sums over the whole set and do not depend on batch order.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, validation_loader, test_loader

In [218]:
# Define the model
class FullyConnectedNN(nn.Module):
    """Fully connected classifier: 3072 inputs -> 500 hidden units (ReLU) -> 10 outputs."""

    def __init__(self):
        super().__init__()
        input_size = 3 * 32 * 32  # CIFAR10 images are 32x32 pixels with 3 channels
        hidden_size = 500
        num_classes = 10  # CIFAR10 has 10 classes
        # nn.Linear parameters are registered with autograd automatically
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Flatten each image to a 3072-vector and return the raw class scores."""
        flat = x.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        # No softmax here: the scores are returned unnormalized
        return self.fc2(hidden)

In [219]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Every 100 batches the mean loss and mean accuracy of those 100 batches
    are written to the log and to the module-level TensorBoard ``writer``,
    after which the running totals are reset.

    Args:
        model: Network to optimize; switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimizer that updates the model parameters.
        epoch: Index of the current epoch, used in log messages and to
            compute the TensorBoard step.
    """
    model.train()
    loss_sum = 0.0
    accuracy_sum = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        # Batches are moved one at a time because GPU memory is limited
        data = data.to(device)
        target = target.to(device)

        # Gradients accumulate across backward() calls unless cleared first
        optimizer.zero_grad()
        logits = model(data)
        loss = F.cross_entropy(logits, target)
        batch_accuracy = (logits.argmax(dim=1) == target).float().mean()
        loss.backward()   # compute gradients
        optimizer.step()  # update weights

        loss_sum += loss.item()
        accuracy_sum += batch_accuracy.item()

        # Report only on every 100th batch (+1 so the 0th batch is skipped)
        if (batch_idx + 1) % 100 != 0:
            continue

        mean_loss = loss_sum / 100
        mean_accuracy = accuracy_sum / 100
        global_step = epoch * len(train_loader) + batch_idx
        logging.info(f'Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {mean_loss:.6f}\tAccuracy: {mean_accuracy:.6f}')
        writer.add_scalar('training loss', mean_loss, global_step)
        writer.add_scalar('training accuracy', mean_accuracy, global_step)
        loss_sum = 0.0
        accuracy_sum = 0.0

def test(model, device, test_loader, epoch=None, validation=False):
    """Evaluate ``model`` on a data loader and report loss and accuracy.

    The average loss and the accuracy over the whole set are written to the
    log and to the module-level TensorBoard ``writer``.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device each batch is moved to before the forward pass.
        test_loader: Loader of ``(data, target)`` batches; its ``dataset``
            attribute must support ``len``.
        epoch: TensorBoard step for validation scalars (unused for test).
        validation: When True, report the results as validation metrics;
            otherwise report them as test metrics.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # reduction='sum' adds up per-sample losses so the total can be
            # divided by the dataset size once, regardless of batch sizes
            test_loss += F.cross_entropy(output, target, reduction='sum').item()
            # dim=1 picks the highest-scoring class for each row (sample)
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    # Fraction in [0, 1], the same scale train() uses for 'training accuracy'
    accuracy = correct / num_samples

    if validation:
        logging.info(f'Validation set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{num_samples} ({100. * correct / num_samples:.0f}%)')
        # log validation loss and accuracy to tensorboard, one point per epoch
        writer.add_scalar('validation loss', test_loss, epoch)
        writer.add_scalar('validation accuracy', accuracy, epoch)
    else:
        logging.info(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{num_samples} ({100. * correct / num_samples:.0f}%)')
        # log test loss and accuracy to tensorboard
        writer.add_scalar('test loss', test_loss)
        writer.add_scalar('test accuracy', accuracy)

In [220]:
# Main execution: build the loaders, train with per-epoch validation, then evaluate once on the test set
dataset_loc = '/Users/saip/My Drive/machine-learning-fundamentals/datasets'
batch_size = 64
train_loader, validation_loader, test_loader = load_data(dataset_loc=dataset_loc, batch_size=batch_size)

model = FullyConnectedNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 50 epochs; validate after every epoch to watch for overfitting
for epoch in range(50):
    train(model=model, device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=validation_loader, epoch=epoch, validation=True)

# Single final evaluation on the held-out test set, then flush TensorBoard logs
test(model=model, device=device, test_loader=test_loader)
writer.close()


Files already downloaded and verified
Files already downloaded and verified


INFO:root:Train dataset size: 40000
INFO:root:Test dataset size: 10000
INFO:root:Validation set: Average loss: 1.5979, Accuracy: 4486/10000 (45%)
INFO:root:Validation set: Average loss: 1.5298, Accuracy: 4786/10000 (48%)
INFO:root:Validation set: Average loss: 1.4821, Accuracy: 4918/10000 (49%)
INFO:root:Validation set: Average loss: 1.5020, Accuracy: 4993/10000 (50%)
INFO:root:Validation set: Average loss: 1.4571, Accuracy: 5100/10000 (51%)
INFO:root:Validation set: Average loss: 1.5259, Accuracy: 5076/10000 (51%)
INFO:root:Validation set: Average loss: 1.5149, Accuracy: 5190/10000 (52%)
INFO:root:Validation set: Average loss: 1.5539, Accuracy: 5149/10000 (51%)
INFO:root:Validation set: Average loss: 1.6266, Accuracy: 5088/10000 (51%)
INFO:root:Validation set: Average loss: 1.6200, Accuracy: 5168/10000 (52%)
INFO:root:Validation set: Average loss: 1.6277, Accuracy: 5157/10000 (52%)
INFO:root:Validation set: Average loss: 1.7994, Accuracy: 5147/10000 (51%)
INFO:root:Validation set: Ave

v1 - 2 layers

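- Training accuracy kept increasing up to about 90%, but validation accuracy plateaued at around 50%. For reference, the state of the art on CIFAR10 stands at 99.5%.
- The loss shows a similar trend: validation loss stops improving after the first few epochs and then starts to rise.
- The model is overfitting: it is learning the noise in the training set in order to keep decreasing the training loss.
- Does this mean the model is complex enough, since the training accuracy reached 90%?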