## Goals
- Create a convolutional network for classifying CIFAR10 dataset
- Play with model architecture to achieve maximum accuracy
- Optimize hyperparameters to achieve maximum accuracy
- Use TensorBoard for visualization

In [1]:
# imports and setup
import logging
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

# Create a custom logger so that we don't affect the root logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# stop propagating to the root logger to avoid duplicate logs
logger.propagate = False

# Re-running this cell in the same kernel returns the same logger object, so
# without this cleanup every message would be emitted once per run of the cell
# and the previous log file would stay open. Detach and close old handlers.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# Console handler (StreamHandler with no argument writes to sys.stderr)
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)

# Create a name for the experiment based on the current time and date;
# it makes both the log file and the tensorboard run directory unique
exp_name = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

# File handler: FileHandler does not create missing parent directories,
# so make sure the target folder exists before opening the log file
os.makedirs('logs', exist_ok=True)
fh = logging.FileHandler(f'logs/{__name__}_{exp_name}.log')
fh.setLevel(logging.INFO)
fh.setFormatter(formatter)

# add the handlers to the logger
logger.addHandler(ch)
logger.addHandler(fh)

# create a tensorboard summary writer
writer = SummaryWriter('tensorboard_logs/' + exp_name)

# Set random seed for pytorch and numpy so runs are repeatable
torch.manual_seed(42)
np.random.seed(42)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using {device} device")

# classes from CIFAR10, indexed by label id
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

2023-12-30 15:49:19,830 - __main__ - INFO - Using cpu device


In [2]:
# All tunable training settings live in one mapping so they can be logged
# as a unit and looked up by name wherever they are needed.
hyperparams = dict(
    batch_size=64,
    num_epochs=20,
    learning_rate=0.001,
    # Candidates that are currently switched off:
    #   momentum=0.9      (uses previous gradients to help escape local minima)
    #   weight_decay=0.0001
)

In [3]:
# Data loading and preprocessing
def imshow(ax, img):
    """Draw a normalized channel-first image tensor on the given axes.

    Args:
        ax: Object exposing an ``imshow`` method (a matplotlib axes here).
        img: Tensor laid out as (channels, height, width); expected to hold
            values in [-1, 1] as produced by the notebook's normalization.
    """
    # Shift/scale from [-1, 1] back to the [0, 1] range used for display.
    pixels = (img.numpy() + 1) / 2
    # The plotting call wants the color channel last: (C, H, W) -> (H, W, C).
    ax.imshow(pixels.transpose(1, 2, 0))

def visualize_training_examples(train_dataset, classes, num_images=6):
    """Show a random sample of dataset images in a two-column grid.

    Args:
        train_dataset: Indexable collection supporting ``len()`` whose items
            are ``(image, label)`` pairs.
        classes: Sequence mapping a label index to its display name.
        num_images: Number of examples to draw. Capped at the dataset size;
            nothing is drawn when the resulting count is zero or negative.
    """
    # Sampling is without replacement, so never request more than exists.
    num_images = min(num_images, len(train_dataset))
    if num_images <= 0:
        return

    # Select num_images random indices
    indices = np.random.choice(len(train_dataset), size=num_images, replace=False)

    # Round the row count up: with floor division an odd num_images had no
    # grid slot for its last image (and num_images=1 produced zero rows),
    # which made plt.subplot raise.
    num_cols = 2
    num_rows = (num_images + num_cols - 1) // num_cols

    # Figure height grows with the number of rows (4 inches per row)
    plt.figure(figsize=(8, num_rows * 4))
    for i, idx in enumerate(indices):
        ax = plt.subplot(num_rows, num_cols, i + 1)
        image, label = train_dataset[idx]
        imshow(ax, image)
        ax.set_title(classes[label])

    plt.tight_layout()
    plt.show()


def load_data(dataset_loc='./data',batch_size=64):
    """Build train / validation / test loaders for CIFAR10.

    The official training set is split 80/20 into train and validation parts
    with stratification on the class labels and a fixed random state.

    Args:
        dataset_loc: Directory where CIFAR10 is stored (downloaded if absent).
        batch_size: Batch size used by all three loaders.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # One mean/std entry per RGB channel; maps ToTensor's [0, 1] to [-1, 1]
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Dataset class stores features and target
    # DataLoader builds an iterator on top of Dataset class

    # Load CIFAR10 dataset
    full_train_dataset = datasets.CIFAR10(root=dataset_loc, train=True, download=True, transform=transform)
    test_dataset = datasets.CIFAR10(root=dataset_loc, train=False, download=True, transform=transform)

    # Use stratified sampling to split the train dataset into train and validation.
    # Split the *indices* rather than the dataset object: handing the dataset
    # itself to train_test_split makes sklearn index every sample eagerly,
    # turning the whole transformed training set into in-memory Python lists.
    train_indices, validation_indices = train_test_split(
        np.arange(len(full_train_dataset)),
        test_size=0.2,
        random_state=42,
        stratify=full_train_dataset.targets,
    )
    train_dataset = torch.utils.data.Subset(full_train_dataset, train_indices.tolist())
    validation_dataset = torch.utils.data.Subset(full_train_dataset, validation_indices.tolist())

    # Log some info about the dataset sizes
    logger.info(f"Train dataset size: {len(train_dataset)}")
    logger.info(f"Validation dataset size: {len(validation_dataset)}")
    logger.info(f"Test dataset size: {len(test_dataset)}")

    # visualize_training_examples(train_dataset, classes, num_images=20)

    # Data loaders: only the training data needs to be shuffled; evaluation
    # metrics are sums over the whole split and do not depend on order
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, validation_loader, test_loader

In [4]:
class ConvolutionalNN(nn.Module):
    """Three conv -> batch-norm -> ReLU stages followed by a linear classifier.

    Every convolution is 3x3 with stride 1 and padding 1, so the spatial size
    of the input is preserved and the classifier receives
    ``128 * height * width`` features.

    Args:
        num_classes: Number of output logits (default 10).
        in_channels: Number of channels in the input images (default 3).
        image_size: Spatial size of the input; either one int for square
            images or a ``(height, width)`` pair (default 32).
    """

    def __init__(self, num_classes=10, in_channels=3, image_size=32):
        super().__init__()
        if isinstance(image_size, int):
            height = width = image_size
        else:
            height, width = image_size

        # Layer names and creation order are kept stable so state_dict keys
        # and seeded initialization are the same as with the default sizes.
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        # No pooling anywhere, so the feature map keeps the input resolution.
        self.fc1 = nn.Linear(128 * height * width, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Flatten everything except the batch dimension; unlike view(), this
        # also works when the activation tensor is not contiguous.
        x = x.flatten(start_dim=1)
        x = self.fc1(x)
        return x

In [5]:
def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training epoch and report windowed loss / accuracy.

    Metrics are averaged over windows of ``log_interval`` batches and sent to
    the module-level ``logger`` and tensorboard ``writer``. Batches after the
    last complete window of an epoch are trained on but not reported.

    Args:
        model: Network to optimize; switched to training mode.
        device: Device that each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes a ``dataset`` attribute with a length.
        optimizer: Optimizer updating ``model``'s parameters.
        epoch: Epoch index, used in log messages and the tensorboard step.
        log_interval: Number of batches per reporting window (default 100).

    Raises:
        ValueError: If ``log_interval`` is not positive.
    """
    if log_interval <= 0:
        raise ValueError(f"log_interval must be positive, got {log_interval}")

    model.train()
    num_batches = len(train_loader)
    num_samples = len(train_loader.dataset)
    running_loss = 0.0
    running_accuracy = 0.0
    samples_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # move data to device for every iteration
        # gpu memory is limited, so we can't move all data at once
        data, target = data.to(device), target.to(device)
        # zero the gradients, otherwise they will accumulate
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        accuracy = (output.argmax(dim=1) == target).float().mean()
        # compute gradients
        loss.backward()
        # update weights
        optimizer.step()

        running_loss += loss.item()
        running_accuracy += accuracy.item()
        # Count samples actually processed so the progress readout is exact
        # (batch_idx * len(data) lagged one batch behind).
        samples_seen += len(data)
        batches_done = batch_idx + 1
        if batches_done % log_interval == 0:
            window_loss = running_loss / log_interval
            window_accuracy = running_accuracy / log_interval
            logger.info(f'Epoch: {epoch} [{samples_seen}/{num_samples} ({100. * batches_done / num_batches:.0f}%)]\tLoss: {window_loss:.6f}\tAccuracy: {window_accuracy:.6f}')
            # Global step counts batches across epochs so curves are continuous
            global_step = epoch * num_batches + batch_idx
            writer.add_scalar('training loss', window_loss, global_step)
            writer.add_scalar('training accuracy', window_accuracy, global_step)
            running_loss = 0.0
            running_accuracy = 0.0

def test(model, device, test_loader, epoch=None, validation=False):
    """Evaluate ``model`` on a loader and report average loss and accuracy.

    Results go to the module-level ``logger`` and tensorboard ``writer``.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        device: Device that each batch is moved to.
        test_loader: Iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute with a length.
        epoch: Tensorboard step for validation metrics; unused otherwise.
        validation: When true, report under the "validation" labels,
            otherwise under the "test" labels.
    """
    model.eval()
    summed_loss = 0
    num_correct = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            # Accumulate the per-sample loss sum; it is averaged once below.
            summed_loss += F.cross_entropy(logits, labels, reduction='sum').item()
            # The predicted class is the index of the largest logit per row.
            num_correct += (logits.argmax(dim=1) == labels).sum().item()

    num_examples = len(test_loader.dataset)
    mean_loss = summed_loss / num_examples

    if not validation:
        logger.info(f'Test set: Average loss: {mean_loss:.4f}, Accuracy: {num_correct}/{num_examples} ({100. * num_correct / num_examples:.0f}%)')
        writer.add_scalar('test loss', mean_loss)
        writer.add_scalar('test accuracy', num_correct / num_examples)
        return

    logger.info(f'Validation set: Average loss: {mean_loss:.4f}, Accuracy: {num_correct}/{num_examples} ({100. * num_correct / num_examples:.0f}%)')
    writer.add_scalar('validation loss', mean_loss, epoch)
    writer.add_scalar('validation accuracy', num_correct / num_examples, epoch)

In [6]:
# Main execution
# The dataset folder comes from the CIFAR10_DATA_DIR environment variable so
# the notebook is not tied to one machine's home directory; without it the
# data is stored in (and downloaded to, if missing) a local ./data folder.
dataset_loc = os.environ.get('CIFAR10_DATA_DIR', './data')
train_loader, validation_loader, test_loader = load_data(dataset_loc, hyperparams['batch_size'])
model = ConvolutionalNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=hyperparams['learning_rate'])
# print the model summary along with the number of trainable parameters
logger.info("Model summary: \n" + str(model))
# print the number of trainable parameters
logger.info(f"Number of trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

# print the hyperparameters
logger.info(f"Hyperparameters: {hyperparams}")

try:
    for epoch in range(hyperparams['num_epochs']):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, validation_loader, epoch, validation=True)

    # Final evaluation on the held-out test split
    test(model, device, test_loader)
finally:
    # Close the writer even when training is interrupted or fails, so the
    # metrics recorded so far are flushed to disk.
    writer.close()


Files already downloaded and verified
Files already downloaded and verified


2023-12-30 15:49:23,957 - __main__ - INFO - Train dataset size: 40000
2023-12-30 15:49:23,958 - __main__ - INFO - Test dataset size: 10000
2023-12-30 15:49:23,969 - __main__ - INFO - Model summary: 
ConvolutionalNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=131072, out_features=10, bias=True)
)
2023-12-30 15:49:23,969 - __main__ - INFO - Number of trainable parameters: 1404426
2023-12-30 15:49:23,969 - __main__ - INFO - Hyperparameters: {'batch_size': 64, 'num_epochs': 20, 'learning_rate': 0.001}


KeyboardInterrupt: 

Achieved a validation accuracy of 68%.