<a href="https://colab.research.google.com/github/mbcruz96/ConvNet/blob/main/ConvNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Architecture

In [None]:
import time
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    '''
    Family of five small classifiers sharing one set of layers.

    The `mode` passed to the constructor (1-5) selects which `model_N` method
    is bound as `forward`, fixing the network graph for the lifetime of the
    instance:

      1: flatten -> fcl1 -> sigmoid
      2: conv stack -> fcl2 -> sigmoid
      3: conv stack -> fcl2 -> ReLU
      4: conv stack -> fcl2 -> ReLU -> fcl3
      5: conv stack -> fcl4 -> ReLU -> dropout -> fcl5

    The layer sizes assume single-channel 28x28 inputs: two 5x5 convolutions,
    each followed by 2x2 max pooling, reduce 28x28 to 40 maps of 4x4.

    NOTE(review): the final layers emit 100 (modes 1-4) or 1000 (mode 5)
    values rather than one per class; confirm this is intended.
    '''

    # Drop probability applied between the two fully connected layers of mode 5.
    dropout_p = 0.5

    def __init__(self, mode):
        '''
        mode: integer in 1..5 selecting the forward pass (see class docstring).
        Raises ValueError if `mode` is not one of the supported values.
        '''
        super(ConvNet, self).__init__()
        # Layer creation order is kept stable so that seeded initialisation
        # produces the same weights as before.
        self.conv1 = nn.Conv2d(1, 40, 5)
        self.conv2 = nn.Conv2d(40, 40, 5)
        self.fcl1 = nn.Linear(1 * 28 * 28, 100)
        self.fcl2 = nn.Linear(40 * 4 * 4, 100)
        self.fcl3 = nn.Linear(100, 100)
        self.fcl4 = nn.Linear(40 * 4 * 4, 1000)
        self.fcl5 = nn.Linear(1000, 1000)

        # Bind the forward pass once; it stays fixed for training and testing.
        forward_by_mode = {
            1: self.model_1,
            2: self.model_2,
            3: self.model_3,
            4: self.model_4,
            5: self.model_5,
        }
        if mode not in forward_by_mode:
            # Raise instead of print + exit(0): exiting with status 0 reports
            # success and terminates the whole interpreter / notebook kernel.
            raise ValueError(
                "Invalid mode {} selected. Select between 1-5".format(mode))
        self.forward = forward_by_mode[mode]

    def num_flat_features(self, x):
        '''
        Returns the number of values per sample in `x`, i.e. the product of
        every dimension except the leading (batch) dimension.
        '''
        num_features = 1
        for dim in x.size()[1:]:
            num_features *= dim
        return num_features

    def _flatten(self, x):
        '''Reshapes `x` to (batch, features) for the fully connected layers.'''
        return x.view(-1, self.num_flat_features(x))

    def _conv_features(self, X):
        '''
        Shared feature extractor for modes 2-5: two (conv -> ReLU -> 2x2 max
        pool) stages followed by flattening.
        '''
        out = F.max_pool2d(F.relu(self.conv1(X)), 2)
        out = F.max_pool2d(F.relu(self.conv2(out)), 2)
        return self._flatten(out)

    # Baseline model. step 1
    def model_1(self, X):
        '''One fully connected layer with a sigmoid activation.'''
        return torch.sigmoid(self.fcl1(self._flatten(X)))

    # Use two convolutional layers.
    def model_2(self, X):
        '''Two convolutional layers + one fully connected layer (sigmoid).'''
        return torch.sigmoid(self.fcl2(self._conv_features(X)))

    # Replace sigmoid with ReLU.
    def model_3(self, X):
        '''Two convolutional layers + one fully connected layer (ReLU).'''
        return F.relu(self.fcl2(self._conv_features(X)))

    # Add one extra fully connected layer.
    def model_4(self, X):
        '''
        Two convolutional layers + two fully connected layers, with ReLU
        between them; the last layer's output is returned without activation.
        '''
        hidden = F.relu(self.fcl2(self._conv_features(X)))
        return self.fcl3(hidden)

    # Use Dropout now.
    def model_5(self, X):
        '''
        Two convolutional layers + two wider fully connected layers, with ReLU
        and dropout between them.

        Dropout is only active while the module is in training mode, so
        evaluation (`model.eval()`) stays deterministic.
        '''
        hidden = F.relu(self.fcl4(self._conv_features(X)))
        hidden = F.dropout(hidden, p=self.dropout_p, training=self.training)
        return self.fcl5(hidden)



# Training

In [None]:
from __future__ import print_function
import argparse
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
#from ConvNet import ConvNet
import argparse
import numpy as np

def train(model, device, train_loader, optimizer, criterion, epoch, batch_size):
    '''
    Trains the model for one epoch and optimizes it.
    model: The model to train. Should already be in correct device.
    device: 'cuda' or 'cpu'.
    train_loader: iterable yielding (data, target) batches.
    optimizer: optimizer to use for model parameter updates.
    criterion: used to compute loss for prediction and target
    epoch: Current epoch to train for (unused; kept for interface compatibility).
    batch_size: Nominal batch size (unused; the real number of samples seen is
        counted, so a smaller final batch no longer skews the accuracy).

    Returns (train_loss, train_acc): the mean of the per-batch losses and the
    fraction (0..1) of correctly classified samples. If the loader yields no
    batches the loss is NaN and the accuracy is 0.0.
    '''
    # Set model to train mode before each epoch
    model.train()

    losses = []
    correct = 0
    seen = 0

    # Iterate over entire training samples (1 epoch)
    for data, target in train_loader:
        # Push data/label to correct device
        data, target = data.to(device), target.to(device)

        # Reset gradients so they do not accumulate across batches
        optimizer.zero_grad()

        # Forward pass, loss and backward pass for the current batch
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        losses.append(loss.item())

        # Update model parameters from the computed gradients
        optimizer.step()

        # Predicted class = index of the largest output; compare the whole
        # batch at once instead of looping over samples in Python.
        pred = output.argmax(dim=1)
        correct += pred.eq(target.view_as(pred)).sum().item()
        seen += target.size(0)

    train_loss = float(np.mean(losses)) if losses else float('nan')
    train_acc = correct / seen if seen else 0.0
    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        train_loss, correct, seen, 100. * train_acc))
    return train_loss, train_acc



def test(model, device, test_loader):
    '''
    Tests the model.
    model: The model to train. Should already be in correct device.
    device: 'cuda' or 'cpu'.
    test_loader: dataloader for test samples.
    '''

    # Set model to eval mode to notify all layers.
    model.eval()

    losses = []
    correct = 0

    # Set torch.no_grad() to disable gradient computation and backpropagation
    with torch.no_grad():
        for batch_idx, sample in enumerate(test_loader):
            data, target = sample
            data, target = data.to(device), target.to(device)


            # Predict for data by doing forward pass
            output = model(data)

            # Compute loss based on same criterion as training
            criterion = nn.CrossEntropyLoss()
            # Compute loss based on same criterion as training
            loss = criterion(output, target)

            # Append loss to overall test loss
            losses.append(loss.item())

            # Get predicted index by selecting maximum log-probability
            pred = output.argmax(dim=1, keepdim=True)

            # Count correct predictions overall
            for i, element in enumerate(pred):
              if pred[i] == target[i]:
                correct += 1

    test_loss = float(np.mean(losses))
    accuracy = 100. * correct / len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))

    return test_loss, accuracy


def run_main(FLAGS):
    '''
    Builds the model selected by FLAGS.mode, trains it on MNIST for
    FLAGS.num_epochs epochs and reports the best test accuracy observed.

    FLAGS: namespace providing `mode`, `learning_rate`, `batch_size` and
        `num_epochs`.
    '''
    # Prefer the GPU whenever one is visible to torch.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Torch device selected: ", device)

    # Model, loss and optimizer.
    model = ConvNet(FLAGS.mode).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=FLAGS.learning_rate)

    # Per-sample preprocessing: tensor conversion followed by normalisation
    # (constants are presumably the MNIST mean/std -- confirm).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # Only the training split requests a download; both splits share the
    # same root directory.
    train_set = datasets.MNIST('./data/', train=True, download=True,
                               transform=transform)
    test_set = datasets.MNIST('./data/', train=False, transform=transform)

    loader_options = dict(batch_size=FLAGS.batch_size, num_workers=4)
    train_loader = DataLoader(train_set, shuffle=True, **loader_options)
    test_loader = DataLoader(test_set, shuffle=False, **loader_options)

    # Alternate one training epoch with one evaluation pass, remembering the
    # highest test accuracy seen so far.
    best_accuracy = 0.0
    for epoch in range(1, FLAGS.num_epochs + 1):
        train(model, device, train_loader, optimizer, criterion, epoch,
              FLAGS.batch_size)
        _, test_accuracy = test(model, device, test_loader)
        best_accuracy = max(best_accuracy, test_accuracy)

    print("accuracy is {:2.2f}".format(best_accuracy))

    print("Training and evaluation finished")


if __name__ == '__main__':
    # Command-line options for the CNN exercise, declared as
    # (flag, type, default, help) and registered in order.
    parser = argparse.ArgumentParser('CNN Exercise.')
    _options = (
        ('--mode', int, 5, 'Select mode between 1-5.'),
        ('--learning_rate', float, 0.03, 'Initial learning rate.'),
        ('--num_epochs', int, 60, 'Number of epochs to run trainer.'),
        ('--batch_size', int, 10,
         'Batch size. Must divide evenly into the dataset sizes.'),
        ('--log_dir', str, 'logs', 'Directory to put logging.'),
    )
    for _flag, _type, _default, _help in _options:
        parser.add_argument(_flag, type=_type, default=_default, help=_help)

    # parse_known_args returns unrecognised arguments separately instead of
    # aborting, so extra argv entries do not stop the run.
    FLAGS, unparsed = parser.parse_known_args()

    run_main(FLAGS)



Torch device selected:  cuda
Train set: Average loss: 0.1273, Accuracy: 57758/60000 (96%)


Test set: Average loss: 0.0427, Accuracy: 9873/10000 (99%)

Train set: Average loss: 0.0390, Accuracy: 59286/60000 (99%)


Test set: Average loss: 0.0331, Accuracy: 9893/10000 (99%)

Train set: Average loss: 0.0251, Accuracy: 59531/60000 (99%)


Test set: Average loss: 0.0250, Accuracy: 9925/10000 (99%)

Train set: Average loss: 0.0185, Accuracy: 59657/60000 (99%)


Test set: Average loss: 0.0396, Accuracy: 9871/10000 (99%)

Train set: Average loss: 0.0129, Accuracy: 59761/60000 (100%)


Test set: Average loss: 0.0267, Accuracy: 9920/10000 (99%)

Train set: Average loss: 0.0101, Accuracy: 59804/60000 (100%)


Test set: Average loss: 0.0259, Accuracy: 9926/10000 (99%)

Train set: Average loss: 0.0060, Accuracy: 59892/60000 (100%)


Test set: Average loss: 0.0242, Accuracy: 9934/10000 (99%)

Train set: Average loss: 0.0051, Accuracy: 59899/60000 (100%)


Test set: Average loss: 0.0260, Accuracy: 9