In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms

import time
import numpy as np

from tqdm import tqdm

In [2]:
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


### Building a Convolutional Neural Network

#### References

[Comparison between activation functions](https://www.aitude.com/comparison-of-sigmoid-tanh-and-relu-activation-functions/#:~:text=ReLu%20is%20the%20best%20and,compare%20to%20other%20activation%20function.)\
[Adaptive Learning](https://towardsdatascience.com/learning-rate-schedules-and-adaptive-learning-rate-methods-for-deep-learning-2c8f433990d1)\
[Momentum or No Momentum](https://medium.com/analytics-vidhya/why-use-the-momentum-optimizer-with-minimal-code-example-8f5d93c33a53)\
[Momentum vs Adaptive Learning](https://medium.com/@vinodhb95/momentum-optimizer-6023aa445e18)

In [3]:
class MyCNN(nn.Module):
    def __init__(self, activation):
        super(MyCNN, self).__init__()
        self.activation = activation
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout = nn.Dropout(0.5)
        self.fc1 = nn.Linear(4096, 128)
        self.fc2 = nn.Linear(128, 10)
        
    def forward(self, x):
        x = self.activation(self.conv1(x))
        x = self.activation(self.conv2(x))
        x = self.pool1(x)
        x = self.activation(self.conv3(x))
        x = self.activation(self.conv4(x))
        x = self.pool2(x)
        x = x.view(-1, 64*8*8)
        x = self.dropout(x)
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x


In [4]:
def training_loop(optimizer, criterion = nn.CrossEntropyLoss().cuda()):
    
    num_epochs = 10
    for epoch in tqdm(range(num_epochs)):
        model.train()
        for images, labels in train_loader:
            images, labels = images.cuda(), labels.cuda() # add this line
            
            optimizer.zero_grad()
            
            outputs = model(images)
            
            loss = criterion(outputs, labels)
            loss.backward()
            
            optimizer.step()

In [5]:
def testing_loop():
    model.eval()
    
    total_correct = 0
    total_images = 0
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.cuda(), labels.cuda()
            
            outputs = model(images)
            
            predictions = torch.argmax(outputs, dim=1)
            
            total_correct += (predictions == labels).sum().item()
            total_images += labels.shape[0]
            
    accuracy = total_correct / total_images
    return accuracy

In [6]:
def ModelTester(model,optimizer, name, opt_name):

    if torch.cuda.is_available():
        model.cuda()
    
    start = time.time()
    
    training_loop(optimizer)
    print()
    print("###################################################################################################################")
    print(f"# Training Time for activation {name} with {opt_name}: ", time.time() - start, "seconds")
    test_accuracy = testing_loop()
    print(f"# Classification Performance for activation {name} with {opt_name}: {test_accuracy}")
    print("###################################################################################################################")

In [7]:
activation = nn.ReLU().cuda()
model = MyCNN(activation)
ModelTester(model, optimizer = optim.Adam(model.parameters()), name = "relu", opt_name = "Adaptive Learning (Adam)")

100%|██████████| 10/10 [01:31<00:00,  9.14s/it]



###################################################################################################################
# Training Time for activation relu with Adaptive Learning (Adam):  91.3864860534668 seconds
# Classification Performance for activation relu with Adaptive Learning (Adam): 0.7593
###################################################################################################################


In [8]:
activation = nn.Tanh().cuda()
model = MyCNN(activation)
ModelTester(model, optimizer = optim.Adam(model.parameters()), name = "tanh", opt_name = "Adaptive Learning (Adam)")

100%|██████████| 10/10 [01:26<00:00,  8.66s/it]



###################################################################################################################
# Training Time for activation tanh with Adaptive Learning (Adam):  86.5818030834198 seconds
# Classification Performance for activation tanh with Adaptive Learning (Adam): 0.709
###################################################################################################################


In [9]:
activation = nn.Sigmoid().cuda()
model = MyCNN(activation)
ModelTester(model, optimizer = optim.Adam(model.parameters()), name = "sigmoid", opt_name = "Adaptive Learning (Adam)")

100%|██████████| 10/10 [01:25<00:00,  8.56s/it]



###################################################################################################################
# Training Time for activation sigmoid with Adaptive Learning (Adam):  85.5919976234436 seconds
# Classification Performance for activation sigmoid with Adaptive Learning (Adam): 0.6227
###################################################################################################################


In [10]:
activation = nn.ReLU().cuda()
model = MyCNN(activation)
ModelTester(model, optimizer = optim.SGD(model.parameters(), lr=0.01), name = "relu", opt_name = "Without Momentum (SGD)")

100%|██████████| 10/10 [01:17<00:00,  7.79s/it]



###################################################################################################################
# Training Time for activation relu with Without Momentum (SGD):  77.95045971870422 seconds
# Classification Performance for activation relu with Without Momentum (SGD): 0.5419
###################################################################################################################


In [11]:
activation = nn.ReLU().cuda()
model = MyCNN(activation)
ModelTester(model, optimizer = optim.SGD(model.parameters(), lr=0.01, momentum = 0.9), name = "relu", opt_name = "With Momentum (SGD)")

100%|██████████| 10/10 [01:20<00:00,  8.00s/it]



###################################################################################################################
# Training Time for activation relu with With Momentum (SGD):  80.02108192443848 seconds
# Classification Performance for activation relu with With Momentum (SGD): 0.7614
###################################################################################################################
