In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import TensorDataset, DataLoader
import torchvision.models as models
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.data import random_split

In [10]:
import ssl

# Swap the default HTTPS context factory for the unverified one before the
# dataset download is attempted.
# NOTE(review): this disables TLS certificate verification for everything in
# this process that relies on the default context -- confirm it is still needed.
ssl._create_default_https_context = ssl._create_unverified_context

# Training split (downloaded into the working directory when missing) and the
# held-out test split, both converted to tensors on access.
data = datasets.CIFAR10(root="./", train=True, transform=transforms.ToTensor(), download=True)
test_data = datasets.CIFAR10(root="./", train=False, transform=transforms.ToTensor())

Files already downloaded and verified


In [11]:
# Hold out 10% of the training images for validation; `test_data` is kept
# untouched for the final evaluation.
SPLIT_SEED = 0   # fixes the train/validation partition across kernel restarts
BATCH_SIZE = 10  # mini-batch size shared by all three loaders

train_size = int(0.9 * len(data))
val_size = len(data) - train_size
test_size = val_size  # legacy alias: this was always the validation-set size

# A dedicated, seeded generator makes the partition identical on every run
# without touching the global RNG state.
train_set, val_set = torch.utils.data.random_split(
    data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader needs shuffling; accuracy over the validation and
# test sets does not depend on sample order.
trainloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)
valloader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

In [12]:
class Net(nn.Module):
    """Small convolutional classifier with tanh activations.

    Two 5x5 convolutions, each followed by tanh and 2x2 max-pooling, feed a
    two-layer fully connected head that emits 10 raw scores (logits) per
    sample. The 16 * 5 * 5 input width of ``fc1`` matches 3-channel 32x32
    inputs (32 -> 28 -> 14 -> 10 -> 5 per spatial side).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; one pooling layer is reused twice.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=10)
        # Registered on the module but not applied anywhere in forward().
        self.dropout = nn.Dropout(p=0.2)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the unnormalised class scores for a batch of images."""
        stage1 = self.pool(torch.tanh(self.conv1(x)))
        stage2 = self.pool(torch.tanh(self.conv2(stage1)))
        # Keep the batch dimension, collapse everything else.
        hidden = torch.tanh(self.fc1(stage2.flatten(start_dim=1)))
        return self.fc2(hidden)

# Implement a train_model function so that it can be reused in tasks 3 and 4.
# It should return the best-performing model after training.
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs, msg,
                checkpoint_path="./E1", writer=None):
    """Train ``model`` and return it with the best-validating weights loaded.

    After every epoch the model is scored on ``val_loader``; whenever the
    validation accuracy improves (and always after the first epoch) the
    weights are saved to ``checkpoint_path``.

    Args:
        model: the ``nn.Module`` to optimise (updated in place).
        criterion: loss callable invoked as ``criterion(prediction, labels)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        train_loader: sized iterable of ``(inputs, labels)`` training batches.
        val_loader: iterable of ``(inputs, labels)`` validation batches.
        num_epochs: number of passes over ``train_loader``.
        msg: TensorBoard tag under which the per-batch loss is logged.
        checkpoint_path: where the best ``state_dict`` is written.
        writer: optional object exposing ``add_scalar(tag, value, step)``.
            When omitted, a ``SummaryWriter`` is created here and closed
            before returning; a caller-supplied writer is left open.

    Returns:
        ``model`` itself, holding the weights of the best validation epoch
        (unchanged if ``num_epochs`` is 0).
    """
    owns_writer = writer is None
    if owns_writer:
        writer = SummaryWriter()

    was_training = model.training
    best_accuracy = 0
    best_state = None
    global_step = 0  # counts batches across epochs so each loss gets its own x value
    try:
        for epoch in range(num_epochs):
            model.train()
            for batch_nr, (inputs, labels) in enumerate(train_loader):
                # Clear gradients first so stale values from before this call
                # can never leak into the first update.
                optimizer.zero_grad()
                prediction = model(inputs)
                loss = criterion(prediction, labels)
                loss.backward()
                optimizer.step()

                loss_value = loss.item()
                writer.add_scalar(msg, loss_value, global_step)
                global_step += 1
                print(
                    f'\rEpoch {epoch+1} [{batch_nr+1}/{len(train_loader)}] - Loss: {loss_value}',
                    end=''
                )

            # Validation pass: inference mode, no gradient bookkeeping.
            model.eval()
            correct = 0
            total = 0
            with torch.no_grad():
                for inputs, labels in val_loader:
                    prediction = model(inputs)
                    _, predicted = torch.max(prediction, 1)
                    correct += (predicted == labels).sum().item()
                    total += labels.size(0)
            val_accuracy = correct / total if total else 0.0
            print(" ", val_accuracy, "Val Acc")

            # The first epoch is always checkpointed so that a later load of
            # `checkpoint_path` never picks up a file from an earlier run.
            if best_state is None or best_accuracy < val_accuracy:
                best_accuracy = val_accuracy
                print("best model here")
                torch.save(model.state_dict(), checkpoint_path)
                # state_dict() tensors alias the live parameters; clone them so
                # later optimizer steps cannot overwrite the snapshot.
                best_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
    finally:
        model.train(was_training)  # hand the model back in the mode it came in
        if owns_writer:
            writer.close()

    if best_state is not None:
        model.load_state_dict(best_state)
    return model
def test_model(model, test_loader):
    test_accuracy = 0
    total = 0
    predictList = []
    testList = []
    
    for batch_nr, (data, labels) in enumerate(test_loader):
        prediction = model.forward(data)
        _, predicted = torch.max(prediction, 1)
        for i in range(len(prediction)):
            predictList.append(predicted[i].item())
            testList.append(labels[i].item())
        test_accuracy += (predicted == labels).sum().item()
        total += labels.size(0)
    print(test_accuracy/total, "Test Accuracy")
  

# --- Experiment: tanh activations + Adam ---

# Step size handed to the optimizer.
LEARNING_RATE = 0.0001

# Freshly initialised network.
model = Net()

# Cross-entropy on the network's raw class scores.
criterion = nn.CrossEntropyLoss()

# Adam over all network parameters.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# TensorBoard tag for this run's loss curve; train for two epochs.
msg = 'training_loss with tanh and adam'
trained_model = train_model(
    model, criterion, optimizer, trainloader, valloader, num_epochs=2, msg=msg
)

# Reload the checkpoint written during training, then score it on the test set.
model.load_state_dict(torch.load("./E1"))
tested_model = test_model(model, testloader)

Epoch 1 [4500/4500] - Loss: 1.5766049623489384  0.3726 Val Acc
best model here
Epoch 2 [4500/4500] - Loss: 1.2912018299102783  0.434 Val Acc
best model here
0.4501 Test Accuracy


In [13]:
# --- Experiment: tanh activations + plain SGD (same learning rate) ---

# Start again from a freshly initialised network.
model = Net()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

# TensorBoard tag for this run's loss curve; train for two epochs.
msg = 'training_loss with tanh and SGD'
trained_model = train_model(
    model, criterion, optimizer, trainloader, valloader, num_epochs=2, msg=msg
)

# Reload the checkpoint written during training, then score it on the test set.
model.load_state_dict(torch.load("./E1"))
tested_model = test_model(model, testloader)

Epoch 1 [4500/4500] - Loss: 2.3082318305969247  0.1138 Val Acc
best model here
Epoch 2 [4500/4500] - Loss: 2.2879505157470703  0.1118 Val Acc
0.109 Test Accuracy


Using Adam as the optimizer and tanh as the activation function gives roughly 44% accuracy (43.4% validation, 45.0% test) after 2 epochs with a learning rate of 0.0001.


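Using SGD as the optimizer and tanh as the activation function gives roughly 10% accuracy (11.4% best validation, 10.9% test) after 2 epochs with a learning rate of 0.0001, which is about chance level for the 10 CIFAR-10 classes.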

![TanhADAM](tanhADAM.png)
![TanhSGD](tanhSGD.png)