### Imports

In [3]:
# Standard library
import os
import random
import sys
import time

# Third-party dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
import tqdm
import tqdm.notebook  # `import tqdm` alone does not load this submodule; needed for tqdm.notebook.trange

import numpy as np
import matplotlib.pyplot as plt

# Report the interpreter and framework versions used to produce the outputs below.
print('Python version: ', sys.version)
print('Pytorch version: ', torch.__version__)

# Only query the device index/name when CUDA is usable: torch.cuda.current_device()
# and torch.cuda.get_device_name() raise on a machine without a working GPU.
cuda_available = torch.cuda.is_available()
print("GPU available: {}".format(cuda_available))
if cuda_available:
  current_device = torch.cuda.current_device()
  print("current GPU index: {}".format(current_device))
  # Name the device that is actually current instead of hard-coding index 0.
  print("current GPU card name: {}".format(torch.cuda.get_device_name(current_device)))

Python version:  3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]
Pytorch version:  1.12.0+cu113
GPU available: True
current GPU index: 0
current GPU card name: GeForce RTX 2070 Super with Max-Q Design


#### Model definition


> Our model follows an architecture with 3 modules, each consisting of a 3 × 3 convolution with 64 filters, followed by
batch normalization (Ioffe & Szegedy, 2015), a ReLU nonlinearity, and 2 × 2 max-pooling.

In [8]:
# TODO 1. build your model following the instruction above.
# Note that, the size of inputs will be (32, 32)
class Net(nn.Module):
  """Small CNN classifier: three convolutional modules and a linear head.

  Each module is a 3x3 convolution with 64 filters (no padding), batch
  normalization, a ReLU and 2x2 max-pooling. The flattened feature map is
  mapped to `nclasses` logits by a single linear layer.

  Args:
    nclasses: number of output logits.
    in_channels: number of channels of the input images (default 3).
    input_size: spatial size of the inputs, either an int for square images
      or a `(height, width)` pair (default 32, which yields 256 features).

  Raises:
    ValueError: if `input_size` is too small to survive the three
      convolution + pooling stages.
  """

  def __init__(self, nclasses, in_channels=3, input_size=32):
    super(Net, self).__init__()
    # Pure arithmetic, done first so an invalid size fails before any layer is built.
    flat_features = self._flat_features(input_size)

    self.conv1 = nn.Conv2d(in_channels, 64, 3)
    self.bn1 = nn.BatchNorm2d(64)
    self.act1 = nn.ReLU()
    self.max1 = nn.MaxPool2d(2)

    self.conv2 = nn.Conv2d(64, 64, 3)
    self.bn2 = nn.BatchNorm2d(64)
    self.act2 = nn.ReLU()
    self.max2 = nn.MaxPool2d(2)

    self.conv3 = nn.Conv2d(64, 64, 3)
    self.bn3 = nn.BatchNorm2d(64)
    self.act3 = nn.ReLU()
    self.max3 = nn.MaxPool2d(2)

    self.flatten = nn.Flatten()
    self.fc = nn.Linear(flat_features, nclasses)

  @staticmethod
  def _flat_features(input_size):
    """Return the number of features left after the three conv/pool modules."""
    if isinstance(input_size, int):
      height, width = input_size, input_size
    else:
      height, width = input_size
    for _ in range(3):
      # An unpadded 3x3 convolution removes 2 pixels; 2x2 pooling halves (floor).
      height = (height - 2) // 2
      width = (width - 2) // 2
    if height < 1 or width < 1:
      raise ValueError(
          "input_size {} is too small for three conv(3x3) + maxpool(2) modules".format(input_size))
    return 64 * height * width

  def forward(self, img):
    """Map a batch of images `(N, in_channels, H, W)` to logits `(N, nclasses)`."""
    x = self.conv1(img)
    x = self.bn1(x)
    x = self.act1(x)
    x = self.max1(x)

    x = self.conv2(x)
    x = self.bn2(x)
    x = self.act2(x)
    x = self.max2(x)

    x = self.conv3(x)
    x = self.bn3(x)
    x = self.act3(x)
    x = self.max3(x)

    x = self.flatten(x)
    x = self.fc(x)
    return x

In [9]:
# Smoke test: push a single random 32x32 RGB image through a 5-class network
# and display the resulting logits.
net = Net(nclasses=5)
img = torch.randn(1, 3, 32, 32)
net(img)

tensor([[-0.9533, -0.3489, -0.2274,  0.2298,  0.5392]],
       grad_fn=<AddmmBackward0>)

In [10]:
# prepare everything needed for training the CNN model
# load the CIFAR10 dataset from the torchvision package
train_transform = transforms.Compose([
                                      transforms.RandomHorizontalFlip(),
                                      transforms.ToTensor(),
                                      ])
test_transform = transforms.Compose([
                                     transforms.ToTensor(),
                                     ])

train_dataset = torchvision.datasets.CIFAR10(root='dataset/cifar10', train=True, download=True, transform=train_transform)
test_dataset = torchvision.datasets.CIFAR10(root='dataset/cifar10', train=False, download=True, transform=test_transform)

# get the dataloader
# drop_last=True keeps every training batch at exactly 256 samples;
# the test loader keeps the final partial batch so every test image is scored.
train_dataloader = DataLoader(train_dataset, batch_size=256, drop_last=True, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=128, drop_last=False, shuffle=False)

# pick the compute device once: first GPU when CUDA is usable, otherwise CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# instantiate the model & move to the selected device
model = Net(nclasses=10)
model.to(device)

criterion = nn.CrossEntropyLoss() # instantiate the loss (criterion)
criterion.to(device) # keep the loss module on the same device as the model

# get the optimizer to train the model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-5)
# the learning rate is multiplied by 0.1 at epochs 150, 180 and 190 (scheduler.step() is called once per epoch)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 180, 190], gamma=0.1)
#=================================================== 

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to dataset/cifar10\cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting dataset/cifar10\cifar-10-python.tar.gz to dataset/cifar10
Files already downloaded and verified


In [11]:
def train(dataloader, model, criterion, optimizer):
  """Train `model` for one pass over `dataloader` and return averaged metrics.

  Args:
    dataloader: iterable yielding `(x, y)` batches; `y` is compared against
      the argmax of the logits to count correct predictions.
    model: module mapping `x` to logits of shape (batch, classes); it is
      switched to training mode.
    criterion: loss called as `criterion(logits, y)`.
    optimizer: optimizer that is zeroed and stepped once per batch.

  Returns:
    `(loss, accuracy)`: the per-batch loss weighted by batch size and the
    number of correct predictions, both divided by the number of samples seen.
  """
  model.train()

  # Move batches to wherever the model lives rather than assuming the current
  # CUDA device; a parameter-less model leaves the tensors where they are.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else None

  total_loss = 0
  total_acc = 0
  n = 0
  for x, y in dataloader:
    optimizer.zero_grad()
    if device is not None:
      x, y = x.to(device), y.to(device)
    logits = model(x)
    loss = criterion(logits, y)
    loss.backward()
    optimizer.step()

    pred = torch.argmax(logits, dim=1) # logits: (# minibatch, #class) -> pred: (# minibatch,)
    acc = torch.sum(pred == y)

    # Weight by batch size so the epoch average stays correct for unequal batches
    total_loss += loss.item() * x.shape[0]
    total_acc += acc.item()
    n += x.shape[0]

  return total_loss / n, total_acc / n

In [12]:
@torch.no_grad()
def test(dataloader, model, criterion):
  model.eval()
  total_loss = 0
  total_acc = 0
  n = 0
  for x, y in dataloader:#tqdm.notebook.tqdm(dataloader, desc='test', leave=False):
    x, y = x.cuda(), y.cuda()
    logits = model(x)
    loss = criterion(logits, y)

    pred = torch.argmax(logits, dim=1)
    acc = torch.sum(pred == y)

    total_loss += loss.item() * x.shape[0]
    total_acc += acc.item()
    n += x.shape[0]

  return total_loss / n, total_acc / n

In [13]:
# Load the TensorBoard notebook extension and open it on the `runs` directory,
# which is where the SummaryWriter instances in this notebook write their logs.
%load_ext tensorboard
%tensorboard --logdir runs

In [14]:
# train the model for 200 epochs
# (the MultiStepLR milestones are expressed in epochs, so keep them below EPOCHS)
EPOCHS = 200
writer = SummaryWriter('runs/cifar10')
try:
  for epc in tqdm.notebook.trange(EPOCHS, desc='EPOCH'):
    tr_loss, tr_acc = train(train_dataloader, model, criterion, optimizer)
    te_loss, te_acc = test(test_dataloader, model, criterion)

    writer.add_scalar('train/loss', tr_loss, epc)
    writer.add_scalar('train/acc', tr_acc, epc)
    writer.add_scalar('test/loss', te_loss, epc)
    writer.add_scalar('test/acc', te_acc, epc)
    # flush every epoch so TensorBoard shows progress while training is running
    writer.flush()

    # advance the learning-rate schedule once per epoch
    scheduler.step()
finally:
  # always close the event file, even when training raises or is interrupted
  writer.close()

EPOCH:   0%|          | 0/200 [00:00<?, ?it/s]

In [None]:
# train CIFAR100 with the ImageNet pretrained resnet34 model provided by PyTorch
#================ YOUR CODE HERE ===================
# NOTE(review): this cell is an unfinished template. The loop below reads
# train_dataloader, test_dataloader, model, criterion, optimizer and scheduler;
# until the sections below rebind them, those names still refer to the CIFAR10
# objects created earlier in the notebook, while the logs go to 'runs/cifar100'.
# load the CIFAR100 dataset from the torchvision package

# get the dataloader

# instantiate the model & move to GPU

# get the optimizer to train the model

# SET EPOCHS
# NOTE(review): EPOCHS has no value yet, so this cell is a SyntaxError as written.
EPOCHS = 
writer = SummaryWriter('runs/cifar100')
for epc in tqdm.notebook.trange(EPOCHS, desc='EPOCH'):
  # one training pass, then one evaluation pass, per epoch
  tr_loss, tr_acc = train(train_dataloader, model, criterion, optimizer)
  te_loss, te_acc = test(test_dataloader, model, criterion)

  # log the epoch metrics under the same tags as the CIFAR10 run
  writer.add_scalar('train/loss', tr_loss, epc)
  writer.add_scalar('train/acc', tr_acc, epc)
  writer.add_scalar('test/loss', te_loss, epc)
  writer.add_scalar('test/acc', te_acc, epc)
  writer.flush()

  scheduler.step()
  
writer.close()

#=================================================== 