In [1]:
# Mount to Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# cd to folder
%cd drive/MyDrive/Colab Notebooks/SC201L15

/content/drive/MyDrive/Colab Notebooks/SC201L15


In [3]:
# import packages
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [4]:
# Turn on GPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cup')
device

device(type='cuda')

In [5]:
# Constants
IMG_SIZE = 224
N = 64

In [21]:
# Prepare data-preprocessing
tranforms = T.Compose([T.ToTensor(), T.Resize((IMG_SIZE, IMG_SIZE)), T.RandomHorizontalFlip(), T.RandomRotation(30), T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# Load Data
train_data = ImageFolder('train', transform=tranforms)
val_data = ImageFolder('val', transform=tranforms)

In [7]:
# Prepare Mini-batches
train_loader = DataLoader(train_data, batch_size=N, shuffle=True)
val_loader = DataLoader(val_data, batch_size=N)

In [8]:
# Constants
NUM_VAL = len(val_data)
NUM_TRAIN = len(train_data)

In [9]:
# validation function
@torch.no_grad()
def val(device, model, val_loader):
  model.eval()
  total_acc = 0
  for i, (x, y) in enumerate(val_loader):
    x, y = x.to(device), y.to(device)
    logits = model(x) # N x C
    predictions = torch.argmax(logits, dim=1)
    acc = (predictions == y).sum().item()
    total_acc += acc
  print('Val Acc:', total_acc/NUM_VAL)
  print('-'*40)

In [10]:
def train(device, model, train_loader, val_loader, optimizer, num_epochs, print_every):
  for epoch in range(num_epochs):
    for i, (x, y) in enumerate(train_loader):
      model.train()
      x, y = x.to(device), y.to(device)
      logits = model(x)
      loss = F.cross_entropy(logits, y)
      if i % print_every == 0:
        print(f'Epoch [{epoch+1}] (Iter {i+1}) \n Train Loss: {loss}')
        val(device, model, val_loader)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

In [43]:
student = nn.Sequential(
    # N x 3 x 224 x 224
    nn.Conv2d(3, 64, 3, 1, 1, bias=False),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    # N x 64 x 112 x 112
    nn.Conv2d(64, 128, 3, 1, 1, bias=False),
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    # N x 128 x 56 x 56
    nn.Conv2d(128, 256, 3, 1, 1, bias=False),
    nn.BatchNorm2d(256),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    # N x 256 x 28 x 28
    nn.Conv2d(256, 512, 3, 1, 1, bias=False),
    nn.BatchNorm2d(512),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    # N x 512 x 14 x 14
    nn.Flatten(),
    nn.Linear(512*14*14, 2)
)
student = student.to(device)

In [None]:
# Choose optimizer
optimizer = optim.SGD(student.parameters(), lr=1e-2, momentum=0.9, nesterov=True)

In [None]:
train(device, student, train_loader, val_loader, optimizer, num_epochs=40, print_every=8)

Epoch [1] (Iter 1) 
 Train Loss: 0.6901616454124451
Val Acc: 0.4797979797979798
----------------------------------------
Epoch [1] (Iter 9) 
 Train Loss: 141.82977294921875
Val Acc: 0.5
----------------------------------------
Epoch [2] (Iter 1) 
 Train Loss: 20.21263313293457
Val Acc: 0.5858585858585859
----------------------------------------
Epoch [2] (Iter 9) 
 Train Loss: 13.770216941833496
Val Acc: 0.494949494949495
----------------------------------------
Epoch [3] (Iter 1) 
 Train Loss: 9.412843704223633
Val Acc: 0.5252525252525253
----------------------------------------
Epoch [3] (Iter 9) 
 Train Loss: 5.821316719055176
Val Acc: 0.5151515151515151
----------------------------------------
Epoch [4] (Iter 1) 
 Train Loss: 4.24921178817749
Val Acc: 0.5353535353535354
----------------------------------------
Epoch [4] (Iter 9) 
 Train Loss: 3.010955572128296
Val Acc: 0.5353535353535354
----------------------------------------
Epoch [5] (Iter 1) 
 Train Loss: 1.5779531002044678
Va

In [24]:
from torchvision.models import resnet18
teacher = resnet18(pretrained = True)
teacher.fc = nn.Linear(teacher.fc.in_features, 2)
teacher = teacher.to(device)



In [25]:
fine_tune_optimizer = optim.SGD(teacher.parameters(), lr=1e-3, momentum=0.9, nesterov=True)
train(device, teacher, train_loader, val_loader, fine_tune_optimizer, num_epochs=2, print_every=8)

Epoch [1] (Iter 1) 
 Train Loss: 0.905818521976471
Val Acc: 0.5
----------------------------------------
Epoch [1] (Iter 9) 
 Train Loss: 0.46143946051597595
Val Acc: 0.8383838383838383
----------------------------------------
Epoch [2] (Iter 1) 
 Train Loss: 0.25099533796310425
Val Acc: 0.9090909090909091
----------------------------------------
Epoch [2] (Iter 9) 
 Train Loss: 0.13397343456745148
Val Acc: 0.9292929292929293
----------------------------------------


In [44]:
def knowledge_distillation(s_logits, t_logits, y, T=2.0, alpha=0.7, epsilon=1e-8):
  p_s = F.softmax(s_logits/T, dim=1)
  p_t = F.softmax(t_logits/T, dim=1)
  kl_d_loss = torch.sum(p_t*torch.log((p_t+epsilon)/(p_s+epsilon)), dim=1).mean()*T*T  # Soft Label
  loss = F.cross_entropy(s_logits, y)  # Hard Label
  return alpha*loss + (1-alpha)*kl_d_loss

In [45]:
def new_train(device, student, teacher, train_loader, val_loader, optimizer, num_epochs, print_every):
  for epoch in range(num_epochs):
    for i, (x, y) in enumerate(train_loader):
      student.train()
      x, y = x.to(device), y.to(device)
      s_logits = student(x)
      with torch.no_grad():
        t_logits = teacher(x)
      loss = knowledge_distillation(s_logits, t_logits, y)
      if i % print_every == 0:
        print(f'Epoch [{epoch+1}] (Iter {i+1}) \n Train Loss: {loss}')
        val(device, student, val_loader)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

In [37]:
new_optimizer = optim.SGD(student.parameters(), lr=5e-3, momentum=0.9, nesterov=True)
new_train(device, student, teacher, train_loader, val_loader, new_optimizer, num_epochs=40, print_every=8) # This tests alpha=0.2 on student

Epoch [1] (Iter 1) 
 Train Loss: 0.8547251224517822
Val Acc: 0.47474747474747475
----------------------------------------
Epoch [1] (Iter 9) 
 Train Loss: 37.390357971191406
Val Acc: 0.5
----------------------------------------
Epoch [2] (Iter 1) 
 Train Loss: 30.827106475830078
Val Acc: 0.4898989898989899
----------------------------------------
Epoch [2] (Iter 9) 
 Train Loss: 37.17108154296875
Val Acc: 0.5202020202020202
----------------------------------------
Epoch [3] (Iter 1) 
 Train Loss: 34.98985290527344
Val Acc: 0.5151515151515151
----------------------------------------
Epoch [3] (Iter 9) 
 Train Loss: 24.72319984436035
Val Acc: 0.48484848484848486
----------------------------------------
Epoch [4] (Iter 1) 
 Train Loss: 24.944217681884766
Val Acc: 0.5959595959595959
----------------------------------------
Epoch [4] (Iter 9) 
 Train Loss: 21.727771759033203
Val Acc: 0.5808080808080808
----------------------------------------
Epoch [5] (Iter 1) 
 Train Loss: 15.423942565917

In [46]:
new_optimizer = optim.SGD(student.parameters(), lr=1e-2, momentum=0.9, nesterov=True)
new_train(device, student, teacher, train_loader, val_loader, new_optimizer, num_epochs=40, print_every=8) # This tests alpha=0.7 on student

Epoch [1] (Iter 1) 
 Train Loss: 0.844012439250946
Val Acc: 0.5202020202020202
----------------------------------------
Epoch [1] (Iter 9) 
 Train Loss: 43.60917663574219
Val Acc: 0.5202020202020202
----------------------------------------
Epoch [2] (Iter 1) 
 Train Loss: 35.488285064697266
Val Acc: 0.494949494949495
----------------------------------------
Epoch [2] (Iter 9) 
 Train Loss: 16.276174545288086
Val Acc: 0.5252525252525253
----------------------------------------
Epoch [3] (Iter 1) 
 Train Loss: 14.512085914611816
Val Acc: 0.5606060606060606
----------------------------------------
Epoch [3] (Iter 9) 
 Train Loss: 8.814745903015137
Val Acc: 0.5808080808080808
----------------------------------------
Epoch [4] (Iter 1) 
 Train Loss: 12.925395965576172
Val Acc: 0.4898989898989899
----------------------------------------
Epoch [4] (Iter 9) 
 Train Loss: 10.536697387695312
Val Acc: 0.5656565656565656
----------------------------------------
Epoch [5] (Iter 1) 
 Train Loss: 6.2