In [70]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms, models
import torch.nn.functional as F
from torchvision.models import resnet18
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import os
import copy



device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

model = resnet18(pretrained=True)


cuda


In [72]:

preprocess = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

In [73]:
processed_trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=preprocess)
class_names = processed_trainset.classes
processed_testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=preprocess)
dataloaders = {'train' : [], 'val' : []}
dataloaders['train'] = torch.utils.data.DataLoader(processed_trainset, batch_size=100,
                                                    shuffle=True, num_workers=4, pin_memory=True)
dataloaders['val'] = torch.utils.data.DataLoader(processed_testset, batch_size=100,
                                                    shuffle=False, num_workers=4, pin_memory=True)
dataset_sizes = {'train' : len(processed_trainset), 'val' : len(processed_testset)}

Files already downloaded and verified
Files already downloaded and verified


In [81]:
#Finetune to Cifar10: Load the resnet18 model that has been pretrained on Imagenet.
# Set requires_grad=False for all of the weights. Change its last layer (“fc”) to have only 10 outputs.
#  The last layer weights will have requires_grad=True. 
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512, 10)
model = model.to(device)

In [82]:
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [85]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.fc.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
dataset_sizes = {'train' : 50000 , 'val' : 10000}

In [93]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=50):
    since = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [88]:
model_conv = train_model(model, criterion, optimizer,
                         exp_lr_scheduler, num_epochs=5)

Epoch 0/4
----------
train Loss: 1.1428 Acc: 0.6018
val Loss: 1.1320 Acc: 0.6079

Epoch 1/4
----------
train Loss: 1.1255 Acc: 0.6088
val Loss: 1.1245 Acc: 0.6096

Epoch 2/4
----------
train Loss: 1.1236 Acc: 0.6072
val Loss: 1.1192 Acc: 0.6053

Epoch 3/4
----------
train Loss: 1.1270 Acc: 0.6069
val Loss: 1.1178 Acc: 0.6036

Epoch 4/4
----------
train Loss: 1.1202 Acc: 0.6094
val Loss: 1.1195 Acc: 0.6053

Training complete in 2m 54s
Best val Acc: 0.609600


In [94]:
#Use the cifar 10 training dataset to train the modified model for say 50 epochs.
model_conv, hist = train_model(model, criterion, optimizer,
                         exp_lr_scheduler, num_epochs=50)

Epoch 0/49
----------
train Loss: 1.1218 Acc: 0.6086
val Loss: 1.1110 Acc: 0.6168

Epoch 1/49
----------
train Loss: 1.1211 Acc: 0.6122
val Loss: 1.1059 Acc: 0.6189

Epoch 2/49
----------
train Loss: 1.1163 Acc: 0.6102
val Loss: 1.1140 Acc: 0.6157

Epoch 3/49
----------
train Loss: 1.1172 Acc: 0.6115
val Loss: 1.1205 Acc: 0.6122

Epoch 4/49
----------
train Loss: 1.1169 Acc: 0.6099
val Loss: 1.1317 Acc: 0.6057

Epoch 5/49
----------
train Loss: 1.1185 Acc: 0.6095
val Loss: 1.1180 Acc: 0.6110

Epoch 6/49
----------
train Loss: 1.1175 Acc: 0.6097
val Loss: 1.1018 Acc: 0.6152

Epoch 7/49
----------
train Loss: 1.1173 Acc: 0.6103
val Loss: 1.1202 Acc: 0.6116

Epoch 8/49
----------
train Loss: 1.1142 Acc: 0.6085
val Loss: 1.1076 Acc: 0.6168

Epoch 9/49
----------
train Loss: 1.1248 Acc: 0.6075
val Loss: 1.1260 Acc: 0.6056

Epoch 10/49
----------
train Loss: 1.1209 Acc: 0.6084
val Loss: 1.1045 Acc: 0.6205

Epoch 11/49
----------
train Loss: 1.1152 Acc: 0.6107
val Loss: 1.1147 Acc: 0.6089

Ep

In [90]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    model_ft = models.resnet18(pretrained=use_pretrained)
    set_parameter_requires_grad(model_ft, feature_extract)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    input_size = 224

    return model_ft, input_size

In [None]:
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()

In [97]:
scratch_model,_ = initialize_model(model, num_classes = 10, feature_extract=False, use_pretrained=False)
scratch_model = scratch_model.to(device)
scratch_optimizer = optim.SGD(scratch_model.parameters(), lr=0.001, momentum=0.9)
scratch_criterion = nn.CrossEntropyLoss()
_,scratch_hist = train_model(model, criterion, optimizer,
                         exp_lr_scheduler, num_epochs=50)

Epoch 0/49
----------
train Loss: 1.1114 Acc: 0.6131
val Loss: 1.1199 Acc: 0.6078

Epoch 1/49
----------
train Loss: 1.1152 Acc: 0.6107
val Loss: 1.1238 Acc: 0.6050

Epoch 2/49
----------
train Loss: 1.1122 Acc: 0.6104
val Loss: 1.1155 Acc: 0.6116

Epoch 3/49
----------
train Loss: 1.1192 Acc: 0.6106
val Loss: 1.1091 Acc: 0.6123

Epoch 4/49
----------
train Loss: 1.1188 Acc: 0.6088
val Loss: 1.1063 Acc: 0.6124

Epoch 5/49
----------
train Loss: 1.1133 Acc: 0.6113
val Loss: 1.1220 Acc: 0.6021

Epoch 6/49
----------
train Loss: 1.1177 Acc: 0.6105
val Loss: 1.0994 Acc: 0.6129

Epoch 7/49
----------
train Loss: 1.1145 Acc: 0.6118
val Loss: 1.1130 Acc: 0.6131

Epoch 8/49
----------
train Loss: 1.1144 Acc: 0.6121
val Loss: 1.1187 Acc: 0.6053

Epoch 9/49
----------
train Loss: 1.1187 Acc: 0.6102
val Loss: 1.1215 Acc: 0.6062

Epoch 10/49
----------
train Loss: 1.1128 Acc: 0.6124
val Loss: 1.1134 Acc: 0.6153

Epoch 11/49
----------
train Loss: 1.1135 Acc: 0.6150
val Loss: 1.1177 Acc: 0.6063

Ep

In [60]:
# Find important weights: For the model trained above, set requires_grad=True for all the weights.
# Compute the gradients for each weight for images in the cifar 10 test set.

for param in model.parameters():
    param.requires_grad = True

for inputs, labels in dataloaders['val']:
    inputs = inputs.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()