In [1]:
from __future__ import print_function, division

# import sys
# sys.path.append(root_folder)
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pathlib

from models.convengers import *
from models.solver import NickFury
from utils import dictionary
from utils import visualizations

# Turn on matplotlib's interactive mode for the rest of the notebook.
plt.ion()   # interactive mode

# Load IPython's autoreload extension. Mode 2 re-imports modules before each
# cell executes, so edits to imported source files take effect without a kernel
# restart.
%load_ext autoreload
%autoreload 2

In [3]:
# Print a table of per-GPU load and memory utilisation (ID | GPU | MEM).
# NOTE(review): presumably used to choose a free card for training — confirm
# the chosen device index against this table before a long run.
import GPUtil
GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% | 96% |
|  1 |  0% |  0% |
|  2 | 95% | 35% |
|  3 | 99% | 95% |


In [3]:
dataset_folder = "data/tiny-imagenet-200/"
data_dir = pathlib.Path(dataset_folder)

# Per-channel statistics handed to Normalize. Both splits share one definition
# so the training and validation pipelines cannot drift apart.
# NOTE(review): these look like the usual ImageNet mean/std — confirm.
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)

RESIZE_SIZE = 256  # size passed to Resize before cropping
CROP_SIZE = 224    # size of the crop taken from the resized image

# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(RESIZE_SIZE),
        transforms.RandomResizedCrop(CROP_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
    ]),
    'val': transforms.Compose([
        transforms.Resize(RESIZE_SIZE),
        transforms.CenterCrop(CROP_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)
    ]),
}

In [4]:
# One ImageFolder per split, read from <data_dir>/<split> and paired with that
# split's transform pipeline.
image_datasets = {
    split: datasets.ImageFolder(data_dir.joinpath(split), data_transforms[split])
    for split in ('train', 'val')
}

# Batched loaders over the datasets above; both splits are shuffled.
dataloaders = {
    split: torch.utils.data.DataLoader(
        split_dataset,
        batch_size=64,
        shuffle=True,
        num_workers=6,
        pin_memory=True,
    )
    for split, split_dataset in image_datasets.items()
}

# Number of samples available in each split.
dataset_sizes = {
    split: len(split_dataset)
    for split, split_dataset in image_datasets.items()
}

# Class labels as reported by the training ImageFolder.
class_names = image_datasets['train'].classes

# Index of the GPU this notebook prefers to train on.
preferred_gpu = 1

if torch.cuda.is_available():
    # Use the preferred card when it exists. Otherwise fall back to GPU 0, so
    # a host with a single visible GPU does not fail later when the model is
    # moved to `device`.
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device("cuda", gpu_index)
else:
    # No CUDA runtime available: run on the CPU.
    device = torch.device("cpu")

In [5]:
# Loss function handed to the solver's train() call.
vgg_criterion = nn.CrossEntropyLoss()

# Build the network and move it to the selected device in one expression.
vgg_new = CaptainAmerica(
    layer_cutoff=-3,
    num_blocks=2,
    hidden_size=2048,
    requires_grad=True,
).to(device)

# Solver wrapping the model together with the loaders, split sizes and device.
vgg_solver = NickFury("VGG-3", vgg_new, dataloaders, dataset_sizes, device)

# SGD with momentum over all model parameters; StepLR multiplies the learning
# rate by 0.1 after every 7 scheduler steps.
vgg_optimizer = optim.SGD(params=vgg_new.parameters(), lr=0.01, momentum=0.9)
vgg_exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer=vgg_optimizer,
    step_size=7,
    gamma=0.1,
)

In [6]:
# Train for 25 epochs. The recorded log shows roughly 3m46s per epoch, so the
# result is bound to a name rather than left reachable only through Out[n] / `_`.
# NOTE(review): the recorded output suggests train() returns a list with one
# loss value per epoch — confirm against NickFury.train.
vgg_loss_history = vgg_solver.train(vgg_optimizer, vgg_criterion, vgg_exp_lr_scheduler, num_epochs=25)

# Keep the value as the cell's last expression so it is still displayed.
vgg_loss_history

Epoch 0:
Training 99.94%, loss: 0.063, Accuracy: 16.39%
Epoch 0 completed with elapsed time 00:03:47.16
Validation Accuracy: 0.351
Per Point Loss: 0.063
Epoch 1:
Training 99.94%, loss: 0.058, Accuracy: 20.90%
Epoch 1 completed with elapsed time 00:03:46.00
Validation Accuracy: 0.386
Per Point Loss: 0.058
Epoch 2:
Training 99.94%, loss: 0.055, Accuracy: 22.90%
Epoch 2 completed with elapsed time 00:03:46.27
Validation Accuracy: 0.403
Per Point Loss: 0.055
Epoch 3:
Training 99.94%, loss: 0.054, Accuracy: 24.41%
Epoch 3 completed with elapsed time 00:03:46.58
Validation Accuracy: 0.418
Per Point Loss: 0.054
Epoch 4:
Training 99.94%, loss: 0.053, Accuracy: 25.51%
Epoch 4 completed with elapsed time 00:03:45.94
Validation Accuracy: 0.438
Per Point Loss: 0.053
Epoch 5:
Training 99.94%, loss: 0.052, Accuracy: 26.16%
Epoch 5 completed with elapsed time 00:03:46.68
Validation Accuracy: 0.441
Per Point Loss: 0.052
Epoch 6:
Training 99.94%, loss: 0.051, Accuracy: 26.91%
Epoch 6 completed with ela

[0.06259959344625474,
 0.057928778064250945,
 0.05547468769073486,
 0.05386274608373642,
 0.052624595158100126,
 0.05194039000511169,
 0.05103346601247787,
 0.04995715508460999,
 0.04961773470878601,
 0.04930137592554092,
 0.04913346573114395,
 0.04905214888095856,
 0.04901794262886047,
 0.04884871689319611,
 0.04874061403512955,
 0.04881995627164841,
 0.048761866898536683,
 0.04873924275636673,
 0.04864788383722305,
 0.048724306733608246,
 0.04876247328042984,
 0.04875599523305893,
 0.048635022494792936,
 0.04875265236377716,
 0.048591830780506134]