In [13]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms, datasets, models
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import copy
import time
import os

In [15]:
import utils
from models import ResNet

In [16]:
data_loaders, test_loader = utils.get_data_loaders(val=False)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [17]:
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = ResNet.ResNet18()
#net = ResNet.ResNet50()
#net = ResNet.ResNet152()

print(torch.cuda.current_device())
print(torch.cuda.device(0))
print(torch.cuda.device_count())

0
<torch.cuda.device object at 0x7f948bfa3668>
1


In [19]:
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    net = net.cuda()
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
    criterion = nn.CrossEntropyLoss().cuda()
else:
    print('CPU')
    criterion = nn.CrossEntropyLoss()

Tesla K80


In [20]:
#optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4, nesterov=True)
optimizer = optim.Adam(net.parameters())
#scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
#scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, 32)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[81, 122], gamma=0.1)

In [21]:
old = False
test = True

Size of CIFAR-10

In [22]:
if test:
    dataset_size = {'train':5000, 'test':1000}
else:
    dataset_size = {'train': 4000,'val': 1000,'test': 1000}

Load Trained Model

In [23]:
SAVE_PATH = './trained-models/net.pth'

if old:
    old_epochs = utils.load_checkpoint(net, optimizer, scheduler, SAVE_PATH)

Implement SWATS

In [35]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, test=True):
    
    best_accuracy = 0.0
    
    for epoch in range(num_epochs):
        
        print(str(epoch + 1) + "/" + str(num_epochs))
        
        if type(scheduler) is torch.optim.lr_scheduler.MultiStepLR:
            scheduler.step()
        
        if test:
            phases = ['train', 'test']
        else:
            phases = ['train', 'val']
        
        for phase in phases:
            
            print(phase)
            
            if phase == 'train':
                model.train()
                
            else:
                model.eval()
            
            running_loss = 0.0
            running_corrects = 0
            total = 0
            
            start = time.time()
            
            if phase == 'train' or phase == 'val':
                for index, (inputs, targets) in enumerate(data_loaders[phase]):
                
                    inputs = inputs.to(device)
                    targets = targets.to(device)

                    optimizer.zero_grad()
                
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        _, preds = outputs.max(1)
                        loss = criterion(outputs, targets)
                    
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()
                        
                    running_loss += loss.item()
                    running_corrects += preds.eq(targets).sum().item()
                    total += targets.size(0)
            else:
                for index, (inputs, targets) in enumerate(test_loader):
                
                    inputs = inputs.to(device)
                    targets = targets.to(device)
                
                    outputs = model(inputs)
                    _, preds = outputs.max(1)
                    loss = criterion(outputs, targets)
                    
                    running_loss += loss.item()
                    running_corrects += preds.eq(targets).sum().item()
                    total += targets.size(0)
                
            epoch_loss = running_loss / total
            epoch_acc = running_corrects / total
                
            print('Loss: ' + str(epoch_loss) + ", Epoch Accuracy: " + str(epoch_acc))
            
            print('Time: ' + str((time.time() - start) / 60))
            
            if phase != 'train' and epoch_acc > best_accuracy:
                best_accuracy = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                
                if not os.path.isdir('trained-models'):
                    os.mkdir('trained-models')
                
                state = {
                    
                    'epoch': epoch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict()
                    
                }

                if os.path.exists(SAVE_PATH):
                    os.remove(SAVE_PATH)
                
                torch.save(state, SAVE_PATH)
    
    print('Best Accuracy: ' + str(best_accuracy))
    
    model.load_state_dict(best_model_wts)
    return model

In [37]:
base_epochs = 3

if old:
    epochs = base_epochs - old_epochs
else:
    epochs = base_epochs

print(epochs)

net = train_model(net, criterion, optimizer, scheduler, epochs, test=True)

3
1/3
train
Loss: 0.0057674202016557135, Epoch Accuracy: 0.98186
Time: 3.8160754283269247
test
Loss: 0.0600440838071052, Epoch Accuracy: 0.9281
Time: 0.3290539741516113
2/3
train
Loss: 0.005720081186237075, Epoch Accuracy: 0.98178
Time: 3.8180610219637554
test
Loss: 0.05988495913124643, Epoch Accuracy: 0.9273
Time: 0.33005776405334475
3/3
train
Loss: 0.005756084284085373, Epoch Accuracy: 0.98266
Time: 3.8169201215108237
test
Loss: 0.05864745150581002, Epoch Accuracy: 0.9261
Time: 0.3312794049580892
Best Accuracy: 0.9281


In [39]:
sgd_epochs = 3
utils.load_checkpoint(net, optimizer, scheduler, SAVE_PATH)
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5, weight_decay=5e-4, nesterov=True)
net = train_model(net, criterion, optimizer, scheduler, sgd_epochs, test=True)

1/3
train
Loss: 0.005801942758474906, Epoch Accuracy: 0.98322
Time: 3.8177738189697266
test
Loss: 0.05979425307633355, Epoch Accuracy: 0.9262
Time: 0.3307978828748067
2/3
train
Loss: 0.00575660215022821, Epoch Accuracy: 0.98206
Time: 3.820275064309438
test
Loss: 0.05857356343199499, Epoch Accuracy: 0.927
Time: 0.33211567799250286
3/3
train
Loss: 0.005778906250165892, Epoch Accuracy: 0.98292
Time: 3.8152034242947894
test
Loss: 0.0584068417660892, Epoch Accuracy: 0.927
Time: 0.32789112329483033
Best Accuracy: 0.927
