In [1]:
!pwd

/home/ubuntu/project


In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torchvision.models as models

import numpy as np

import PIL

import sys
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from torch.utils import data

import matplotlib.pyplot as plt
import time
import logging
import datetime

cuda = torch.cuda.is_available()
cuda

True

In [3]:
! nvidia-smi


Wed Nov  4 20:52:27 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   47C    P0    28W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------

In [4]:
class AddGaussianNoise(object):
    def __init__(self, mean=0., std=1.):
        self.std = std
        self.mean = mean
        
    def __call__(self, tensor):
      res = tensor + torch.randn(tensor.size()) * self.std + self.mean
      return torch.clamp(input=res, min=-0.5, max=0.5)
    
    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(self.mean, self.std)

In [5]:
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()

    running_loss = 0.0
    total_predictions = 0.0
    correct_predictions = 0.0
    
    for batch_idx, (data, target) in enumerate(train_loader):   
        optimizer.zero_grad()   # .backward() accumulates gradients
        data = data.to(device)
        target = target.to(device) # all data & model on same device
        
        with torch.cuda.amp.autocast():
          outputs = model(data)
          loss = criterion(outputs, target)
          running_loss += loss.item()

          _, predicted = torch.max(outputs.data, 1)
          total_predictions += target.size(0)
          correct_predictions += (predicted == target).sum().item()
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()


        del data
        del target
    
    
    running_loss /= len(train_loader)
    acc = (correct_predictions/total_predictions)*100.0
    logging.info('Training Loss: {}'.format(running_loss))
    logging.info('Training Accuracy: {}%'.format(acc))
    return running_loss


def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (data, target) in enumerate(test_loader): 
            #print(data.shape)  
            data = data.to(device)
            target = target.to(device)

            with torch.cuda.amp.autocast():
              outputs = model(data)

              _, predicted = torch.max(outputs.data, 1)
              total_predictions += target.size(0)
              correct_predictions += (predicted == target).sum().item()

              loss = criterion(outputs, target).detach()
              running_loss += loss.item()

            del data
            del target

        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        logging.info('Testing Loss: {}'.format(running_loss))
        logging.info('Testing Accuracy: {}%'.format(acc))
        return running_loss, acc

def init_weights(m):
    if type(m) == nn.Conv2d or type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight.data)

In [6]:
# configure logging
logger = logging.getLogger("")

# reset handler
for handler in logging.root.handlers[:]:
  logging.root.removeHandler(handler)

# set handler
stream_hdlr = logging.StreamHandler()
file_hdlr = logging.FileHandler('/home/ubuntu/project/log_{}.log'.format(datetime.datetime.now()))

formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
stream_hdlr.setFormatter(formatter)
file_hdlr.setFormatter(formatter)

logger.addHandler(stream_hdlr)
logger.addHandler(file_hdlr)

logger.setLevel(logging.INFO)

In [7]:
class VGG16(nn.Module):
    def __init__(self, nb_classes=10):
        super(VGG16, self).__init__()
        self.vgg16 = models.vgg16_bn()
        self.linear = nn.Linear(1000, nb_classes)
        
    def forward(self, x):
        x = self.vgg16(x)
        x = self.linear(x)
        return x

In [8]:
train_batchsize = 128
test_batchsize = 100
num_workers = 8
num_classes = 10

n_epochs = 30
img_size = 224
lr = 1e-4
min_lr = 1e-8
weight_decay = 5e-4
num_models = 30
noise_std = 0.06

device = torch.device("cuda" if cuda else "cpu")

hyper_params = {'lr': lr, 'min_lr': min_lr, 'weight_decay': weight_decay, 'num_models': num_models, 'num_epochs': n_epochs, 'noise_std': noise_std}
logging.info(hyper_params)


2020-11-04 20:52:27,327 INFO {'lr': 0.0001, 'min_lr': 1e-08, 'weight_decay': 0.0005, 'num_models': 30, 'num_epochs': 30, 'noise_std': 0.06}


In [None]:
seeds = np.arange(0, num_models)

for seed in seeds:
  logging.info('Seed: {}'.format(seed))
  torch.manual_seed(seed)
  

  train_transform = transforms.Compose([transforms.Resize(size=img_size),
                      transforms.ColorJitter(hue=.05, saturation=.05),
                      transforms.RandomHorizontalFlip(p=0.5),
                      transforms.RandomRotation(20, resample=PIL.Image.BILINEAR),
                      transforms.ToTensor(),
                      transforms.Normalize((0, 0, 0), (1, 1, 1)),
                      AddGaussianNoise(0., noise_std)])

  test_transform = transforms.Compose([transforms.Resize(size=img_size),
                     transforms.ToTensor(),
                     transforms.Normalize((0, 0, 0), (1, 1, 1)),
                     AddGaussianNoise(0., noise_std)])

  
  trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=train_transform)
  testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=train_batchsize, shuffle=True, num_workers=num_workers)
  testloader = torch.utils.data.DataLoader(testset, batch_size=test_batchsize, shuffle=False, num_workers=num_workers)

  # model = models.vgg16()
  model = VGG16(num_classes)
  model.apply(init_weights)
  criterion = nn.CrossEntropyLoss()
  # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=False)
  optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
  scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=1, cooldown=5, min_lr=min_lr, verbose=True)
  scaler = torch.cuda.amp.GradScaler()
  model.to(device)

  # Train_loss = []
  # Test_loss = []
  # Test_acc = []

  for i in range(n_epochs):
    tic = time.time()
    logging.info('Epoch: {}'.format(i))
    train_loss = train_epoch(model, trainloader, criterion, optimizer)
    test_loss, test_acc = test_model(model, testloader, criterion)
    scheduler.step(test_loss)
    # Train_loss.append(train_loss)
    # Test_loss.append(test_loss)
    # Test_acc.append(test_acc)
    toc = time.time()
    logging.info('Time: {}s'.format(toc - tic))
    logging.info('='*20)
    
  torch.save({'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict()}, 
        "/home/ubuntu/project/Model_{}".format(datetime.datetime.now()))
  
  torch.cuda.empty_cache()
  del model
  del criterion
  del optimizer
  del scheduler
  del scaler
  del trainloader
  del testloader
  del train_transform
  del test_transform

  


2020-11-04 20:52:27,415 INFO Seed: 0


Files already downloaded and verified
Files already downloaded and verified


2020-11-04 20:52:35,710 INFO Epoch: 0
2020-11-04 21:00:34,021 INFO Training Loss: 2.340395213088111
2020-11-04 21:00:34,023 INFO Training Accuracy: 28.362%
2020-11-04 21:01:05,328 INFO Testing Loss: 1.579775584936142
2020-11-04 21:01:05,329 INFO Testing Accuracy: 42.3%
2020-11-04 21:01:05,330 INFO Time: 509.6202492713928s
2020-11-04 21:01:05,331 INFO Epoch: 1
2020-11-04 21:09:28,506 INFO Training Loss: 1.54556321670942
2020-11-04 21:09:28,507 INFO Training Accuracy: 43.586000000000006%
2020-11-04 21:10:00,129 INFO Testing Loss: 1.490691261291504
2020-11-04 21:10:00,131 INFO Testing Accuracy: 46.79%
2020-11-04 21:10:00,133 INFO Time: 534.8012945652008s
2020-11-04 21:10:00,134 INFO Epoch: 2
2020-11-04 21:18:27,273 INFO Training Loss: 1.3295782542289676
2020-11-04 21:18:27,275 INFO Training Accuracy: 52.124%
2020-11-04 21:18:58,968 INFO Testing Loss: 1.0692749750614166
2020-11-04 21:18:58,969 INFO Testing Accuracy: 61.480000000000004%
2020-11-04 21:18:58,970 INFO Time: 538.8360788822174s


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    img = img + 1 / 2     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))