In [None]:
#imports

from __future__ import print_function
import os
import random
import numpy as np

import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
import torch.nn.parallel
import torchvision
import torchvision.transforms as transforms




In [None]:
# Global configuration.
# The functions defined in later cells read these names as module-level globals
# (only lr_val and start_epoch are passed to main() explicitly), so re-assigning
# one of them between cells changes the behavior of the next main() call.

# --- training options ---
batch_size = 128
lr_val = 0.1
start_epoch = 1
# factor the learning rate is multiplied by at the decay epochs inside main()
lr_decay = 0.1
optimizer_val = 'sgd' # 'sgd' | 'adam' | any other value makes main() build optim.LBFGS
weight_decay = 0.0005
momentum = 0.9
epochs = 2
# top-level directory that main() creates for run outputs
save = 'trained_nets'
# main() writes a checkpoint whenever epoch % save_epoch == 0 (plus epochs 1 and 150)
save_epoch = 10
# > 1 wraps the network in DataParallel; any truthy value enables worker/pinned-memory loader options
ngpu = 1
rand_seed = 0
# optional checkpoint paths; empty strings mean "start fresh"
resume_model = ''
resume_opt = ''
# NOTE(review): not read by any function visible in this notebook (main() uses a local of the same name)
save_folder = ''

# --- model / data parameters ---
# key into the `models` registry
model = 'resnet18'
loss_name = 'crossentropy'
# True: the training transform is ToTensor() only (no normalization, no augmentation)
raw_data = False
# True: keep normalization but skip random crop / horizontal flip
noaug = False
# NOTE(review): only affects the folder name and whether main() saves the loaders;
# no label corruption is applied by the code visible here
label_corrupt_prob = 0.0
# optional paths to previously saved loaders; when both are set they are torch.load()-ed
trainloader_val = ''
testloader_val = ''
# appended to the run-folder name when non-zero
idx = 0

In [None]:
# True when PyTorch can see a CUDA device. main() reads this flag to decide
# whether to seed CUDA, move the network/criterion to the GPU, and it passes
# the flag on to train() and test().
use_cuda = torch.cuda.is_available()
# print('Current devices: ' + str(torch.cuda.current_device()))
# print('Device count: ' + str(torch.cuda.device_count()))

In [None]:
def name_save_folder():
    """Return the run-folder name derived from the global hyper-parameters.

    The name always contains the optimizer, learning rate, batch size, weight
    decay, momentum and checkpoint interval. Further tags are appended only
    when the corresponding setting differs from its default / is enabled.
    All fields are joined with underscores.
    """
    # Mandatory leading fields.
    tags = ['model', str(optimizer_val), 'lr=' + str(lr_val)]

    # lr_decay is only recorded when it is not the default 0.1.
    if lr_decay != 0.1:
        tags.append('lr_decay=' + str(lr_decay))

    tags.extend([
        'bs=' + str(batch_size),
        'wd=' + str(weight_decay),
        'mom=' + str(momentum),
        'save_epoch=' + str(save_epoch),
    ])

    # (condition, tag) pairs, in the order they must appear in the name.
    optional_tags = [
        (loss_name != 'crossentropy', 'loss=' + str(loss_name)),
        (noaug, 'noaug'),
        (raw_data, 'rawdata'),
        (label_corrupt_prob > 0, 'randlabel=' + str(label_corrupt_prob)),
        (ngpu > 1, 'ngpu=' + str(ngpu)),
        (idx, 'idx=' + str(idx)),
    ]
    tags.extend(tag for enabled, tag in optional_tags if enabled)

    return '_'.join(tags)

In [None]:
def get_data_loaders(trainloader, testloader):
    """Return ``(trainloader, testloader)`` for CIFAR-10.

    Args:
        trainloader: path to a previously saved train loader, or a falsy value.
        testloader: path to a previously saved test loader, or a falsy value.

    When both paths are given they must exist and are simply ``torch.load``-ed.
    Otherwise CIFAR-10 is downloaded into ``./data`` (if needed) and fresh
    loaders are built. The training transform depends on the globals
    ``raw_data`` and ``noaug``; batch size and loader options come from the
    globals ``batch_size`` and ``ngpu``.
    """
    # Shortcut: reuse loaders that were serialized by an earlier run.
    if trainloader and testloader:
        assert os.path.exists(trainloader), 'trainloader does not exist'
        assert os.path.exists(testloader), 'testloader does not exist'
        return torch.load(trainloader), torch.load(testloader)

    # Per-channel statistics given on the 0-255 scale and rescaled to [0, 1]
    # (presumably the CIFAR-10 training-set mean/std -- TODO confirm).
    channel_mean = [125.3, 123.0, 113.9]
    channel_std = [63.0, 62.1, 66.7]
    normalize = transforms.Normalize(mean=[v/255.0 for v in channel_mean],
                                     std=[v/255.0 for v in channel_std])

    if raw_data:
        # raw pixels: no normalization and no augmentation
        train_steps = [transforms.ToTensor()]
    elif noaug:
        # normalized, but without augmentation
        train_steps = [transforms.ToTensor(), normalize]
    else:
        # random crop + horizontal flip augmentation, then normalize
        train_steps = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]
    transform_train = transforms.Compose(train_steps)

    # The test split is always normalized and never augmented.
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    if ngpu:
        loader_options = {'num_workers': 2, 'pin_memory': True}
    else:
        loader_options = {}

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True,
                                            transform=transform_train)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True,
                                           transform=transform_test)

    train_dl = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                           shuffle=True, **loader_options)
    test_dl = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                          shuffle=False, **loader_options)
    return train_dl, test_dl

In [None]:
#basicblock and resnet class

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The input is added back to the output of the second batch norm through
    ``self.shortcut``. The shortcut is an empty ``nn.Sequential`` (identity)
    unless the block changes resolution (``stride != 1``) or channel count, in
    which case it is a 1x1 convolution followed by batch norm.
    """
    # channel multiplier applied to `planes` to get the block's output width
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        out_planes = self.expansion*planes

        # main branch; only the first convolution may down-sample
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        # projection shortcut only when the identity would not match in shape
        if stride != 1 or in_planes != out_planes:
            shortcut_layers = [
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            ]
        else:
            shortcut_layers = []
        self.shortcut = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Apply conv-bn-relu, conv-bn, add the shortcut, then a final ReLU."""
        hidden = F.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden += self.shortcut(x)
        return F.relu(hidden)

class ResNet(nn.Module):
    """ResNet backbone with a 3x3 stride-1 stem and four residual stages.

    Args:
        block: block class; it must expose an ``expansion`` attribute and be
            constructible as ``block(in_planes, planes, stride)``.
        num_blocks: sequence whose first four entries give the number of
            blocks in stages 1-4.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        # running channel count, advanced by _make_layer as stages are built
        self.in_planes = 64

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)

        # (planes, stride of the first block) for layer1 .. layer4
        stage_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]
        for index, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, num_blocks[index], stride=stride)
            setattr(self, 'layer%d' % (index + 1), stage)

        self.linear = nn.Linear(512*block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses `stride`, the rest use stride 1."""
        stage_blocks = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage_blocks.append(block(self.in_planes, planes, block_stride))
            # the next block consumes what this one produced
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage_blocks)

    def forward(self, x):
        """Stem, four stages, 4x4 average pooling, flatten, linear classifier."""
        features = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        pooled = F.avg_pool2d(features, 4)
        flat = pooled.view(pooled.size(0), -1)
        return self.linear(flat)




# ResNet-18 block layout ([2, 2, 2, 2] BasicBlocks) on top of the ResNet class
# defined in this notebook. That class uses a 3x3 stride-1 stem and defaults to
# 10 classes, and this notebook trains it on CIFAR-10 -- it is not the ImageNet
# variant.
def ResNet18(num_classes=10):
    """Build a ResNet-18.

    Args:
        num_classes: width of the final linear layer. Defaults to 10, which
            matches the previous zero-argument behavior.

    Returns:
        A freshly constructed ``ResNet`` instance.
    """
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=num_classes)





In [None]:

# Registry mapping a model name (the value of the `model` setting) to a
# zero-argument factory. load() looks the name up here and calls the factory
# to build the network.
models = {
    'resnet18'              : ResNet18
}


In [None]:
def load(model_name, model_file=None, data_parallel=False):
    """Build the network registered under `model_name`, optionally with weights.

    Args:
        model_name: key into the global ``models`` registry.
        model_file: optional path to a saved checkpoint. The file may contain
            either a bare state dict or a dict with a ``'state_dict'`` entry.
        data_parallel: set when the checkpoint was written from a
            ``DataParallel``-wrapped model; the network is wrapped while the
            weights are loaded and unwrapped again before it is returned.

    Returns:
        The (unwrapped) network, switched to eval mode.
    """
    network = models[model_name]()
    if data_parallel:
        # wrap so the state-dict key names match the saved ones
        network = torch.nn.DataParallel(network)

    if model_file:
        assert os.path.exists(model_file), model_file + " does not exist."
        # map_location keeps every tensor on the CPU regardless of where it was saved
        checkpoint = torch.load(model_file, map_location=lambda storage, loc: storage)
        weights = checkpoint['state_dict'] if 'state_dict' in checkpoint.keys() else checkpoint
        network.load_state_dict(weights)

    # hand back the plain single-device module
    network = network.module if data_parallel else network

    network.eval()
    return network

In [None]:
#initialize params; only for first training

def init_params(net):
    """Re-initialize the parameters of every supported layer in `net`, in place.

    * Conv2d / ConvTranspose2d: Kaiming-normal weights (``mode='fan_in'``).
    * BatchNorm2d: weight set to 1.
    * Linear: weights drawn from a normal distribution with std 1e-3.

    For all three layer kinds an existing bias is reset to 0. Other module
    types are left untouched.
    """
    conv_types = (nn.Conv2d, nn.ConvTranspose2d)
    for module in net.modules():
        if isinstance(module, conv_types):
            init.kaiming_normal_(module.weight, mode='fan_in')
        elif isinstance(module, nn.BatchNorm2d):
            init.constant_(module.weight, 1)
        elif isinstance(module, nn.Linear):
            init.normal_(module.weight, std=1e-3)
        else:
            # containers and parameter-free layers: nothing to initialize
            continue

        # shared tail for every handled layer type
        if module.bias is not None:
            init.constant_(module.bias, 0)

In [None]:
# Training
def train(trainloader, net, criterion, optimizer, use_cuda=True, num_classes=10):
    """Run one training epoch and return ``(mean_loss, error_percent)``.

    Args:
        trainloader: iterable yielding ``(inputs, targets)`` batches, where
            ``targets`` holds integer class indices.
        net: the network to train (switched to train mode here).
        criterion: ``nn.CrossEntropyLoss`` (applied to the raw outputs) or
            ``nn.MSELoss`` (applied to softmax outputs vs. one-hot targets).
        optimizer: optimizer over ``net``'s parameters. ``optim.LBFGS`` is
            driven through a closure; every other optimizer uses the usual
            zero_grad / backward / step sequence.
        use_cuda: move each batch to the GPU before the forward pass.
        num_classes: width of the one-hot targets used for ``nn.MSELoss``.

    Returns:
        Tuple of the sample-weighted mean loss and the top-1 error in percent.

    Raises:
        ValueError: if `criterion` is of an unsupported type or the loader
            yields no samples.
    """
    use_mse = isinstance(criterion, nn.MSELoss)
    if not use_mse and not isinstance(criterion, nn.CrossEntropyLoss):
        raise ValueError('unsupported criterion: ' + type(criterion).__name__)

    # Decide from the optimizer itself (not from a global name) whether a
    # closure is required.
    needs_closure = isinstance(optimizer, optim.LBFGS)

    def forward(inputs, loss_targets):
        # Returns (predictions, loss); MSE is measured on class probabilities.
        outputs = net(inputs)
        if use_mse:
            outputs = F.softmax(outputs, dim=1)
        return outputs, criterion(outputs, loss_targets)

    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for inputs, targets in trainloader:
        batch_size = inputs.size(0)
        total += batch_size
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        if use_mse:
            loss_targets = torch.zeros(batch_size, num_classes, device=inputs.device)
            loss_targets.scatter_(1, targets.view(batch_size, 1), 1.0)
        else:
            loss_targets = targets

        if needs_closure:
            def closure():
                optimizer.zero_grad()
                _, closure_loss = forward(inputs, loss_targets)
                closure_loss.backward()
                return closure_loss

            # step() returns the loss of the first closure evaluation
            loss = optimizer.step(closure)
            # predictions after the update, only needed for the accuracy count
            with torch.no_grad():
                outputs, _ = forward(inputs, loss_targets)
        else:
            # Gradients must be cleared for every batch, whatever the loss;
            # otherwise they accumulate over the whole epoch.
            optimizer.zero_grad()
            outputs, loss = forward(inputs, loss_targets)
            loss.backward()
            optimizer.step()

        train_loss += loss.item()*batch_size
        _, predicted = torch.max(outputs.data, 1)
        correct += predicted.eq(targets).sum().item()

    if total == 0:
        raise ValueError('trainloader yielded no samples')
    return train_loss/total, 100 - 100.*correct/total


In [None]:
def test(testloader, net, criterion, use_cuda=True):
    net.eval()
    test_loss = 0
    correct = 0
    total = 0

    if isinstance(criterion, nn.CrossEntropyLoss):
        for batch_idx, (inputs, targets) in enumerate(testloader):
            batch_size = inputs.size(0)
            total += batch_size

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()*batch_size
            _, predicted = torch.max(outputs.data, 1)
            correct += predicted.eq(targets.data).cpu().sum().item()

    elif isinstance(criterion, nn.MSELoss):
        for batch_idx, (inputs, targets) in enumerate(testloader):
            batch_size = inputs.size(0)
            total += batch_size

            one_hot_targets = torch.FloatTensor(batch_size, 10).zero_()
            one_hot_targets = one_hot_targets.scatter_(1, targets.view(batch_size, 1), 1.0)
            one_hot_targets = one_hot_targets.float()
            if use_cuda:
                inputs, one_hot_targets = inputs.cuda(), one_hot_targets.cuda()
            inputs, one_hot_targets = Variable(inputs), Variable(one_hot_targets)
            outputs = F.softmax(net(inputs))
            loss = criterion(outputs, one_hot_targets)
            test_loss += loss.item()*batch_size
            _, predicted = torch.max(outputs.data, 1)
            correct += predicted.cpu().eq(targets).cpu().sum().item()

    return test_loss/total, 100 - 100.*correct/total

In [None]:
def _save_checkpoint(run_dir, net, optimizer, epoch, acc):
    """Write model_<epoch>.t7 and opt_state_<epoch>.t7 into `run_dir`."""
    # Store the weights of the underlying module so the key names do not carry
    # a DataParallel 'module.' prefix.
    plain_net = net.module if isinstance(net, torch.nn.DataParallel) else net
    state = {
        'acc': acc,
        'epoch': epoch,
        'state_dict': plain_net.state_dict(),
    }
    opt_state = {
        'optimizer': optimizer.state_dict()
    }
    torch.save(state, os.path.join(run_dir, 'model_' + str(epoch) + '.t7'))
    torch.save(opt_state, os.path.join(run_dir, 'opt_state_' + str(epoch) + '.t7'))


def main(lr_val, start_epoch):
    """Train the configured model and write logs, checkpoints and final weights.

    Apart from its two arguments, the run is configured through the
    module-level globals set in the configuration cell (``optimizer_val``,
    ``loss_name``, ``epochs``, ``save``, ...). When ``optimizer_val`` is not
    ``'sgd'`` the network is taken from the global ``pytorch_model``, which
    must therefore have been assigned before the call; training then updates
    that object in place.

    Args:
        lr_val: initial learning rate handed to the optimizer.
        start_epoch: first epoch to run; replaced by ``checkpoint['epoch'] + 1``
            when ``resume_model`` is set.

    Outputs:
        * ``<save>/<run folder>/log.out`` -- one status line per evaluated epoch
          (opened in append mode).
        * ``<save>/<run folder>/model_<e>.t7`` and ``opt_state_<e>.t7`` --
          checkpoints for epoch 0 (fresh runs only), epoch 1, every
          ``save_epoch`` epochs and epoch 150.
        * ``model_<optimizer_val>_.pt`` in the current working directory -- the
          final weights.
    """
    random.seed(rand_seed)
    np.random.seed(rand_seed)
    torch.manual_seed(rand_seed)

    if use_cuda:
        torch.cuda.manual_seed_all(rand_seed)
        cudnn.benchmark = True

    # Everything belonging to this run lives under <save>/<run folder>.
    run_dir = os.path.join(save, name_save_folder())
    if not os.path.exists(run_dir):
        os.makedirs(run_dir)

    trainloader, testloader = get_data_loaders(trainloader_val, testloader_val)

    if label_corrupt_prob and not resume_model:
        torch.save(trainloader, os.path.join(run_dir, 'trainloader.dat'))
        torch.save(testloader, os.path.join(run_dir, 'testloader.dat'))

    # Model: the first ('sgd') stage builds a fresh network, later stages
    # continue from the network stored in the global `pytorch_model`.
    # NOTE(review): init_params() is defined in this notebook but is not called
    # here, so a fresh network keeps PyTorch's default initialization --
    # confirm that this is intended.
    if optimizer_val == 'sgd':
        net = load(model)
    else:
        net = pytorch_model

    if resume_model:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        checkpoint = torch.load(resume_model)
        net.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch'] + 1

    if ngpu > 1:
        net = torch.nn.DataParallel(net)

    # Cross-entropy is the default loss; MSE is used only when asked for.
    # (The selection used to be inverted: 'crossentropy' installed MSELoss.)
    if loss_name == 'mse':
        criterion = nn.MSELoss()
    else:
        criterion = nn.CrossEntropyLoss()

    if use_cuda:
        net.cuda()
        criterion = criterion.cuda()

    # Optimizer
    if optimizer_val == 'sgd':
        optimizer = optim.SGD(net.parameters(), lr=lr_val, momentum=momentum, weight_decay=weight_decay, nesterov=True)
    elif optimizer_val == 'adam':
        optimizer = optim.Adam(net.parameters(), lr=lr_val, weight_decay=weight_decay)
    else:
        optimizer = optim.LBFGS(net.parameters(), lr=lr_val)

    if resume_opt:
        checkpoint_opt = torch.load(resume_opt)
        optimizer.load_state_dict(checkpoint_opt['optimizer'])

    # The context manager closes the log even if training raises.
    with open(os.path.join(run_dir, 'log.out'), 'a') as f:
        # record the performance of initial model
        if not resume_model:
            train_loss, train_err = test(trainloader, net, criterion, use_cuda)
            test_loss, test_err = test(testloader, net, criterion, use_cuda)
            status = 'e: %d loss: %.5f train_err: %.3f test_top1: %.3f test_loss %.5f \n' % (0, train_loss, train_err, test_err, test_loss)
            print(status)
            f.write(status)
            _save_checkpoint(run_dir, net, optimizer, 0, 100 - test_err)

        for epoch in range(start_epoch, epochs + 1):
            loss, train_err = train(trainloader, net, criterion, optimizer, use_cuda)
            test_loss, test_err = test(testloader, net, criterion, use_cuda)
            status = 'e: %d loss: %.5f train_err: %.3f test_top1: %.3f test_loss %.5f \n' % (epoch, loss, train_err, test_err, test_loss)
            print(status)
            f.write(status)

            # Save checkpoint.
            if epoch == 1 or epoch % save_epoch == 0 or epoch == 150:
                _save_checkpoint(run_dir, net, optimizer, epoch, 100 - test_err)

            # Step-wise learning-rate decay at fixed epochs.
            if epoch in (150, 225, 275):
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= lr_decay

    # Final weights, written next to the notebook; unwrap DataParallel so the
    # file can be loaded into a plain network by a later stage.
    final_net = net.module if isinstance(net, torch.nn.DataParallel) else net
    torch.save(final_net.state_dict(), 'model_' + optimizer_val + '_.pt')

In [None]:
# Stage 1: optimizer_val is still 'sgd' here, so main() builds a fresh network
# via load(model) and, when done, writes its weights to model_sgd_.pt in the
# current working directory.
main(lr_val, start_epoch)

Files already downloaded and verified
Files already downloaded and verified




e: 0 loss: 0.09000 train_err: 89.748 test_top1: 89.390 test_loss 0.09000 





e: 1 loss: 0.11360 train_err: 88.188 test_top1: 88.130 test_loss 0.15573 

e: 2 loss: 0.17713 train_err: 89.716 test_top1: 88.910 test_loss 0.17781 



In [None]:

def setmodel(optimizer_val, model_dir=None):
    """Rebuild a ResNet-18 from the final weights of an earlier training stage.

    Args:
        optimizer_val: optimizer name of the stage whose weights should be
            loaded; the file name is ``model_<optimizer_val>_.pt``.
        model_dir: directory containing that file. Defaults to the current
            working directory, which is where main() writes it.

    Returns:
        The network with the stored weights loaded, in eval mode.
    """
    if model_dir is None:
        model_dir = os.getcwd()
    weights_path = os.path.join(model_dir, 'model_' + optimizer_val + '_.pt')

    pytorch_model = ResNet18()

    # map_location keeps the tensors on the CPU, so weights saved from a GPU
    # run can be restored on a machine without CUDA.
    state_dict = torch.load(weights_path, map_location=lambda storage, loc: storage)
    pytorch_model.load_state_dict(state_dict)
    pytorch_model.eval()

    return pytorch_model



In [None]:
# Stage 2: continue from the weights saved by the 'sgd' stage, now with Adam.
# main() reads both `pytorch_model` and `optimizer_val` as globals, so these
# assignments must run before the call, and this cell must run after the cell
# that trained with 'sgd'.
pytorch_model = setmodel('sgd')
optimizer_val = 'adam'
main(lr_val, start_epoch)

Files already downloaded and verified
Files already downloaded and verified




e: 0 loss: 0.17854 train_err: 89.270 test_top1: 88.910 test_loss 0.17781 





e: 1 loss: 0.18003 train_err: 90.020 test_top1: 90.000 test_loss 0.18000 

e: 2 loss: 0.18000 train_err: 90.000 test_top1: 90.000 test_loss 0.18000 



In [None]:
# Stage 3: continue from the weights saved by the 'adam' stage, now with L-BFGS.
# main() builds optim.LBFGS for any optimizer_val other than 'sgd' / 'adam';
# the exact string 'LBFGS' also ends up in the run-folder name and in the
# final weights file name (model_LBFGS_.pt).
pytorch_model = setmodel('adam')
optimizer_val = 'LBFGS'
main(lr_val, start_epoch)

Files already downloaded and verified
Files already downloaded and verified




e: 0 loss: 0.18000 train_err: 90.000 test_top1: 90.000 test_loss 0.18000 





e: 1 loss: 0.18000 train_err: 90.000 test_top1: 90.000 test_loss 0.18000 

e: 2 loss: 0.18000 train_err: 90.000 test_top1: 90.000 test_loss 0.18000 

