In [1]:
from __future__ import print_function

import copy
import gc
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.autograd as ag
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.metrics import confusion_matrix
from tqdm import tqdm_notebook

import data_split
import MNISTtools
from utils import mnist_reader
from utils import utils

In [2]:
def _gray_to_three_channels(x):
    """Concatenate three copies of ``x`` along dimension 0.

    Presumably ``x`` is a 1xHxW grayscale tensor coming out of ``ToTensor`` and
    the result is the 3xHxW input the ImageNet-style networks expect -- confirm.
    """
    return torch.cat([x, x, x], 0)


# Training pipeline; further data-augmentation transforms can be added here.
train_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Lambda(_gray_to_three_channels),
])

# transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))   #transforms.ToPILImage(),
# Evaluation pipeline: same resize and channel handling, no random augmentation.
test_transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Lambda(_gray_to_three_channels),
])

In [3]:
# Fashion-MNIST is fetched into this folder when it is not already there
# (download=True is what makes the first run work).
data_root = 'data/downloads'

trainset = torchvision.datasets.FashionMNIST(
    root=data_root, train=True, download=True, transform=train_transform)
testset = torchvision.datasets.FashionMNIST(
    root=data_root, train=False, download=True, transform=test_transform)

# Hold out part of the training set for validation (10% per the original note).
train, validation = data_split.train_valid_split(trainset)

In [4]:
# Mini-batch size passed to the train, validation and test DataLoaders.
batch_size = 10

In [5]:
# Options shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=batch_size, num_workers=2)

trainloader = torch.utils.data.DataLoader(train, shuffle=True, **loader_kwargs)
validloader = torch.utils.data.DataLoader(validation, shuffle=False, **loader_kwargs)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_kwargs)

In [6]:
# Report batch counts and sample counts. The sample count is read from the
# underlying dataset: len(loader) * batch_size over-counts whenever the last
# batch is only partially filled.
print('# of batches in training: ',len(trainloader), ',   Total train data: ',len(trainloader.dataset))
print('# of batches in test: ',len(testloader), ',   Total test data: ',len(testloader.dataset))


# of batches in training:  5400 ,   Total train data:  54000
# of batches in test:  1000 ,   Total test data:  10000


# Loading VGG16 (with Batch Normalization) pretrained on ImageNet

In [55]:
# `pretrained` is a boolean flag; the former string 'imagenet' only worked
# because a non-empty string is truthy.
model = torchvision.models.vgg16_bn(pretrained=True)

#--------------------------------------------------------------------------------------
# REMEMBER: vgg16.features --> convolutional layers, vgg16.classifier --> FC layers
#--------------------------------------------------------------------------------------

# Freeze all parameters in the convolutional layers. The attribute autograd
# reads is `requires_grad`; assigning to `require_grad` just creates an unused
# attribute and leaves every layer trainable.
for parameter in model.features.parameters():
    parameter.requires_grad = False

# Keep every classifier layer except the last one and append a fresh
# 10-unit output layer with the same input width.
in_ftrs = model.classifier[6].in_features
features = list(model.classifier.children())[:-1]
features.append(nn.Linear(in_ftrs, 10))
model.classifier = nn.Sequential(*features)

# Loading ResNet-18 pretrained on ImageNet


In [85]:
# `pretrained` is a boolean flag; the former string 'imagenet' only worked
# because a non-empty string is truthy.
model = torchvision.models.resnet18(pretrained=True)

# Swap the final fully connected layer for a 10-unit output layer. No
# parameters are frozen in this cell, so the whole network stays trainable.
# Both names are kept bound because the original cell defined both.
num_ftrs = in_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)


# Loading SqueezeNet 1.0 pretrained on ImageNet


In [14]:
# `pretrained` is a boolean flag; the former string 'imagenet' only worked
# because a non-empty string is truthy.
model = torchvision.models.squeezenet1_0(pretrained=True)

# Freeze the feature extractor. The attribute autograd reads is
# `requires_grad`; assigning to `require_grad` silently did nothing.
for parameter in model.features.parameters():
    parameter.requires_grad = False

# New classification head producing 10 channels, averaged over a 13x13 window.
# NOTE(review): the 13 assumes the feature map is 13x13 for 224x224 inputs -- confirm.
model.classifier = nn.Sequential(
    nn.Dropout(p=0.5),
    nn.Conv2d(512, 10, kernel_size=1),
    nn.ReLU(inplace=True),
    nn.AvgPool2d(13)
)

# Replace forward so the output is reshaped to (batch, 10).
# NOTE(review): presumably needed because the stock forward reshapes to the
# original class count -- confirm against the installed torchvision.
# The network is bound as a default argument so the function keeps pointing at
# this model even if the global name `model` is later rebound.
model.forward = lambda x, net=model: net.classifier(net.features(x)).view(x.size(0), 10)

In [15]:
# Print the layer structure of the currently selected network.
print(model)

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(96, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace)
    )
    (5): Fire(
      (squeeze): Conv2d(128, 32, k

In [16]:
# Move the network to the GPU only when one is present, matching the
# availability check the training loop applies to its input batches.
if torch.cuda.is_available():
    model = model.cuda()

In [17]:
def test(model, dataloader, compute_loss=False, criterion=None):
    correct = 0
    total = 0
    acc_test = 0.0
    total_loss = 0.0
    
    model.train(False)
    model.eval()    
    for i,data in enumerate(dataloader):
        images, labels = data

        images = images.cuda()
        labels = labels.cuda()

        images = ag.Variable(images, volatile=True)
        labels = ag.Variable(labels, volatile=True)

        outputs = model(images)
        
        if compute_loss:
            loss = criterion(outputs, labels)
            total_loss += loss.data[0]
        
        _, predicted = torch.max(outputs.data, 1)
        acc_test += torch.sum(predicted == labels.data)
        total += len(labels.data)
        del outputs, predicted, images, labels
        torch.cuda.empty_cache()
   
    return acc_test*1.0/total, total_loss*1.0/total

In [18]:
# Evaluate on the test set before fine-tuning; loss is not computed because
# compute_loss keeps its default of False, so the second value is 0.0.
test(model,testloader)

(0.0878, 0.0)

In [121]:
def to_file(valid_accs, train_loss_list, valid_loss_list, epoch, best_epoch):
    """Hand the training history to ``utils.write_list_to_file``.

    Four files under ``assets/`` are written, in this order: validation
    accuracies, the training losses of the given epoch (the epoch number is
    part of the file name), validation losses, and the best epoch wrapped in a
    one-element list.
    """
    targets = (
        ('assets/validation_accuracies.txt', valid_accs),
        ('assets/train_loss_list_epoch_' + str(epoch) + '.txt', train_loss_list),
        ('assets/validation_losses.txt', valid_loss_list),
        ('assets/best_epoch.txt', [best_epoch]),
    )
    for path, values in targets:
        utils.write_list_to_file(path, values)

# Training the model

In [122]:
model.train(True)
criterion = nn.CrossEntropyLoss()

# Give the optimizer only the parameters that are actually trainable, so any
# layers frozen via requires_grad=False are left out of the update step.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.001, momentum=0.9)


In [124]:
# Fine-tune for a fixed number of epochs. After each epoch the model is
# validated, the history is written to disk, and the weights with the best
# validation accuracy so far are checkpointed.
num_epochs = 3

start = time.time()
best_acc = 0.0
best_model = None
best_epoch = 1
valid_accs = []
valid_loss_list = []
loss_list = []          # per-batch training losses across all epochs

use_cuda = torch.cuda.is_available()

for epoch in range(num_epochs):  # loop over the dataset multiple times
    model.train(True)            # test() leaves the model in eval mode
    running_loss = 0.0
    train_loss_list = []         # per-batch training losses of this epoch

    for i, data in enumerate(trainloader, 0):       # 0 is just to start i from 0

        #% of the dataset
        #if(i> len(trainloader)/10):
        #    break

        # get the inputs
        inputs, labels = data
        if use_cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Record the loss as a plain Python float. Accumulating the loss
        # tensor itself keeps it (and whatever autograd history is attached)
        # alive for the whole run, so memory grows with every batch -- a
        # likely contributor to the CUDA out-of-memory failure seen here.
        batch_loss = loss.item()
        running_loss += batch_loss
        train_loss_list.append(batch_loss)

        del inputs, labels, loss, outputs

    # Release cached GPU blocks once per epoch instead of after every batch.
    torch.cuda.empty_cache()

    valid_acc, avg_valid_loss = test(model, validloader, True, criterion)
    valid_accs.append(valid_acc)
    valid_loss_list.append(avg_valid_loss)

    if valid_acc > best_acc:                                          # check for best model
        best_acc = valid_acc
        best_model = copy.deepcopy(model.state_dict())
        best_epoch = epoch + 1
        torch.save(best_model, 'assets/Squeezenet_best_model.pt')      # save best model

    to_file(valid_accs, train_loss_list, valid_loss_list, epoch + 1, best_epoch)
    loss_list += train_loss_list
    print('epoch: ', epoch+1)
    print('valid acc: ', valid_acc)
    print('Total time taken in training (secs): ',time.time()-start)
print('Total time taken in training (secs): ',time.time()-start)
print('Finished Training')

RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch_1523240155148/work/torch/lib/THC/generic/THCStorage.cu:58

In [None]:
# t=str(time.strftime("%c"))
# utils.write_list_to_file('assets/list_' + str(time.strftime("%c")) + '.txt', l)

# Saving the model (very important)

In [None]:
# Persist the current weights (state_dict only, not the module object).
# NOTE(review): this saves `model` as it stands now, which is not necessarily
# the best-validation checkpoint written during training -- confirm intent.
torch.save(model.state_dict(), 'ResNet_100%.pt')

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator.
torch.cuda.empty_cache()

# Load the saved model
Rebuild the network architecture, load the saved weights into it, and evaluate it on the test set.

In [6]:
# load the architecture's skeleton
# Build the architecture only; the weights come from a saved state_dict.
model = torchvision.models.vgg16_bn(pretrained=False)

# The attribute autograd reads is `requires_grad`; the former `require_grad`
# assignment created an unused attribute and froze nothing.
for parameter in model.features.parameters():
    parameter.requires_grad = False

# Same head replacement as at training time: keep all classifier layers but
# the last and append a 10-unit output layer.
in_ftrs = model.classifier[6].in_features
features = list(model.classifier.children())[:-1]
features.append(nn.Linear(in_ftrs, 10))
model.classifier = nn.Sequential(*features)

In [29]:
# ResNet-18 architecture without pretrained weights; its final fully
# connected layer is swapped for a 10-unit output layer.
model = torchvision.models.resnet18(pretrained=False)
num_ftrs = in_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)


In [30]:
# Load the saved weights. map_location='cpu' lets a checkpoint written from a
# GPU run be read on a machine without CUDA; the model is moved to the GPU
# afterwards only when one is available.
state_dict = torch.load('assets/Resnet_best_model.pt', map_location='cpu')
model.load_state_dict(state_dict)
if torch.cuda.is_available():
    model = model.cuda()

In [31]:
# Evaluate the reloaded model on the test set (accuracy only, loss not computed).
test(model, testloader)

(0.9042, 0.0)

## Plotting Training Loss

In [None]:
sns.set()

# Running mean of the per-batch training loss: entry k is the mean of the
# first k+1 losses. It is built in a single pass with a running total instead
# of re-summing every prefix, and it includes the mean over the complete list,
# which the previous range(1, len(loss_list)) left out.
avg_list = []
loss_total = 0.0
for n_seen, batch_loss in enumerate(loss_list, start=1):
    loss_total += float(batch_loss)
    avg_list.append(loss_total / n_seen)

# Validation loss recorded after each epoch.
plt.plot(range(len(valid_loss_list)),valid_loss_list, 'r')
plt.xlabel("Number of Epochs")
plt.ylabel("Average Loss")
plt.title("Average Validation Loss ResNet-18 (100% of Data)")
plt.show()


In [56]:
# Number of recorded validation losses, followed by 25 training losses taken
# at evenly spaced positions across the whole run.
print(len(valid_loss_list))
sample_idx = np.linspace(0, len(loss_list) - 1, 25, dtype=int)
print([loss_list[k] for k in sample_idx])

25
[2.7158625, 0.79797953, 0.9465567, 0.53658503, 0.42776138, 0.83123416, 0.59055316, 0.495147, 0.47180885, 0.70226425, 0.5307456, 0.38417262, 0.5232768, 0.58815706, 0.4003051, 0.5211159, 0.3574661, 0.48919147, 0.37042063, 0.42259374, 0.5711557, 0.3930994, 0.38428846, 0.41211215, 0.568676]


In [None]:
# Running-average training loss against the number of batches seen.
fig, ax = plt.subplots()
ax.plot(range(len(avg_list)), avg_list)
ax.set_xlabel("Batches Trained")
ax.set_ylabel("Average Loss")
ax.set_title("Average Training Loss ResNet-18 (100% of Data)")
plt.show()

In [None]:
# Validation accuracy recorded after each epoch, drawn in green.
fig, ax = plt.subplots()
ax.plot(range(len(valid_accs)), valid_accs, 'g')
ax.set_xlabel("Number of Epochs")
ax.set_ylabel("Accuracy")
ax.set_title("Validation Set Accuracy ResNet-18 (10% of Data)")
plt.show()