In [20]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import copy

In [None]:
# during training the loss function changes
# there will be huge fluctuations
# it is desirable to store the copies of the entire model with 132M parameters
# when the loss is minimum for every iteration (not for every 100th iteration which it shows but even for in between iterations)
# copy package allows to store the copies of deep structures in python
# once the training is completed, restore or load the best model and rerun the iterations from the restored best model
# reduce the learning rate and try to learn further from that point onwards

In [3]:
# to find out whether there is actuall GPU or not in this particular instance
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [4]:
# declare transform_train, transform_test objects
transform_train = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

transform_test = transforms.Compose([transforms.RandomResizedCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

In [5]:
trainset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform = transform_train)
testset = torchvision.datasets.CIFAR10(root='./data',
                                       train=False,
                                       download=True,
                                       transform=transform_test)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [18]:
num_classes = 10

In [7]:
trainloader = torch.utils.data.DataLoader(trainset,batch_size=batch_size,shuffle=True)

In [13]:
from torchvision import models
vgg = models.vgg16_bn(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16_bn-6c64b313.pth" to /root/.cache/torch/hub/checkpoints/vgg16_bn-6c64b313.pth


HBox(children=(FloatProgress(value=0.0, max=553507836.0), HTML(value='')))




In [14]:
batch_size = 16

trainloader = torch.utils.data.DataLoader(trainset,batch_size=batch_size,shuffle=True)
testloader = torch.utils.data.DataLoader(testset,batch_size=batch_size,shuffle=False)

In [15]:
def evaluation(dataloader,model):
  total, correct = 0,0
  for data in dataloader:
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    _,pred = torch.max(outputs.data,1)
    total += labels.size(0)
    correct += (pred == labels).sum().item()
    return 100*correct/total

In [17]:
final_in_features = vgg.classifier[6].in_features
vgg.classifier[6] = nn.Linear(final_in_features,num_classes)

In [19]:
vgg = vgg.to(device)
loss_fn = nn.CrossEntropyLoss()
opt = optim.SGD(vgg.parameters(),lr=0.05)

In [22]:
loss_epoch_arr = []
max_epochs = 1

min_loss = 1000

n_iters = np.ceil(50000/batch_size)

for epoch in range(max_epochs):

  for i, data in enumerate(trainloader,0):

    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)

    opt.zero_grad()

    outputs = vgg(inputs)
    loss = loss_fn(outputs,labels)
    loss.backward()
    opt.step()

    if min_loss > loss.item():
      min_loss = loss.item()
      best_model = copy.deepcopy(vgg.state_dict())
      print('min loss %0.2f ' % min_loss)

    del inputs, labels, outputs
    torch.cuda.empty_cache()

    if i%100 == 0:
      print('Iteration: %d/%d, Loss: %0.2f' %(i, n_iters, loss.item()))

  loss_epoch_arr.append(loss.item())

  # print('Epoch: %d/%d, Test acc: %0.2f, Train acc: %0.2f' %(
  #     epoch,max_epochs,
  #     evaluation(testloader,best_model), evaluation(trainloader,best_model)))
  
  plt.plot(loss_epoch_arr)
  plt.show()

min loss 1.92 
Iteration: 0/3125, Loss: 1.92
min loss 1.61 
min loss 1.34 
min loss 1.21 
Iteration: 100/3125, Loss: 1.68
min loss 1.03 
min loss 0.91 
Iteration: 200/3125, Loss: 1.47
Iteration: 300/3125, Loss: 1.29
min loss 0.70 
min loss 0.57 
Iteration: 400/3125, Loss: 1.55
Iteration: 500/3125, Loss: 1.31
Iteration: 600/3125, Loss: 1.41
min loss 0.55 
min loss 0.48 
Iteration: 700/3125, Loss: 1.36
Iteration: 800/3125, Loss: 1.12
Iteration: 900/3125, Loss: 1.41
Iteration: 1000/3125, Loss: 1.38
Iteration: 1100/3125, Loss: 0.83
Iteration: 1200/3125, Loss: 1.25
Iteration: 1300/3125, Loss: 1.05
Iteration: 1400/3125, Loss: 0.97
Iteration: 1500/3125, Loss: 0.92
Iteration: 1600/3125, Loss: 1.06
min loss 0.45 
Iteration: 1700/3125, Loss: 0.96
min loss 0.38 
Iteration: 1800/3125, Loss: 0.86
Iteration: 1900/3125, Loss: 1.45
Iteration: 2000/3125, Loss: 1.09
Iteration: 2100/3125, Loss: 0.95
Iteration: 2200/3125, Loss: 0.65
min loss 0.25 
Iteration: 2300/3125, Loss: 1.31
Iteration: 2400/3125, Los

TypeError: ignored

In [26]:
# the network will import the weights of all parameters as per the best_model
vgg.load_state_dict(best_model)
print(evaluation(testloader,vgg), evaluation(trainloader,vgg))

75.0 87.5
