In [None]:
import torch
from torchvision import datasets
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# `device` is read as a global by train(), accuracy() and predict() below.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


## Mounting Google Drive

In [None]:
# Mount Google Drive inside the Colab VM at /gdrive/.
from google.colab import drive
drive.mount('/gdrive/')
# List the mount point to confirm that the mount succeeded.
!ls /gdrive

Mounted at /gdrive/
MyDrive


## Extracting The Dataset From a Zip File


In [None]:
import os
import zipfile

BASE_PATH = '/gdrive/My Drive/colab_files/'
CK_PATH = '/gdrive/My Drive/colab_files/cks'

if not os.path.exists(BASE_PATH):
  os.makedirs(BASE_PATH)

# add checkpoints to store the history of training
if not os.path.exists(CK_PATH):
  os.makedirs(CK_PATH)

if not os.path.exists('extracted_birds'):
  !mkdir extracted_birds
  os.chdir('extracted_birds')
  # create an object to manipulate the zip
  zip_file = zipfile.ZipFile(os.path.join(BASE_PATH, 'birds.zip'))
  # to extract all files inside
  zip_file.extractall()
  
  # switch back to the parent directory
  os.chdir('..')

# verify that the extracted_birds directory is there
!ls

FileNotFoundError: ignored

## Getting Image Size
**This cell should not be run when training a model**; it is only for testing purposes. It checks the size of sample images in the given dataset.

### Results
See output below. This can roughly tell us what size to choose for `transforms.RandomCrop()`


In [None]:
from PIL import Image

TEST_PATH = 'extracted_birds/birds/test/0'
# Print the width and height of every file in TEST_PATH.
# PIL reports (width, height) through `.size`, so there is no need to convert each
# image into a tensor first; the `with` block closes each file handle right away.
for img_file in os.listdir(TEST_PATH):
  print(f'image file name is : {img_file}')
  with Image.open(os.path.join(TEST_PATH, img_file)) as image:
    img_width, img_height = image.size
  print(f'image width: {img_width}, image height: {img_height}')

image file name is : 81d5cdbb709e4ffd8b1c29e0f616780a.jpg
image width: 1024, image height: 725
image file name is : 626e0b90838a41d9b71df171d23157cf.jpg
image width: 800, image height: 800
image file name is : ee217e5a95f54e97995eb2e334405154.jpg
image width: 800, image height: 548
image file name is : 0133dc8543f44c1fbb5a7941a286b1db.jpg
image width: 1024, image height: 680
image file name is : 76f84b5eb6334474a2d323a3ba7efc24.jpg
image width: 1024, image height: 606
image file name is : 1131a4c5f40f4636a4be4fed9e83b4c8.jpg
image width: 1024, image height: 683
image file name is : 9d7268bdebc24d8cbe09d2734961ba14.jpg
image width: 573, image height: 860
image file name is : dcbdbf9fd8a14e3bbc36b4ff1327461e.jpg
image width: 1008, image height: 820
image file name is : 69034a46663b4e0eb8db02cf696ebe8d.jpg
image width: 1024, image height: 831
image file name is : 7bed42d90d8843bfb6f96083fc50ea23.jpg
image width: 1024, image height: 768
image file name is : d7ffcb4b168444e7a2fd15eb1f7d4a23

KeyboardInterrupt: ignored

## Process Dataset and Data Augmentation

### Potential parameters to experiment with:

* img_size
* batch_size

In [None]:
def get_birds_data(augmentation=0, img_size=128, batch_size=64):
  """Build the train / validation / test loaders and the label lookup tables.

  Args:
    augmentation: Unused; kept so existing positional calls keep working.
    img_size: Size passed to `transforms.Resize` and side length of the random
      training crop.
    batch_size: Batch size of the training loader. The validation and test
      loaders always use a batch size of 1.

  Returns:
    dict with the DataLoaders under 'train', 'valid' and 'test', plus
    'to_class' (ImageFolder label index -> integer folder name) and
    'to_name' (ImageFolder label index -> line of names.txt).
  """
  # using imagenet mean and std 
  # https://pytorch.org/hub/pytorch_vision_resnet/ mentions these values
  # https://stackoverflow.com/questions/58151507/why-pytorch-officially-use-mean-0-485-0-456-0-406-and-std-0-229-0-224-0-2
  normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

  transform_train = transforms.Compose([
    transforms.Resize(img_size),
    transforms.RandomCrop(img_size, padding=8, padding_mode='edge'),
    transforms.RandomHorizontalFlip(),    # 50% of time flip image along y-axis
    transforms.ToTensor(),
    normalize,
  ])

  transform_test = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    normalize,
  ])

  # using imageFolder 
  # https://pytorch.org/vision/main/generated/torchvision.datasets.DatasetFolder.html#torchvision.datasets.DatasetFolder
  train_root = 'extracted_birds/birds/train'
  train_ds = datasets.ImageFolder(root=train_root, transform=transform_train)
  # Random split, 90% train set and 10% validation set
  train_set_size = int(len(train_ds) * 0.9)
  valid_set_size = len(train_ds) - train_set_size
  # fix the random seed to find patterns with hyperparameters and train better
  train_set, valid_split = random_split(train_ds, [train_set_size, valid_set_size], generator=torch.Generator().manual_seed(47))
  # The held-out indices are re-wrapped around a second view of the same folder that
  # uses the deterministic test transform, so validation accuracy is not measured on
  # randomly cropped / flipped images.
  eval_ds = datasets.ImageFolder(root=train_root, transform=transform_test)
  valid_set = torch.utils.data.Subset(eval_ds, valid_split.indices)

  trainloader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                                            num_workers=2)
  # batch size 1: transform_test does not crop, so image sizes may differ per sample
  validloader = DataLoader(valid_set, batch_size=1, shuffle=False,
                                            num_workers=2)

  test_ds = datasets.ImageFolder(root='extracted_birds/birds/test', transform=transform_test)
  testloader = DataLoader(test_ds, batch_size=1, shuffle=False,
                                          num_workers=2)
  
  # get all classes, since each line represents a name for each class
  with open('extracted_birds/birds/names.txt') as names_file:
    classes = names_file.read().strip().split('\n')
  # train_ds.class_to_idx: dictionary mapping class name to class index 
  # idx is a label (int) of each class (folder name), in our case folder names are also numbers
  # https://github.com/pytorch/vision/blob/caf12f840037193fb3d1e6c60168c37dfa218f43/torchvision/datasets/folder.py#L35
  idx_to_class = {int(idx) : int(class_name) for class_name, idx in train_ds.class_to_idx.items()}
  idx_to_name = {k: classes[v] for k,v in idx_to_class.items()}
  
  return {'train': trainloader, 'valid': validloader, 'test': testloader, 'to_class': idx_to_class, 'to_name':idx_to_name}

# data = get_birds_data()
# print(data['train'].__dict__)
# print(data['test'].__dict__)

### Define Functions Used For Fine-tuning Pre-Trained Model

In [None]:
# add step size and gamma values for scheduler
# add logic for loading checkpoints
# add checkpoint path, checkpoint state parameters
# add start epoch, initialize to 0, only update when there is a checkpoint state

def train(net, dataloader, checkpoint=None, ck_path=None, epochs=1, lr=0.01, 
          momentum=0.9, decay=0.0, verbose=1, step_size=3, gamma=0.1):
  """Fine-tune `net` with SGD and a StepLR schedule, optionally resuming / checkpointing.

  Args:
    net: Model to train; moved to the global `device` and put into train mode.
    dataloader: Iterable of batches with inputs at batch[0] and labels at batch[1].
    checkpoint: Optional dict as written by this function ('epoch',
      'model_state_dict', 'optimizer_state_dict', 'losses' and, in files written
      by this version, 'scheduler_state_dict').
    ck_path: Directory that receives one `ck-<epoch>.pkl` per finished epoch.
      A falsy value disables saving.
    epochs: Epoch index to stop at; training covers range(start_epoch, epochs).
    lr, momentum, decay: SGD learning rate, momentum and weight decay.
    verbose: When truthy, print the mean loss of every 10 mini-batches.
    step_size, gamma: StepLR period in epochs and multiplicative decay factor.

  Returns:
    List with the loss of every mini-batch, including history restored from
    `checkpoint`.
  """
  net.to(device)
  net.train()
  start_epoch = 0
  losses = []
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)
  # use a step scheduler for learning rate schedules
  # https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.StepLR.html#torch.optim.lr_scheduler.StepLR
  # https://neptune.ai/blog/how-to-choose-a-learning-rate-scheduler
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

  # https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
  if checkpoint:
    net.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    # copy, so appending below does not modify the caller's checkpoint dict
    losses = list(checkpoint['losses'])
    if 'scheduler_state_dict' in checkpoint:
      # restores the position in the decay schedule (and the saved step_size / gamma)
      scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    else:
      # Checkpoints without scheduler state: rebuild the learning rate StepLR would
      # have reached after `start_epoch` epochs and realign its epoch counter, so the
      # decay schedule continues instead of restarting.
      decay_factor = gamma ** (start_epoch // step_size)
      for group in optimizer.param_groups:
        group['lr'] = group.get('initial_lr', lr) * decay_factor
      scheduler.last_epoch = start_epoch

  if ck_path:
    os.makedirs(ck_path, exist_ok=True)

  for epoch in range(start_epoch, epochs):
    sum_loss = 0.0
    for i, batch in enumerate(dataloader, 0):
      # get the inputs; data is a list of [inputs, labels]
      inputs, labels = batch[0].to(device), batch[1].to(device)

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize 
      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()  # autograd magic, computes all the partial derivatives
      optimizer.step() # takes a step in gradient direction

      # print statistics
      batch_loss = loss.item()
      losses.append(batch_loss)
      sum_loss += batch_loss
      if i % 10 == 9:    # print every 10 mini-batches
        if verbose:
          print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / 10))
        sum_loss = 0.0

    # Advance the schedule before saving, so the stored optimizer / scheduler state
    # already describes the epoch a resumed run starts with.
    scheduler.step()

    if ck_path:
      # save current checkpoint into a pickle file inside the requested directory
      torch.save({
                  'epoch': epoch + 1,
                  'model_state_dict': net.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'scheduler_state_dict': scheduler.state_dict(),
                  'losses': losses,
                 }, os.path.join(ck_path, 'ck-%d.pkl' % (epoch + 1)))
  return losses

def accuracy(net, dataloader):
  """Return the fraction of samples in `dataloader` whose top-1 prediction equals the label.

  `net` is moved to the global `device` and is left in eval mode.
  """
  net.to(device)
  # eval mode: layers such as dropout / batchnorm switch to inference behaviour
  net.eval()
  n_correct, n_seen = 0, 0
  with torch.no_grad():
    on_device = ((b[0].to(device), b[1].to(device)) for b in dataloader)
    for images, labels in on_device:
      # index of the largest output per row is the predicted class
      predicted = torch.max(net(images).data, 1)[1]
      n_seen += labels.size(0)
      n_correct += (predicted == labels).sum().item()
  return n_correct / n_seen

def predict(net, dataloader, ofname, idx_to_class=None):
  """Write a `path,class` CSV with the top-1 prediction of `net` for every sample.

  Args:
    net: Model to run; moved to the global `device` and put into eval mode.
    dataloader: Unshuffled DataLoader whose dataset exposes `.samples`
      (path, label) pairs, as `ImageFolder` does. Rows are matched to samples
      by position, so any batch size works.
    ofname: Path of the CSV file to write.
    idx_to_class: Optional mapping from predicted index to the class id written
      to the CSV. Defaults to the global `data['to_class']`.
  """
  if idx_to_class is None:
    idx_to_class = data['to_class']
  samples = dataloader.dataset.samples
  net.to(device)
  net.eval()
  sample_idx = 0
  # `with` closes the CSV even if inference raises part-way through
  with open(ofname, 'w') as out, torch.no_grad():
    out.write("path,class\n")
    for i, (images, _labels) in enumerate(dataloader, 0):
      if i%100 == 0:
        print(i)
      outputs = net(images.to(device))
      predicted = torch.max(outputs.data, 1)[1]
      # one CSV row per sample in the batch, in dataset order
      for pred in predicted.tolist():
        fname = samples[sample_idx][0]
        out.write("test/{},{}\n".format(os.path.basename(fname), idx_to_class[pred]))
        sample_idx += 1

def smooth(x, size):
  """Moving average of `x` over `size` points, keeping only fully overlapping windows."""
  window = np.ones(size)
  window = window / size
  return np.convolve(x, window, mode='valid')

## Define Pre-Trained Model For Image Classification

In [None]:
# DON'T RERUN THIS CELL
# Experiment: ResNet-50 on 128px inputs, 15 epochs, learning rate x0.1 every 5 epochs.
# No ck_path is passed, so this run writes no checkpoints.
EPOCHS = 15
IMG_SIZE = 128
BATCH_SIZE = 64
STEP_SIZE = 5
GAMMA = 0.1
DECAY = 0.00047

# The first positional argument is `augmentation`, which get_birds_data() does not read.
data = get_birds_data(1, IMG_SIZE, BATCH_SIZE)
# Dump the loader attributes as a sanity check of their configuration.
print(data['train'].__dict__)
print(data['test'].__dict__)

# https://pytorch.org/hub/pytorch_vision_resnet/
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
# https://www.pluralsight.com/guides/introduction-to-resnet
# set the number of output features to be the number of classes
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], epochs=EPOCHS, decay=DECAY, step_size=STEP_SIZE, gamma=GAMMA)

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

[1,    10] loss: 6.371
[1,    20] loss: 6.267
[1,    30] loss: 6.174
[1,    40] loss: 5.957
[1,    50] loss: 5.671
[1,    60] loss: 5.370
[1,    70] loss: 5.215
[1,    80] loss: 4.932
[1,    90] loss: 4.740
[1,   100] loss: 4.673
[1,   110] loss: 4.291
[1,   120] loss: 4.355
[1,   130] loss: 4.109
[1,   140] loss: 4.068
[1,   150] loss: 4.027
[1,   160] loss: 3.911
[1,   170] loss: 3.799
[1,   180] loss: 3.671
[1,   190] loss: 3.673
[1,   200] loss: 3.675
[1,   210] loss: 3.607
[1,   220] loss: 3.575
[1,   230] loss: 3.531
[1,   240] loss: 3.441
[1,   250] loss: 3.271
[1,   260] loss: 3.361
[1,   270] loss: 3.262
[1,   280] loss: 3.367
[1,   290] loss: 3.126
[1,   300] loss: 3.185
[1,   310] loss: 3.137
[1,   320] loss: 3.164
[1,   330] loss: 3.013
[1,   340] loss: 3.030
[1,   350] loss: 2.803
[1,   360] loss: 2.995
[1,   370] loss: 2.949
[1,   380] loss: 3.017
[1,   390] loss: 2.952
[1,   400] loss: 2.935
[1,   410] loss: 2.832
[1,   420] loss: 2.875
[1,   430] loss: 2.868
[1,   440] 

KeyError: ignored

In [None]:
# DON'T RERUN THIS CELL
# Experiment: ResNet-50 on 256px inputs, 10 epochs, learning rate x0.1 every 3 epochs,
# with a checkpoint requested after every epoch via ck_path.
EPOCHS = 10
IMG_SIZE = 256
BATCH_SIZE = 64
STEP_SIZE = 3
GAMMA = 0.1
DECAY = 0.00047

# The first positional argument is `augmentation`, which get_birds_data() does not read.
data = get_birds_data(1, IMG_SIZE, BATCH_SIZE)
# https://pytorch.org/hub/pytorch_vision_resnet/
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
# https://www.pluralsight.com/guides/introduction-to-resnet
# set the number of output features to be the number of classes
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, step_size=STEP_SIZE, gamma=GAMMA)

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))
# NOTE(review): the test images appear to sit in a single folder (see TEST_PATH above),
# so their ImageFolder labels may be placeholders — confirm this figure is meaningful.
print("Testing accuracy: %f" % accuracy(net, data['test']))

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

[1,    10] loss: 6.357
[1,    20] loss: 6.310
[1,    30] loss: 6.218
[1,    40] loss: 6.004
[1,    50] loss: 5.724
[1,    60] loss: 5.589
[1,    70] loss: 5.348
[1,    80] loss: 5.064
[1,    90] loss: 4.935
[1,   100] loss: 4.747
[1,   110] loss: 4.539
[1,   120] loss: 4.382
[1,   130] loss: 4.260
[1,   140] loss: 3.973
[1,   150] loss: 3.918
[1,   160] loss: 3.827
[1,   170] loss: 3.757
[1,   180] loss: 3.575
[1,   190] loss: 3.578
[1,   200] loss: 3.474
[1,   210] loss: 3.371
[1,   220] loss: 3.283
[1,   230] loss: 3.317
[1,   240] loss: 3.095
[1,   250] loss: 3.095
[1,   260] loss: 3.035
[1,   270] loss: 2.899
[1,   280] loss: 2.844
[1,   290] loss: 2.816
[1,   300] loss: 2.778
[1,   310] loss: 2.639
[1,   320] loss: 2.636
[1,   330] loss: 2.528
[1,   340] loss: 2.722
[1,   350] loss: 2.609
[1,   360] loss: 2.509
[1,   370] loss: 2.489
[1,   380] loss: 2.523
[1,   390] loss: 2.368
[1,   400] loss: 2.290
[1,   410] loss: 2.359
[1,   420] loss: 2.212
[1,   430] loss: 2.267
[1,   440] 

In [None]:
# DON'T RERUN THIS CELL
# Load model from checkpoint and write the test-set predictions.
# Relies on `data` created by an earlier training cell.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555) # This will reinitialize the layer as well
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime as well
state = torch.load(os.path.join(CK_PATH, 'Iter1', 'ck-10.pkl'), map_location=device)
print(type(net))
net.load_state_dict(state['model_state_dict'])
# os.path.join: plain string concatenation produced '<...>/ckspreds.csv' next to the
# checkpoint folder instead of a file inside it
predict(net, data['test'], os.path.join(CK_PATH, "preds.csv"))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


<class 'torchvision.models.resnet.ResNet'>
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900


In [None]:
# Rebuild an optimizer around the restored network and reload its saved state for inspection.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
optimizer.load_state_dict(state['optimizer_state_dict'])
epoch = state['epoch']
# train() stores the per-batch history under 'losses'; the scalar 'loss' key is only
# read when the checkpoint provides it (presumably an older format — verify).
if 'loss' in state:
  loss = state['loss']
else:
  loss_history = state['losses']
  loss = loss_history[-1] if loss_history else None

print(f'epoch: {epoch}')
print(f'loss: {loss}')
print(net.eval())
#print("Testing accuracy: %f" % accuracy(net, data['test']))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


epoch: 10
loss: 0.7611852884292603
ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 2

In [None]:
# TODO: train for 1 epoch and check for test accuracy, whether there is a huge difference
# Experiment: ResNet-50 on 256px inputs for a single epoch.
EPOCHS = 1
IMG_SIZE = 256
BATCH_SIZE = 64
STEP_SIZE = 3
GAMMA = 0.1
DECAY = 0.00047

data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
# print(data['train'].__dict__)
# print(data['test'].__dict__)

# Pretrained ResNet-50 with its final layer replaced by a fresh 555-output linear layer.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, step_size=STEP_SIZE, gamma=GAMMA)

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))
# NOTE(review): the test images appear to sit in a single folder (see TEST_PATH above),
# so their ImageFolder labels may be placeholders — confirm this figure is meaningful.
print("Testing accuracy: %f" % accuracy(net, data['test']))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


[0,    10] loss: 6.354
[0,    20] loss: 6.336
[0,    30] loss: 6.169
[0,    40] loss: 6.053
[0,    50] loss: 5.774
[0,    60] loss: 5.550
[0,    70] loss: 5.326
[0,    80] loss: 5.189
[0,    90] loss: 4.935
[0,   100] loss: 4.654
[0,   110] loss: 4.562
[0,   120] loss: 4.483
[0,   130] loss: 4.215
[0,   140] loss: 4.078
[0,   150] loss: 3.915
[0,   160] loss: 3.760
[0,   170] loss: 3.749
[0,   180] loss: 3.630
[0,   190] loss: 3.537
[0,   200] loss: 3.380
[0,   210] loss: 3.404
[0,   220] loss: 3.328
[0,   230] loss: 3.122
[0,   240] loss: 3.102
[0,   250] loss: 3.075
[0,   260] loss: 3.012
[0,   270] loss: 2.980
[0,   280] loss: 2.897
[0,   290] loss: 2.969
[0,   300] loss: 2.809
[0,   310] loss: 2.750
[0,   320] loss: 2.714
[0,   330] loss: 2.613
[0,   340] loss: 2.735
[0,   350] loss: 2.629
[0,   360] loss: 2.577
[0,   370] loss: 2.482
[0,   380] loss: 2.497
[0,   390] loss: 2.433
[0,   400] loss: 2.393
[0,   410] loss: 2.351
[0,   420] loss: 2.342
[0,   430] loss: 2.348
[0,   440] 

In [None]:
# Experiment: ResNet-50 on 256px inputs, 10 epochs, learning rate x0.3 every 3 epochs.
# (The TODO about a 1-epoch run that was copied into this cell does not match EPOCHS = 10.)
EPOCHS = 10
IMG_SIZE = 256
BATCH_SIZE = 64
STEP_SIZE = 3
GAMMA = 0.3
DECAY = 0.00047

data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
# print(data['train'].__dict__)
# print(data['test'].__dict__)

# Pretrained ResNet-50 with its final layer replaced by a fresh 555-output linear layer.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, step_size=STEP_SIZE, gamma=GAMMA)

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))
# NOTE(review): the test images appear to sit in a single folder (see TEST_PATH above),
# so their ImageFolder labels may be placeholders — confirm this figure is meaningful.
print("Testing accuracy: %f" % accuracy(net, data['test']))

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

[0,    10] loss: 6.330
[0,    20] loss: 6.299
[0,    30] loss: 6.200
[0,    40] loss: 5.960
[0,    50] loss: 5.796
[0,    60] loss: 5.510
[0,    70] loss: 5.315
[0,    80] loss: 5.133
[0,    90] loss: 5.004
[0,   100] loss: 4.783
[0,   110] loss: 4.606
[0,   120] loss: 4.342
[0,   130] loss: 4.262
[0,   140] loss: 4.029
[0,   150] loss: 3.982
[0,   160] loss: 3.781
[0,   170] loss: 3.727
[0,   180] loss: 3.613
[0,   190] loss: 3.565
[0,   200] loss: 3.479
[0,   210] loss: 3.329
[0,   220] loss: 3.328
[0,   230] loss: 3.279
[0,   240] loss: 3.128
[0,   250] loss: 3.066
[0,   260] loss: 3.023
[0,   270] loss: 2.957
[0,   280] loss: 2.876
[0,   290] loss: 2.776
[0,   300] loss: 2.940
[0,   310] loss: 2.616
[0,   320] loss: 2.689
[0,   330] loss: 2.623
[0,   340] loss: 2.613
[0,   350] loss: 2.397
[0,   360] loss: 2.503
[0,   370] loss: 2.528
[0,   380] loss: 2.463
[0,   390] loss: 2.592
[0,   400] loss: 2.359
[0,   410] loss: 2.513
[0,   420] loss: 2.325
[0,   430] loss: 2.402
[0,   440] 

In [None]:
# Restore the epoch-10 checkpoint of the run above and write the test-set predictions.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555) # This will reinitialize the layer as well
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime as well
state = torch.load(os.path.join(CK_PATH, 'ck-10.pkl'), map_location=device)
print(type(net))
net.load_state_dict(state['model_state_dict'])
# os.path.join: plain string concatenation produced '<...>/ckspreds1.csv' next to the
# checkpoint folder instead of a file inside it
predict(net, data['test'], os.path.join(CK_PATH, "preds1.csv"))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


<class 'torchvision.models.resnet.ResNet'>
0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500
7600
7700
7800
7900
8000
8100
8200
8300
8400
8500
8600
8700
8800
8900
9000
9100
9200
9300
9400
9500
9600
9700
9800
9900


In [None]:
# Experiment: ResNet-50 on 256px inputs, 10 epochs, learning rate x0.5 every 3 epochs.
# (The TODO about a 1-epoch run that was copied into this cell does not match EPOCHS = 10.)
EPOCHS = 10
IMG_SIZE = 256
BATCH_SIZE = 64
STEP_SIZE = 3
GAMMA = 0.5
DECAY = 0.00047

data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
# print(data['train'].__dict__)
# print(data['test'].__dict__)

# Pretrained ResNet-50 with its final layer replaced by a fresh 555-output linear layer.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, step_size=STEP_SIZE, gamma=GAMMA)

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))
# NOTE(review): the test images appear to sit in a single folder (see TEST_PATH above),
# so their ImageFolder labels may be placeholders — confirm this figure is meaningful.
print("Testing accuracy: %f" % accuracy(net, data['test']))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


[0,    10] loss: 6.337
[0,    20] loss: 6.299
[0,    30] loss: 6.186
[0,    40] loss: 5.963
[0,    50] loss: 5.775
[0,    60] loss: 5.671
[0,    70] loss: 5.323
[0,    80] loss: 5.238
[0,    90] loss: 4.980
[0,   100] loss: 4.782
[0,   110] loss: 4.457
[0,   120] loss: 4.338
[0,   130] loss: 4.258
[0,   140] loss: 4.104
[0,   150] loss: 4.020
[0,   160] loss: 3.806
[0,   170] loss: 3.671
[0,   180] loss: 3.606
[0,   190] loss: 3.545
[0,   200] loss: 3.456
[0,   210] loss: 3.282
[0,   220] loss: 3.337
[0,   230] loss: 3.338
[0,   240] loss: 3.145
[0,   250] loss: 3.049
[0,   260] loss: 2.959
[0,   270] loss: 2.878
[0,   280] loss: 2.836
[0,   290] loss: 2.844
[0,   300] loss: 2.788
[0,   310] loss: 2.679
[0,   320] loss: 2.723
[0,   330] loss: 2.780
[0,   340] loss: 2.666
[0,   350] loss: 2.523
[0,   360] loss: 2.547
[0,   370] loss: 2.413
[0,   380] loss: 2.524
[0,   390] loss: 2.406
[0,   400] loss: 2.391
[0,   410] loss: 2.300
[0,   420] loss: 2.462
[0,   430] loss: 2.344
[0,   440] 

In [None]:
# add step size and gamma values for scheduler
# add logic for loading checkpoints
# add checkpoint path, checkpoint state parameters
# add start epoch, initialize to 0, only update when there is a checkpoint state

def train(net, dataloader, checkpoint=None, ck_path=None, epochs=1, lr=0.01, 
          momentum=0.9, decay=0.0, verbose=1, step_size=3, gamma=0.1):
  """Fine-tune `net` with SGD and a StepLR schedule, optionally resuming / checkpointing.

  Args:
    net: Model to train; moved to the global `device` and put into train mode.
    dataloader: Iterable of batches with inputs at batch[0] and labels at batch[1].
    checkpoint: Optional dict as written by this function ('epoch',
      'model_state_dict', 'optimizer_state_dict', 'losses' and, in files written
      by this version, 'scheduler_state_dict').
    ck_path: Directory that receives one `ck-<epoch>.pkl` per finished epoch.
      A falsy value disables saving.
    epochs: Epoch index to stop at; training covers range(start_epoch, epochs).
    lr, momentum, decay: SGD learning rate, momentum and weight decay.
    verbose: When truthy, print the mean loss of every 10 mini-batches.
    step_size, gamma: StepLR period in epochs and multiplicative decay factor.

  Returns:
    List with the loss of every mini-batch, including history restored from
    `checkpoint`.
  """
  net.to(device)
  net.train()
  start_epoch = 0
  losses = []
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)
  # use a step scheduler for learning rate schedules
  # https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.StepLR.html#torch.optim.lr_scheduler.StepLR
  # https://neptune.ai/blog/how-to-choose-a-learning-rate-scheduler
  scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

  # https://pytorch.org/tutorials/recipes/recipes/saving_and_loading_a_general_checkpoint.html
  if checkpoint:
    net.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    # copy, so appending below does not modify the caller's checkpoint dict
    losses = list(checkpoint['losses'])
    if 'scheduler_state_dict' in checkpoint:
      # restores the position in the decay schedule (and the saved step_size / gamma)
      scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
    else:
      # Checkpoints without scheduler state: rebuild the learning rate StepLR would
      # have reached after `start_epoch` epochs and realign its epoch counter, so the
      # decay schedule continues instead of restarting.
      decay_factor = gamma ** (start_epoch // step_size)
      for group in optimizer.param_groups:
        group['lr'] = group.get('initial_lr', lr) * decay_factor
      scheduler.last_epoch = start_epoch

  if ck_path:
    os.makedirs(ck_path, exist_ok=True)

  for epoch in range(start_epoch, epochs):
    sum_loss = 0.0
    for i, batch in enumerate(dataloader, 0):
      # get the inputs; data is a list of [inputs, labels]
      inputs, labels = batch[0].to(device), batch[1].to(device)

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize 
      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()  # autograd magic, computes all the partial derivatives
      optimizer.step() # takes a step in gradient direction

      # print statistics
      batch_loss = loss.item()
      losses.append(batch_loss)
      sum_loss += batch_loss
      if i % 10 == 9:    # print every 10 mini-batches
        if verbose:
          print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / 10))
        sum_loss = 0.0

    # Advance the schedule before saving, so the stored optimizer / scheduler state
    # already describes the epoch a resumed run starts with.
    scheduler.step()

    if ck_path:
      # save current checkpoint into a pickle file inside the requested directory
      torch.save({
                  'epoch': epoch + 1,
                  'model_state_dict': net.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'scheduler_state_dict': scheduler.state_dict(),
                  'losses': losses,
                 }, os.path.join(ck_path, 'ck-%d.pkl' % (epoch + 1)))
  return losses

In [None]:
# Experiment: ResNet-50 on 256px inputs, 10 epochs, learning rate x0.3 every 3 epochs.
# (The TODO about a 1-epoch run that was copied into this cell does not match EPOCHS = 10.)
EPOCHS = 10
IMG_SIZE = 256
BATCH_SIZE = 64
STEP_SIZE = 3
GAMMA = 0.3
DECAY = 0.00047

data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
# print(data['train'].__dict__)
# print(data['test'].__dict__)

# Pretrained ResNet-50 with its final layer replaced by a fresh 555-output linear layer.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, step_size=STEP_SIZE, gamma=GAMMA)

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))
# NOTE(review): the test images appear to sit in a single folder (see TEST_PATH above),
# so their ImageFolder labels may be placeholders — confirm this figure is meaningful.
print("Testing accuracy: %f" % accuracy(net, data['test']))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


RuntimeError: ignored

In [None]:
EPOCHS = 20
IMG_SIZE = 256
BATCH_SIZE = 32
DECAY = 0.00047
# Load model from checkpoint
# NOTE(review): this checkpoint is read from BASE_PATH while train() writes into the
# checkpoint folder — confirm the file really holds resnext101_32x8d weights.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnext101_32x8d', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555) # This will reinitialize the layer as well
# map_location lets a checkpoint saved on a GPU load on a CPU-only runtime as well
state = torch.load(os.path.join(BASE_PATH, 'ck-10.pkl'), map_location=device)
net.load_state_dict(state['model_state_dict'])
data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
# os.path.join: plain string concatenation produced '<...>/ckspreds.csv' next to the
# checkpoint folder instead of a file inside it
predict(net, data['test'], os.path.join(CK_PATH, "preds.csv"))

In [None]:
def train(net, dataloader, checkpoint=None, ck_path=None, epochs=1, lr=0.01, 
          momentum=0.9, decay=0.0, verbose=1, schedule=None):
  """Fine-tune `net` with SGD and a hand-written learning-rate schedule.

  Args:
    net: Model to train; moved to the global `device` and put into train mode.
    dataloader: Iterable of batches with inputs at batch[0] and labels at batch[1].
    checkpoint: Optional dict as written by this function ('epoch',
      'model_state_dict', 'optimizer_state_dict', 'losses') to resume from.
    ck_path: Directory that receives one `ck-<epoch>.pkl` per finished epoch.
      A falsy value disables saving.
    epochs: Epoch index to stop at; training covers range(start_epoch, epochs).
    lr, momentum, decay: SGD learning rate, momentum and weight decay.
    verbose: When truthy, print the mean loss of every 10 mini-batches.
    schedule: Optional dict mapping an epoch index to the learning rate that
      applies from that epoch on. None (the default) means no scheduled changes.

  Returns:
    List with the loss of every mini-batch, including history restored from
    `checkpoint`.
  """
  if schedule is None:
    schedule = {}
  net.to(device)
  net.train()
  start_epoch = 0
  losses = []
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, weight_decay=decay)

  def set_lr(new_lr):
    # Announce and apply `new_lr` to every parameter group of the optimizer.
    print ("Learning rate: %f"% new_lr)
    for g in optimizer.param_groups:
      g['lr'] = new_lr

  if checkpoint:
    net.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    # copy, so appending below does not modify the caller's checkpoint dict
    losses = list(checkpoint['losses'])
  
  # Fast forward lr schedule through already trained epochs
  for epoch in range(start_epoch):
    if epoch in schedule:
      set_lr(schedule[epoch])

  if ck_path:
    os.makedirs(ck_path, exist_ok=True)

  for epoch in range(start_epoch, epochs):
    sum_loss = 0.0

    # Update learning rate when scheduled
    if epoch in schedule:
      set_lr(schedule[epoch])
    
    for i, batch in enumerate(dataloader, 0):
      # get the inputs; data is a list of [inputs, labels]
      inputs, labels = batch[0].to(device), batch[1].to(device)

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize 
      outputs = net(inputs)
      loss = criterion(outputs, labels)
      loss.backward()  # autograd magic, computes all the partial derivatives
      optimizer.step() # takes a step in gradient direction

      # print statistics
      batch_loss = loss.item()
      losses.append(batch_loss)
      sum_loss += batch_loss
      if i % 10 == 9:    # print every 10 mini-batches
        if verbose:
          print('[%d, %5d] loss: %.3f' % (epoch, i + 1, sum_loss / 10))
        sum_loss = 0.0

    if ck_path:
      # save current checkpoint into a pickle file inside the requested directory
      torch.save({
                  'epoch': epoch + 1,
                  'model_state_dict': net.state_dict(),
                  'optimizer_state_dict': optimizer.state_dict(),
                  'losses': losses,
                 }, os.path.join(ck_path, 'ck-%d.pkl' % (epoch + 1)))
  return losses

In [None]:
# Changed train() function so that it doesn't use stepLR
# Experiment: ResNet-152 on 256px inputs, 20 epochs, batch size 32, with the learning
# rate lowered at epochs 3, 5, 7 and 9 through the explicit `schedule` dict.
EPOCHS = 20
IMG_SIZE = 256
BATCH_SIZE = 32
DECAY = 0.00047

data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)

# Pretrained ResNet-152 with its final layer replaced by a fresh 555-output linear layer.
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555)

losses = train(net, data['train'], ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, schedule={0: 0.01, 3: 0.0075, 5: 0.005, 7: 0.0025, 9: 0.001})

print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


Learning rate: 0.010000
[0,    10] loss: 6.356
[0,    20] loss: 6.347
[0,    30] loss: 6.315
[0,    40] loss: 6.144
[0,    50] loss: 5.989
[0,    60] loss: 5.724
[0,    70] loss: 5.542
[0,    80] loss: 5.307
[0,    90] loss: 5.213
[0,   100] loss: 5.144
[0,   110] loss: 4.967
[0,   120] loss: 4.912
[0,   130] loss: 4.737
[0,   140] loss: 4.558
[0,   150] loss: 4.548
[0,   160] loss: 4.220
[0,   170] loss: 4.237
[0,   180] loss: 4.240
[0,   190] loss: 4.154
[0,   200] loss: 3.939
[0,   210] loss: 4.064
[0,   220] loss: 3.744
[0,   230] loss: 3.829
[0,   240] loss: 3.673
[0,   250] loss: 3.650
[0,   260] loss: 3.580
[0,   270] loss: 3.511
[0,   280] loss: 3.697
[0,   290] loss: 3.322
[0,   300] loss: 3.505
[0,   310] loss: 3.321
[0,   320] loss: 3.295
[0,   330] loss: 3.152
[0,   340] loss: 3.464
[0,   350] loss: 3.307
[0,   360] loss: 3.215
[0,   370] loss: 3.096
[0,   380] loss: 3.092
[0,   390] loss: 3.064
[0,   400] loss: 2.820
[0,   410] loss: 3.096
[0,   420] loss: 2.940
[0,   430]

In [None]:
# Lost progress at epoch 17 for the model trained above, load from there
# Same settings and schedule as the run above; train() fast-forwards the schedule
# through the epochs recorded in the checkpoint before continuing.
EPOCHS = 20
IMG_SIZE = 256
BATCH_SIZE = 32
DECAY = 0.00047

data = get_birds_data(img_size=IMG_SIZE, batch_size=BATCH_SIZE)
net = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)
net.fc = nn.Linear(net.fc.in_features, 555) # This will reinitialize the layer as well
# ck-17.pkl is the file train() names after the 17th finished epoch
state = torch.load(CK_PATH + '/ck-17.pkl')
losses = train(net, data['train'], checkpoint=state, ck_path=CK_PATH, epochs=EPOCHS, decay=DECAY, schedule={0: 0.01, 3: 0.0075, 5: 0.005, 7: 0.0025, 9: 0.001})
print("Training   accuracy: %f" % accuracy(net, data['train']))
print("Validation accuracy: %f" % accuracy(net, data['valid']))