In [1]:
import numpy as np
import torch
import torch.nn as nn
from torch import flatten
# torch.nn has no `AdaptiveAveragePool2d`; the adaptive pooling layer is `AdaptiveAvgPool2d`.
from torch.nn import AdaptiveAvgPool2d
from torch.nn import BatchNorm2d
from torch.nn import Conv2d
from torch.nn import Dropout
from torch.nn import Linear
from torch.nn import LogSoftmax
from torch.nn import MaxPool2d
from torch.nn import Module
from torch.nn import ReLU
from torch.nn import Sequential
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets
from torchvision import transforms

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
def data_loader(data_dir,
                batch_size,
                random_seed = 42,
                valid_size = 0.2,
                shuffle = True,
                test = False,
                image_size = (224, 224)):
  """Build ImageNet ``DataLoader`` objects.

  torchvision cannot download ImageNet; the official ILSVRC-2012 archives
  must already be present in ``data_dir``.

  Args:
    data_dir: root directory holding the ImageNet archives / extracted data.
    batch_size: number of samples per batch.
    random_seed: seed for the permutation that decides the train/validation
      split (only used when ``shuffle`` is true).
    valid_size: fraction (0..1) of the training split held out for validation.
    shuffle: for the train/validation case, whether to permute the indices
      before splitting; for the test case, whether the loader shuffles.
    test: when true, return a single loader over the evaluation split.
    image_size: ``(height, width)`` every image is resized to so that samples
      can be stacked into a batch.

  Returns:
    A single ``DataLoader`` when ``test`` is true, otherwise the tuple
    ``(train_loader, valid_loader)``.

  Raises:
    ValueError: if ``valid_size`` is outside ``[0, 1]``.
  """
  if not 0 <= valid_size <= 1:
    raise ValueError('valid_size must be in the range [0, 1]')

  # replace mean and std with appropriate values to apply normalisation
  normalize = transforms.Normalize(
      mean = [0, 0, 0],
      std = [1, 1, 1]
  )

  # transforms
  transform = transforms.Compose([
      # ImageNet images have varying sizes; the default collate function can
      # only batch tensors of identical shape.
      transforms.Resize(image_size),
      transforms.ToTensor(),
      normalize
  ])

  if test:
    # datasets.ImageNet selects data with `split` ('train' or 'val'); it has no
    # `train`/`download` arguments. The labelled 'val' split serves as test set.
    test_dataset = datasets.ImageNet(
      root=data_dir,
      split='val',
      transform=transform,
    )

    return torch.utils.data.DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=shuffle
    )

  # Train and validation samples come from the same split with the same
  # transform, so one dataset object is shared by both loaders instead of
  # indexing the archive twice.
  train_dataset = datasets.ImageNet(
      root=data_dir,
      split='train',
      transform=transform,
  )

  num_train = len(train_dataset)
  indices = list(range(num_train))
  split = int(np.floor(valid_size * num_train))

  if shuffle:
      # A private RandomState yields the same permutation as seeding the
      # global generator, without altering global NumPy RNG state.
      np.random.RandomState(random_seed).shuffle(indices)

  train_idx, valid_idx = indices[split:], indices[:split]
  train_sampler = SubsetRandomSampler(train_idx)
  valid_sampler = SubsetRandomSampler(valid_idx)

  train_loader = torch.utils.data.DataLoader(
      train_dataset, batch_size=batch_size, sampler=train_sampler)

  valid_loader = torch.utils.data.DataLoader(
      train_dataset, batch_size=batch_size, sampler=valid_sampler)

  return (train_loader, valid_loader)


# Training and validation loaders share the training split of the data in ./data.
train_loader, valid_loader = data_loader('./data', 64)

# Separate loader over the held-out evaluation data.
test_loader = data_loader('./data', 64, test = True)

In [None]:
class ResNetBlock(Module):
  """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

  Every convolution is followed by batch normalisation. ReLU is applied after
  the first two convolutions and again after the residual addition.

  Args:
    inChannels: number of channels of the incoming feature map.
    middleChannels: number of channels inside the bottleneck.
    outChannels: number of channels produced by the block.
    shortcut: when true the block down-samples by 2 (stride-2 first
      convolution and stride-2 projection shortcut); when false the spatial
      size is preserved.
  """
  def __init__(self, inChannels, middleChannels, outChannels, shortcut):
    super(ResNetBlock, self).__init__()

    # Down-sampling blocks halve the spatial size in the first 1x1 convolution
    # and in the projection shortcut.
    stride = 2 if shortcut else 1

    if shortcut or inChannels != outChannels:
      # A projection is also required whenever the channel count changes
      # (e.g. 64 -> 256), otherwise the residual addition cannot be computed.
      self.shortcut = Sequential(
          Conv2d(inChannels, outChannels, kernel_size = 1, stride = stride, padding = 0),
          BatchNorm2d(outChannels)
      )
    else:
      # Identity shortcut
      self.shortcut = Sequential()

    self.conv1 = Sequential(
        Conv2d(inChannels, middleChannels, kernel_size = 1, stride = stride, padding = 0),
        BatchNorm2d(middleChannels),
        ReLU()
    )

    # padding = 1 keeps the spatial size for a 3x3 kernel with stride 1, so the
    # main branch stays aligned with the shortcut branch.
    self.conv2 = Sequential(
        Conv2d(middleChannels, middleChannels, kernel_size = 3, stride = 1, padding = 1),
        BatchNorm2d(middleChannels),
        ReLU()
    )

    self.conv3 = Sequential(
        Conv2d(middleChannels, outChannels, kernel_size = 1, stride = 1, padding = 0),
        BatchNorm2d(outChannels)
    )

    # Created once here instead of instantiating a new module on every forward call
    self.relu = ReLU()


  def forward(self, input):
    """Return ``relu(conv3(conv2(conv1(input))) + shortcut(input))``."""
    residual = self.shortcut(input)
    out = self.conv1(input)
    out = self.conv2(out)
    out = self.conv3(out)
    out = out + residual

    return self.relu(out)


class ResNet50V2(Module):
  """ResNet-50 style classifier assembled from bottleneck residual blocks.

  Args:
    inChannels: number of channels of the input images (3 for RGB).
    classes: number of output classes.
    resblock: callable ``resblock(in_ch, mid_ch, out_ch, downsample)`` that
      returns the residual block module used in every stage.
    hyperparameters: dict that must provide ``'image_w'`` and ``'image_h'``.
      The values are stored on the instance; the layers themselves do not
      depend on them because the head pools adaptively.
  """
  def __init__(self, inChannels, classes, resblock, hyperparameters):
    super(ResNet50V2, self).__init__()

    self.hyperparameters = hyperparameters
    self.image_w = hyperparameters['image_w']
    self.image_h = hyperparameters['image_h']

    # all spatial sizes quoted below are for a 224 x 224 input

    # Block-1
    # conv1 - 64, f = 7, p = 3, s = 2, out - 112 x 112 x 64
    # maxpl - 64, f = 3, p = 1, s = 2, out - 56 x 56 x 64
    self.Block1 = Sequential(
        Conv2d(in_channels=inChannels, out_channels=64,
               kernel_size=7, stride=2, padding=3),
        BatchNorm2d(64),
        ReLU(),
        # padding = 1 is what yields the documented 56 x 56 output
        MaxPool2d(kernel_size=3, stride=2, padding=1),
    )

    # Block-2
    # conv1 - 64, f = 1, p = 0, s = 1, out - 56 x 56 x 64
    # conv2 - 64, f = 3, p = 1, s = 1, out - 56 x 56 x 64
    # conv3 - 256, f = 1, p = 0, s = 1, out - 56 x 56 x 256
    # repeat x3
    self.Block2 = Sequential(
        resblock(64, 64, 256, False),  # Block1 emits 64 channels
        resblock(256, 64, 256, False),
        resblock(256, 64, 256, False)
    )

    # Block-3
    # conv1 - 128, f = 1, p = 0, s = 1, out - 28 x 28 x 128
    # conv2 - 128, f = 3, p = 1, s = 1, out - 28 x 28 x 128
    # conv3 - 512, f = 1, p = 0, s = 1, out - 28 x 28 x 512
    # repeat x4 (the first repeat down-samples)
    self.Block3 = Sequential(
        resblock(256, 128, 512, True),
        resblock(512, 128, 512, False),
        resblock(512, 128, 512, False),
        resblock(512, 128, 512, False),
    )

    # Block-4
    # conv1 - 256, f = 1, p = 0, s = 1, out - 14 x 14 x 256
    # conv2 - 256, f = 3, p = 1, s = 1, out - 14 x 14 x 256
    # conv3 - 1024, f = 1, p = 0, s = 1, out - 14 x 14 x 1024
    # repeat x6 (the first repeat down-samples)
    self.Block4 = Sequential(
        resblock(512, 256, 1024, True),
        resblock(1024, 256, 1024, False),
        resblock(1024, 256, 1024, False),
        resblock(1024, 256, 1024, False),
        resblock(1024, 256, 1024, False),
        resblock(1024, 256, 1024, False),
    )

    # Block-5
    # conv1 - 512, f = 1, p = 0, s = 1, out - 7 x 7 x 512
    # conv2 - 512, f = 3, p = 1, s = 1, out - 7 x 7 x 512
    # conv3 - 2048, f = 1, p = 0, s = 1, out - 7 x 7 x 2048
    # repeat x3 (the first repeat down-samples)
    self.Block5 = Sequential(
        resblock(1024, 512, 2048, True),
        resblock(2048, 512, 2048, False),
        resblock(2048, 512, 2048, False),
    )

    # Block 6
    # adaptive avgPool - out 7 x 7 x 2048 for any input size
    # fc-1000 -> dropout -> fc-classes
    # Adaptive pooling fixes the grid at 7 x 7, so the flattened size always
    # matches the first Linear layer.
    self.Block6_1 = AdaptiveAvgPool2d((7, 7))
    self.Block6_2 = Sequential(
        Linear(in_features=7 * 7 * 2048, out_features=1000),
        ReLU(),
        Dropout(0.5),
        Linear(in_features=1000, out_features=classes)
    )


  def forward(self, x):
    """Map a batch of images ``(N, inChannels, H, W)`` to logits ``(N, classes)``."""
    x = self.Block1(x)
    x = self.Block2(x)
    x = self.Block3(x)
    x = self.Block4(x)
    x = self.Block5(x)
    x = self.Block6_1(x)
    # start_dim = 1 keeps the batch dimension intact
    x = flatten(x, 1)
    x = self.Block6_2(x)

    return x

  def calc_dim(self, dim, f, p, s):
    """Output length of a conv/pool layer along one spatial dimension.

    dim: input length, f: filter size, p: padding, s: stride.
    """
    return (dim + 2*p - f)//s + 1

In [None]:
# torchvision's datasets.ImageNet is the ILSVRC-2012 classification set, which
# has 1000 classes (21841 is the synset count of the full ImageNet-21k release).
num_classes = 1000
num_epochs = 20
# NOTE(review): the loaders built earlier use a hard-coded batch size of 64, so
# this value is currently not consumed anywhere - confirm which one is intended.
batch_size = 16
learning_rate = 0.005

hyperparameters = {
  'image_w': 224,
  'image_h': 224
}

# Constructor order is (inChannels, classes, resblock, hyperparameters);
# the images are RGB, hence 3 input channels.
model = ResNet50V2(3, num_classes, ResNetBlock, hyperparameters).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay = 0.005, momentum = 0.9)


# Train the model
total_step = len(train_loader)

In [None]:
total_step = len(train_loader)

for epoch in range(num_epochs):
    # BatchNorm and Dropout behave differently in training and evaluation, so
    # the mode is set explicitly at the start of each phase.
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        # Move tensors to the configured device
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Reports the loss of the last batch of the epoch
    print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Validation
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs

        # Report the number of images actually evaluated; guard against an
        # empty validation loader.
        print('Accuracy of the network on the {} validation images: {} %'.format(total, 100 * correct / max(total, 1)))

In [None]:
# Evaluation mode: BatchNorm uses its running statistics and Dropout is disabled.
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        del images, labels, outputs

    # Report the number of images actually evaluated; guard against an empty loader.
    print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / max(total, 1)))