In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import os
import sys
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import warnings
warnings.filterwarnings("ignore")
import torchvision
import torchvision.models as models
import time
import copy
from torch.nn import init
import math

In [0]:
data_path = "tiny-imagenet-200"
#normalise the data by its mean and standard deviation
data_transforms = {
    'train': transforms.ToTensor(),
    'val': transforms.ToTensor()
}


data_transform = transforms.ToTensor()
image_datasets = {x: torchvision.datasets.ImageFolder(os.path.join(data_path, x),
                                          data_transforms[x])
                  for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=4)
              for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

In [0]:
class BasicBlock(nn.Module):
  def __init__(self,in_channels,out_channels,drop,stride=1):
    super().__init__()
    self.bn1 = nn.BatchNorm2d(in_channels)
    self.conv1 = nn.Conv2d(in_channels=in_channels,out_channels=out_channels,
                           kernel_size=3,stride=stride,padding=1,bias=False)
    #dropout automatically shuts down during eval mode
    self.drop = nn.Dropout(drop,inplace=True)
    self.bn2 = nn.BatchNorm2d(out_channels)
    self.conv2 = nn.Conv2d(in_channels=out_channels,out_channels=out_channels,
                           kernel_size=3,stride=1,padding=1,bias=False)
    self.relu = nn.ReLU(inplace=True)
    
    if in_channels!=out_channels or stride!=1:
      self.identity = nn.Sequential(nn.Conv2d(in_channels=in_channels,out_channels=out_channels,
                                             kernel_size=1,stride=stride,bias=False))
    else:
      self.identity = lambda x: x      
    
    def forward(self,x):
      out = self.conv1(self.relu(self.bn1(x)))
      out = self.drop(out)
      out = self.conv2(self.relu(self.bn2(out)))
      i = self.identity(x)
      out += i
      return out
      

In [0]:
nn.Conv2d?

In [0]:
class Resnet(nn.Module):
  '''architecture based on pre-activated resnet20'''
  
  def __init__(self,block,num_classes=200):
    super(Resnet, self).__init__()
    #changed stride 2 for tiny imagenet
    self.conv1 = nn.Conv2d(in_channels=3,out_channels=16,kernel_size=3,stride=2,padding=1,bias=False)
    self.in_channels = 16
    self.stage1 = self.create_stage(block,16,num_blocks=3,stride=1)
    self.stage2 = self.create_stage(block,32,num_blocks=3,stride=2)
    self.stage3 = self.create_stage(block,64,num_blocks=3,stride=2)
    self.lastact = nn.Sequential(nn.BatchNorm2d(64),nn.ReLU(inplace=True))
    #replaced avg pool with adaptive pool
    self.avgPool = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(64,num_classes)
    
    for m in self.modules():
      if isinstance(m, nn.Conv2d):
        n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
        m.weight.data.normal_(0, math.sqrt(2. / n))
        #m.bias.data.zero_()
      elif isinstance(m, nn.BatchNorm2d):
        m.weight.data.fill_(1)
        m.bias.data.zero_()
      elif isinstance(m, nn.Linear):
        init.kaiming_normal(m.weight)
        m.bias.data.zero_()
    
  def create_stage(self,block,out_channels,num_blocks,stride=1):
    layers = []
    layers.append(block(self.in_channels,out_channels,0.3,stride))
    self.in_channels=out_channels
    for i in range(1,num_blocks):
      layers.append(block(self.in_channels,out_channels,0.3,1))
      
    return nn.Sequential(*layers)
  
  def forward(self,x):
    out = self.conv1(x)
    out = self.stage1(out)
    out = self.stage2(out)
    out = self.stage3(out)
    out = self.lastact(out)
    out = self.avgPool(out)
    out = out.view(out.size(0), -1)
    return self.classifier(out) 

In [19]:
k = nn.Conv2d(3, 16, 3, stride=2,padding=1)
i = torch.randn([1,3,64,64])
k(i).size()

torch.Size([1, 16, 32, 32])

In [30]:
BasicBlock(16,16,0.3,2)

BasicBlock(
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (drop): Dropout(p=0.3, inplace)
  (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (relu): ReLU(inplace)
  (residual): Sequential(
    (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(2, 2), bias=False)
  )
)

In [43]:
Resnet(BasicBlock,200)

Resnet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (stage1): Sequential(
    (0): BasicBlock(
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (drop): Dropout(p=0.3, inplace)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (relu): ReLU(inplace)
    )
    (1): BasicBlock(
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (drop): Dropout(p=0.3, inplace)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 