In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

#MODELPATH = '/content/gdrive/My Drive/Colab Notebooks/yai7_CV1/modelsave/'

#IMAGEWOOFPATH = '/content/gdrive/My Drive/Colab Notebooks/data/imagewoof2'

IMAGENETTEPATH = '/content/gdrive/My Drive/Colab Notebooks/data/imagenette2'

Mounted at /content/gdrive


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init

from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
print("torch version:", torch.__version__)
print("check device:", torch.cuda.get_device_name())
print("how many?:", torch.cuda.device_count())
print("so can i use it?", torch.cuda.is_available())

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

torch version: 1.7.0+cu101
check device: Tesla T4
how many?: 1
so can i use it? True


In [3]:
# transformation for CIFAR 10
# yet to implement standard color augmentation

train_transform = transforms.Compose([
                             transforms.Resize(224),
                             transforms.RandomCrop(224, padding=28),
                             transforms.RandomHorizontalFlip(p=0.5),
                             transforms.ToTensor(),
                             transforms.Normalize(mean=(0.4963, 0.4612, 0.3981), std=(0.2224, 0.2163, 0.2181))
])

test_transform = transforms.Compose([
                            transforms.Resize(224),
                            transforms.CenterCrop(224),         
                            transforms.ToTensor(),
                            transforms.Normalize(mean=(0.4963, 0.4612, 0.3981), std=(0.2224, 0.2163, 0.2181))
])

In [4]:
from google.colab import output
!mkdir "./train"
!mkdir "./val"
# !cp 파일1 파일2 # 파일1을 파일2로 복사 붙여넣기
!cp "/content/gdrive/My Drive/Colab Notebooks/data/imagenette2/train.zip" "train.zip"
!cp "/content/gdrive/My Drive/Colab Notebooks/data/imagenette2/val.zip" "val.zip"
# data_2.zip을 현재 디렉터리에 압축해제
!unzip "train.zip" -d "./train/"
!unzip "val.zip" -d "./val/"

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ./train/n03445777/n03445777_8446.JPEG  
  inflating: ./train/n03445777/n03445777_8448.JPEG  
  inflating: ./train/n03445777/n03445777_8474.JPEG  
  inflating: ./train/n03445777/n03445777_8477.JPEG  
  inflating: ./train/n03445777/n03445777_8484.JPEG  
  inflating: ./train/n03445777/n03445777_8544.JPEG  
  inflating: ./train/n03445777/n03445777_8546.JPEG  
  inflating: ./train/n03445777/n03445777_8599.JPEG  
  inflating: ./train/n03445777/n03445777_8616.JPEG  
  inflating: ./train/n03445777/n03445777_8624.JPEG  
  inflating: ./train/n03445777/n03445777_8628.JPEG  
  inflating: ./train/n03445777/n03445777_866.JPEG  
  inflating: ./train/n03445777/n03445777_8663.JPEG  
  inflating: ./train/n03445777/n03445777_8677.JPEG  
  inflating: ./train/n03445777/n03445777_8678.JPEG  
  inflating: ./train/n03445777/n03445777_8679.JPEG  
  inflating: ./train/n03445777/n03445777_8683.JPEG  
  inflating: ./train/n03445777/n034

In [5]:
train_set = torchvision.datasets.ImageFolder(root='./train', transform=train_transform)
test_set = torchvision.datasets.ImageFolder(root='./val', transform=test_transform)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=4)

print(f'train_loader {len(train_loader)}')
print(f'test_loader {len(test_loader)}')

train_loader 296
test_loader 123


In [6]:
"""train_set = torchvision.datasets.ImageFolder(root=IMAGENETTEPATH+'/train', transform=train_transform)
test_set = torchvision.datasets.ImageFolder(root=IMAGENETTEPATH+'/val', transform=test_transform)
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=4)

print(f'train_loader {len(train_loader)}')
print(f'test_loader {len(test_loader)}')"""

"train_set = torchvision.datasets.ImageFolder(root=IMAGENETTEPATH+'/train', transform=train_transform)\ntest_set = torchvision.datasets.ImageFolder(root=IMAGENETTEPATH+'/val', transform=test_transform)\ntrain_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4)\ntest_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=4)\n\nprint(f'train_loader {len(train_loader)}')\nprint(f'test_loader {len(test_loader)}')"

In [None]:
# check on the images

def imshow(img):
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1,2,0)))
    plt.show()

dataiter = iter(train_loader)
images, labels = dataiter.next()

imshow(torchvision.utils.make_grid(images))

In [None]:
# define the basic building blocks for ResNet

# option A
class Padding(nn.Module):
    def __init__(self, in_c, out_c, stride):
        super(Padding, self).__init__()
        self.padding = F.pad
        self.pad_dim = (out_c - in_c) / 2
        self.pad_dim = int(self.pad_dim)
        self.downsample = nn.MaxPool2d(1, stride=stride)

    def forward(self, x):
        x = self.padding(x, (0,0,0,0, self.pad_dim, self.pad_dim), "constant", 0)
        x = self.downsample(x)
        return x

# option Mine
class Concat(nn.Module):
    def __init__(self, in_c, out_c, stride):
        super(Concat, self).__init__()
        
        self.dim_remainder = (out_c - in_c) % in_c
        self.concat_times = out_c // in_c
        self.downsample = nn.MaxPool2d(1, stride=stride)


    def forward(self, x):
        #print("cat 1", x.shape)
        #print(self.concat_times)
        cat_x = [x] * self.concat_times
        cat_x = torch.cat(cat_x, dim=1)
        #cat_x = torch.cat((cat_x, x[:self.dim_remainder]), dim=1)
        cat_x = self.downsample(cat_x)
        #print(cat_x.shape)

        return cat_x

class BuildingBlock(nn.Module):

    def __init__(self, in_c, out_c, stride=1, option='B'):
        super(BuildingBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_c)
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_c)

        self.shortcut = nn.Sequential()
        if in_c != out_c:
            if option == 'A':
                self.shortcut = Padding(in_c, out_c, stride)
                # why not concantenate x instead of padding?
                # since dim increase by factor of 2 all the time

            if option == 'B':
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_c, out_c, kernel_size=1, stride=stride),
                    nn.BatchNorm2d(out_c)
                )
                # i don't like the idea of batchnormalization for projection shortcut
                # should i add BN?
            
            # additional option I thought of hehe
            if option == 'Mine':
                self.shortcut = Concat(in_c, out_c, stride)
        
    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


In [None]:
# https://github.com/KellerJordan/ResNet-PyTorch-CIFAR10/blob/master/model.py
# for shortcut reference

In [None]:
class Bottleneck(nn.Module):

    def __init__(self, in_c, process_c, stride=1, option='B'):
        super(Bottleneck, self).__init__()

        self.out_c = process_c * 4

        self.conv1 = nn.Conv2d(in_c, process_c, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(process_c)

        self.conv2 = nn.Conv2d(process_c, process_c, kernel_size=3,
                               stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(process_c)

        self.conv3 = nn.Conv2d(process_c, self.out_c, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(self.out_c)

        self.shortcut = nn.Sequential()
        if in_c != self.out_c:
            if option == 'A':
                self.shortcut = Padding(in_c, self.out_c)
                # why not concantenate x instead of padding?
                # since dim increase by factor of 2 all the time

            if option == 'B':
                self.shortcut = nn.Sequential(
                    nn.Conv2d(in_c, self.out_c, kernel_size=1, stride=stride),
                    nn.BatchNorm2d(self.out_c)
                )
                # i don't like the idea of batchnormalization for projection shortcut
                # should i add BN?
            
            # additional option I thought of hehe
            if option == 'Mine':
                self.shortcut = Concat(in_c, self.out_c)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = F.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class Gate(nn.Module):
    def __init__(self):
        super(Gate, self).__init__()
        # slightly changed for CIFAR10
        # in order to have dimension 32x32 --> 16x16
        self.conv_gate = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn_gate = nn.BatchNorm2d(64)
        self.maxpool_gate = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = F.relu(self.bn_gate(self.conv_gate(x)))
        x = self.maxpool_gate(x)
        return x

In [None]:
class ResNet18B(nn.Module):
    def __init__(self):
        super(ResNet18B, self).__init__()

        self.gate = Gate()

        self.conv2_1 = BuildingBlock(64, 64, 1, 'Mine')
        self.conv2_2 = BuildingBlock(64, 64, 1, 'Mine')

        self.conv3_1 = BuildingBlock(64, 128, 2, 'Mine')
        self.conv3_2 = BuildingBlock(128, 128, 1, 'Mine')

        self.conv4_1 = BuildingBlock(128, 256, 2, 'Mine')
        self.conv4_2 = BuildingBlock(256, 256, 1, 'Mine')

        self.conv5_1 = BuildingBlock(256, 512, 2, 'Mine')
        self.conv5_2 = BuildingBlock(512, 512, 1, 'Mine')

        self.output = nn.Linear(512, 10)

    def forward(self, x):
        #print("input", x.shape)
        x = self.gate(x)

        #print("1", x.shape)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        #print("2", x.shape)

        x = self.conv3_1(x)
        x = self.conv3_2(x)
        #print("3", x.shape)

        x = self.conv4_1(x)
        x = self.conv4_2(x)
        #print("4", x.shape)

        x = self.conv5_1(x)
        x = self.conv5_2(x)
        #print("5", x.shape)

        x = F.avg_pool2d(x, 7)
        #print("avgpool", x.shape)

        x = x.view(-1, 512)
        #print("fc", x.shape)

        x = self.output(x)

        return x

In [None]:
resnet = ResNet18B()
resnet.to(device)

In [None]:
def weights_init(m):
    if isinstance(m, nn.Conv2d):
        #print(m)
        init.kaiming_normal_(m.weight)
        #torch.nn.init.zeros_(m.bias)

    elif isinstance(m, nn.Linear):
        #print(m)
        init.kaiming_normal_(m.weight)
        init.zeros_(m.bias)

    # no initialization for BatchNorm2d

In [None]:
resnet.apply(weights_init)

In [None]:
def train_one_epoch(model, train_loader, loss_function, optimizer, epoch):
    model.train()
    length = len(train_loader)
    running_loss = 0.0

    for i, (image, label) in enumerate(train_loader):
        image = image.to(device)
        label = label.to(device)

        optimizer.zero_grad()

        pred = model(image)
        print(pred)
        print(label)

        print(pred.shape)
        print(label.shape)
        loss = loss_function(pred, label)
    
        loss.backward()
        
        optimizer.step()

        running_loss += loss.item()

        if i % 50 == 0:
            print(f'after looking at {(i+1)*128} images, running_loss is {running_loss/(i+1):.4f}')

    print(f'epoch {epoch} : loss {running_loss/length:.4f}')


def eval_one_epoch(model, test_loader, epoch):
    model.eval()
    length = len(test_loader)
    running_acc = 0.0

    for i, (image, label) in enumerate(test_loader):
        image, label = image.to(device), label.numpy()

        pred = model(image)
        pred_label = pred.argmax(axis=1).detach().cpu().numpy()
        running_acc += (pred_label==label).mean()

        if i % 50 == 0:
            print(f'after looking at {(i+1)*128} images, running_acc is {running_acc/(i+1):.4f}')

    val_acc = running_acc/length

    print(f'epoch {epoch} : acc {val_acc:.4f}')

    return val_acc

In [None]:
# hyperparameters from paper
# assuming same for pretraining

learning_rate = 0.01
momentum = 0.9
weight_decay = 1e-4
epochs = 100

In [None]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(resnet.parameters(), lr=learning_rate, 
                            momentum=momentum, weight_decay=weight_decay)

milestones = [60, 80]

scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer, milestones=milestones, gamma=0.1
)

# 391 iterations per epoch
# 32k iter = 82 epochs
# 42k iter = 108 epochs
# 64k iter = 164 epochs

In [None]:
val_acc_list = []

for epoch in range(epochs):
    train_one_epoch(resnet, train_loader, loss_function, optimizer, epoch)
    val_acc_list.append(eval_one_epoch(resnet, test_loader, epoch))
    scheduler.step()

after looking at 128 images, running_loss is 5.9551
after looking at 6528 images, running_loss is 5.6347
after looking at 12928 images, running_loss is 4.2230
after looking at 19328 images, running_loss is 3.6574
after looking at 25728 images, running_loss is 3.2967
after looking at 32128 images, running_loss is 3.0351
epoch 0 : loss 2.8716
after looking at 128 images, running_acc is 0.5312
after looking at 6528 images, running_acc is 0.4479
after looking at 12928 images, running_acc is 0.3670
epoch 0 : acc 0.3788
after looking at 128 images, running_loss is 1.4851
after looking at 6528 images, running_loss is 1.7095
after looking at 12928 images, running_loss is 1.7625
after looking at 19328 images, running_loss is 1.7372
after looking at 25728 images, running_loss is 1.6958
after looking at 32128 images, running_loss is 1.6618
epoch 1 : loss 1.6377
after looking at 128 images, running_acc is 0.5938
after looking at 6528 images, running_acc is 0.3897
after looking at 12928 images, run

In [None]:
val_acc_list

[0.37875048393341076,
 0.4787190282617112,
 0.5788206542779714,
 0.618309620596206,
 0.6714455090979481,
 0.7310177119628339,
 0.7019212156407278,
 0.6945775261324041,
 0.7496854432830042,
 0.7504960317460317,
 0.7297473867595818,
 0.7592431281455672,
 0.7120959156020131,
 0.777269647696477,
 0.7883154277971351,
 0.7826171118854045,
 0.7729747386759582,
 0.7940500387146728,
 0.8007888114595432,
 0.7837422570654278,
 0.7865369725125823,
 0.8109635114208285,
 0.8122096399535423,
 0.8204849012775842,
 0.8132259001161439,
 0.7809596399535423,
 0.8336841850561363,
 0.824259581881533,
 0.8129718350754935,
 0.8420804297328688,
 0.8338172667440961,
 0.8344584785133566,
 0.8444879984514131,
 0.8283609175377469,
 0.8381363724351529,
 0.8510815911730545,
 0.8223601432442896,
 0.8151495354239257,
 0.841427119628339,
 0.8494483159117305,
 0.8425764614789005,
 0.8559209252806814,
 0.8357288037166086,
 0.8579655439411537,
 0.8434838366240804,
 0.8470286488579172,
 0.8320509097948122,
 0.8303571428571

In [None]:
torch.randn([128, 512, 2, 2])

tensor([[[[ 2.6774e-01,  1.2382e-01],
          [ 3.5893e-01, -1.0991e+00]],

         [[-1.4468e+00,  1.4188e+00],
          [ 3.2748e-01, -2.5286e-01]],

         [[-1.4450e-01, -7.1344e-01],
          [-8.2402e-01, -7.6291e-01]],

         ...,

         [[ 3.4742e-01, -1.5759e+00],
          [-6.6500e-01, -2.1159e+00]],

         [[ 3.4775e-01,  9.5865e-01],
          [ 1.5669e+00,  1.1676e-03]],

         [[-6.2299e-02, -1.1045e+00],
          [ 1.5128e-01,  3.8223e-01]]],


        [[[-9.3783e-01,  5.2313e-02],
          [-6.2985e-01,  2.3832e+00]],

         [[-7.1196e-01, -5.5177e-01],
          [-4.6909e-01,  3.8183e-01]],

         [[-6.1007e-01, -6.1445e-02],
          [-2.5982e+00,  1.9354e+00]],

         ...,

         [[ 1.4714e-01, -4.5221e-01],
          [ 7.0671e-02, -8.2256e-01]],

         [[ 1.1713e-01, -1.0164e+00],
          [ 8.6660e-01,  1.2946e+00]],

         [[ 2.5522e-01,  9.7698e-01],
          [-1.4134e+00, -9.1138e-01]]],


        [[[-1.9626e-01, -2.196

In [None]:
torch.cat(cat, dim=0).shape

NameError: ignored

In [None]:
torch.cat((t4d, t4d), dim=0)

In [None]:
F.pad(t4d, (1,1,1,1,4,4))

In [None]:
F.avg_pool2d(torch.randn([128, 512, 2, 2]), 2)

In [None]:
6