In [1]:
import csv
import numpy as np
import random
import os

from PIL import Image

import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms as  T
from torch.utils.data import Dataset, DataLoader

In [2]:
# Dataset locations on Kaggle.
TRAIN_PATH = "/kaggle/input/mlhw5-data/train"
TEST_PATH = "/kaggle/input/mlhw5-data/test"

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if t.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [3]:
# Label alphabet: the ten digits followed by the 26 lowercase letters.
keys = list("0123456789abcdefghijklmnopqrstuvwxyz")
# Class indices 0..35, one per character, in the same order as `keys`.
values = list(range(36))
# Character -> class index lookup table.
elements = {char: index for char, index in zip(keys, values)}
print(elements)

{'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35}


In [4]:
def numOfElements(task):
    """Return how many characters a label has for the given task.

    'task1' labels have 1 character, 'task2' labels have 2, and any other
    task name is treated as having 4.
    """
    if task == 'task1':
        return 1
    if task == 'task2':
        return 2
    return 4

def stol(s, task='task1'):
    """Convert a label string into a list of class indices.

    Only the first `numOfElements(task)` characters of `s` are read; each is
    looked up in the module-level `elements` table.
    """
    return [elements[s[pos]] for pos in range(numOfElements(task))]

# Quick check of the label encoding, one example per task (one result per line).
print(
    stol('2', task='task1'),
    stol('r7', task='task2'),
    stol('f3a5', task='task3'),
    sep='\n',
)

[2]
[27, 7]
[15, 3, 10, 5]


In [5]:
class LoadData(Dataset):
    """Image dataset restricted to the annotation rows of a single task.

    Args:
        data: iterable of ``(filename, label)`` rows; only rows whose filename
            starts with ``task`` are kept.
        root: directory that the filenames are relative to.
        task: task prefix used both for filtering rows and for deciding how
            many label characters are encoded (see ``stol``).
        H, W: height and width every image is resized to.

    Each item is ``(image, label)`` where ``image`` is a 3-channel float tensor
    normalized with mean 0.5 / std 0.5 per channel, and ``label`` is a float
    tensor holding one class index per label character.
    """

    def __init__(self, data, root, task="task1", H=72, W=72):
        # Keep only the rows that belong to the requested task.
        self.data = [sample for sample in data if sample[0].startswith(task)]
        self.root = root
        self.task = task
        self.transform = T.Compose([
            T.Resize((H, W)),
            T.ToTensor(),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        ])

    def __getitem__(self, index):
        filename, label = self.data[index]
        # Open inside a context manager so the file handle is released as soon
        # as the pixels have been read, and force RGB so grayscale / RGBA /
        # palette images still match the 3-channel normalization and model.
        with Image.open(f"{self.root}/{filename}") as raw_img:
            img = self.transform(raw_img.convert("RGB"))
        return img, t.Tensor(stol(label, self.task))

    def __len__(self):
        return len(self.data)

In [54]:
# Fixed seed so the train/validation split is identical on every run
# (Restart & Run All reproduces the same partition).
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# Required label length for each filename prefix; rows that match a prefix
# but have a label of a different length are discarded.
LABEL_LEN_BY_TASK = {"task1": 1, "task2": 2, "task3": 4}

train_data = []
val_data = []

with open(f'{TRAIN_PATH}/annotations.csv', newline='') as csvfile:
    for row in csv.reader(csvfile, delimiter=','):
        # Skip blank/short lines and the header row.
        if len(row) < 2 or row[1] == 'label':
            continue
        filename, label = row[0], row[1]
        if any(filename.startswith(prefix) and len(label) != expected_len
               for prefix, expected_len in LABEL_LEN_BY_TASK.items()):
            continue
        # Roughly 75% of the rows go to training, the rest to validation.
        if split_rng.random() < 0.75:
            train_data.append(row)
        else:
            val_data.append(row)

num_t = len(train_data)
num_v = len(val_data)

print('Number of training data:', num_t)
print('Number of validation data:', num_v)

Number of training data: 153575
Number of validation data: 51369


In [7]:
# Default hyper-parameters; the per-task cells further down reassign these
# before building their DataLoaders and calling train().
learningRate, epochs, batchSize = 1e-3, 200, 500

In [8]:
def numType(task):
    """Return the number of classes per label character.

    'task1' uses 10 classes; every other task uses 36.
    """
    return 10 if task == 'task1' else 36

In [9]:
class Residual(nn.Module):
    """Two 3x3 conv + batch-norm layers with a skip connection.

    The first convolution applies `stride` and changes the channel count from
    `inChannel` to `outChannel`. When the input and output shapes would differ
    (stride != 1 or a channel change), the skip path is a strided 1x1
    convolution followed by batch-norm; otherwise it is the identity.
    """

    def __init__(self, inChannel, outChannel, stride=1):
        super().__init__()
        # Main path
        self.conv2_1 = nn.Conv2d(inChannel, outChannel, kernel_size=3,
                                 stride=stride, padding=1, bias=False)
        self.bn2_1 = nn.BatchNorm2d(outChannel, track_running_stats=True)
        self.relu = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(outChannel, outChannel, kernel_size=3,
                                 stride=1, padding=1, bias=False)
        self.bn2_2 = nn.BatchNorm2d(outChannel, track_running_stats=True)

        # Skip path: project only when the shapes do not already match
        needs_projection = stride != 1 or inChannel != outChannel
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inChannel, outChannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outChannel, track_running_stats=True)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        out = self.relu(self.bn2_1(self.conv2_1(x)))
        out = self.bn2_2(self.conv2_2(out))
        # Add the (possibly projected) input before the final activation
        out += self.shortcut(x)
        return self.relu(out)

In [10]:
class Model(nn.Module):
    """Small ResNet-style backbone with one linear head per label character.

    Args:
        Residual: block class used to build the four stages; it is called as
            ``Residual(in_channels, out_channels, stride)``.
        task: 'task1' uses one head, 'task2' uses two, anything else uses four.
            The number of classes per head comes from ``numType(task)``.

    ``forward`` returns a list of logits tensors, one per active head, each of
    shape ``(batch, numType(task))``.
    """

    def __init__(self, Residual, task='task1'):
        super().__init__()
        self.task = task
        # Running channel count, updated by make_layer as stages are built.
        self.inChannel = 36
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 36, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(36, track_running_stats=True),
            nn.ReLU()
        )
        self.layer1 = self.make_layer(Residual, outChannel=36, num=2, stride=1)
        self.layer2 = self.make_layer(Residual, outChannel=72, num=2, stride=2)
        self.layer3 = self.make_layer(Residual, outChannel=144, num=2, stride=2)
        self.layer4 = self.make_layer(Residual, outChannel=288, num=2, stride=2)
        # All four heads are always created (even when a task uses fewer) so
        # the state-dict layout stays the same for every task.
        self.fc1 = nn.Linear(288, numType(task))
        self.fc2 = nn.Linear(288, numType(task))
        self.fc3 = nn.Linear(288, numType(task))
        self.fc4 = nn.Linear(288, numType(task))

    def make_layer(self, Residual, outChannel, num, stride):
        """Build one stage of `num` blocks; only the first block uses `stride`."""
        layers = []
        for block_stride in [stride] + [1] * (num - 1):
            layers.append(Residual(self.inChannel, outChannel, block_stride))
            self.inChannel = outChannel
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Global average pooling. For 72x72 inputs this matches the previous
        # fixed 9x9 pooling window, but it also works for other input sizes
        # instead of crashing or folding spatial positions into the batch axis.
        x = F.adaptive_avg_pool2d(x, 1)
        x = t.flatten(x, 1)
        y = [self.fc1(x)]
        if self.task != 'task1':
            y.append(self.fc2(x))
            if self.task != 'task2':
                y.append(self.fc3(x))
                y.append(self.fc4(x))
        return y

In [43]:
def train(model, train_dl, val_dl, lr=None, num_epochs=None, run_device=None, patience=None):
    """Train `model` with Adam and report exact-match validation accuracy per epoch.

    The model must return a list of logits tensors (one per label character);
    labels are expected as a ``(batch, n_chars)`` tensor of class indices. The
    loss is the sum of the per-character cross-entropy losses. A validation
    sample counts as correct only if every character is predicted correctly.

    Args:
        model: the network to optimize (already on the target device).
        train_dl: iterable of ``(image, label)`` training batches.
        val_dl: iterable of ``(image, label)`` validation batches.
        lr: learning rate; defaults to the notebook-level ``learningRate``.
        num_epochs: number of epochs; defaults to the notebook-level ``epochs``.
        run_device: device batches are moved to; defaults to the notebook-level
            ``device``.
        patience: if given, stop once validation accuracy has been above 0.999
            for more than `patience` consecutive epochs. ``None`` (default)
            disables early stopping and always runs every epoch.
    """
    # Fall back to the notebook-level settings when no override is supplied.
    lr = learningRate if lr is None else lr
    num_epochs = epochs if num_epochs is None else num_epochs
    run_device = device if run_device is None else run_device

    stopCounter = 0
    optimizer = t.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        print(f"Epoch [{epoch}]")
        model.train()
        for image, label in train_dl:
            image = image.to(run_device)
            label = label.to(run_device).long()
            pred = model(image)
            # Sum the cross-entropy of every character position.
            loss = 0.0
            for i in range(label.shape[1]):
                loss += loss_fn(pred[i], label[:, i])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        sample_count = 0
        correct_count = 0
        model.eval()
        # No gradients are needed for evaluation; skipping the autograd graph
        # saves memory and time.
        with t.no_grad():
            for image, label in val_dl:
                image = image.to(run_device)
                label = label.to(run_device).long()
                pred = model(image)
                # True for samples whose characters have all matched so far.
                all_correct = t.ones(len(image), dtype=t.bool, device=image.device)
                for i in range(label.shape[1]):
                    all_correct &= t.argmax(pred[i], dim=1) == label[:, i]

                sample_count += len(image)
                correct_count += int(all_correct.sum())

        # An empty validation set yields NaN instead of a ZeroDivisionError.
        acc = correct_count / sample_count if sample_count else float("nan")
        print("accuracy (validation):", acc)
        if acc > 0.999:
            stopCounter += 1
        else:
            stopCounter = 0
        if patience is not None and stopCounter > patience:
            break

In [55]:
# Task 1 hyper-parameters. batchSize is used when the task-1 DataLoaders are
# built; learningRate and epochs are picked up by train() when it is called.
learningRate, epochs, batchSize = 1e-3, 100, 128

In [56]:
# Task-1 training split: reshuffled every epoch, incomplete last batch dropped.
task1_train_ds = LoadData(data=train_data, root=TRAIN_PATH, task='task1')
task1_train_dl = DataLoader(
    dataset=task1_train_ds,
    batch_size=batchSize,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)

# Task-1 validation split: fixed order, every sample kept.
task1_val_ds = LoadData(data=val_data, root=TRAIN_PATH, task='task1')
task1_val_dl = DataLoader(
    dataset=task1_val_ds,
    batch_size=batchSize,
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

In [57]:
# Build the task-1 network, move it to the selected device, and show its layers.
task1_model = Model(Residual, task='task1')
task1_model = task1_model.to(device)
print(task1_model)

Model(
  (conv1): Sequential(
    (0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer1): Sequential(
    (0): Residual(
      (conv2_1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2_2): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_2): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): Residual(
      (conv2_1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2_2): Conv2d(36, 36, kernel_s

In [58]:
# Train the task-1 model; validation accuracy is printed after every epoch.
# Uses the learningRate / epochs values set in the task-1 hyper-parameter cell.
train(task1_model, task1_train_dl, task1_val_dl)

Epoch [0]
accuracy (validation): 0.7686864219371959
Epoch [1]
accuracy (validation): 0.9960194604157453
Epoch [2]
accuracy (validation): 0.8796992481203008
Epoch [3]
accuracy (validation): 0.7739938080495357
Epoch [4]
accuracy (validation): 0.9898275099513489
Epoch [5]
accuracy (validation): 0.8925254312251216
Epoch [6]
accuracy (validation): 0.9995577178239717
Epoch [7]
accuracy (validation): 0.9995577178239717
Epoch [8]
accuracy (validation): 0.9995577178239717
Epoch [9]
accuracy (validation): 0.9995577178239717
Epoch [10]
accuracy (validation): 0.9995577178239717
Epoch [11]
accuracy (validation): 0.9995577178239717
Epoch [12]
accuracy (validation): 0.9995577178239717
Epoch [13]
accuracy (validation): 0.9995577178239717
Epoch [14]
accuracy (validation): 0.9995577178239717
Epoch [15]
accuracy (validation): 0.9995577178239717
Epoch [16]
accuracy (validation): 0.9995577178239717
Epoch [17]
accuracy (validation): 0.9995577178239717
Epoch [18]
accuracy (validation): 0.9995577178239717
Epo

KeyboardInterrupt: 

In [60]:
# Save only the task-1 weights (state_dict), not the whole module object.
t.save(task1_model.state_dict(), 'task1_model.pth')

In [62]:
# Task 2 hyper-parameters. batchSize is used when the task-2 DataLoaders are
# built; learningRate and epochs are picked up by train() when it is called.
learningRate, epochs, batchSize = 1e-3, 200, 256

In [63]:
# Task-2 training split: reshuffled every epoch, incomplete last batch dropped.
task2_train_ds = LoadData(data=train_data, root=TRAIN_PATH, task='task2')
task2_train_dl = DataLoader(
    dataset=task2_train_ds,
    batch_size=batchSize,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)

# Task-2 validation split: fixed order, every sample kept.
task2_val_ds = LoadData(data=val_data, root=TRAIN_PATH, task='task2')
task2_val_dl = DataLoader(
    dataset=task2_val_ds,
    batch_size=batchSize,
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

In [64]:
# Build the task-2 network, move it to the selected device, and show its layers.
task2_model = Model(Residual, task='task2')
task2_model = task2_model.to(device)
print(task2_model)

Model(
  (conv1): Sequential(
    (0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer1): Sequential(
    (0): Residual(
      (conv2_1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2_2): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_2): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): Residual(
      (conv2_1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2_2): Conv2d(36, 36, kernel_s

In [67]:
# Train the task-2 model; validation accuracy is printed after every epoch.
# Uses the learningRate / epochs values set in the task-2 hyper-parameter cell.
train(task2_model, task2_train_dl, task2_val_dl)

Epoch [0]
accuracy (validation): 0.9962981449130688
Epoch [1]
accuracy (validation): 0.9854421429165627
Epoch [2]
accuracy (validation): 0.9945512020630564
Epoch [3]
accuracy (validation): 0.999168122452375
Epoch [4]
accuracy (validation): 0.9962149571583063
Epoch [5]
accuracy (validation): 0.9970468347059313
Epoch [6]
accuracy (validation): 0.9994592795940438
Epoch [7]
accuracy (validation): 0.9997920306130937
Epoch [8]
accuracy (validation): 0.9999168122452375
Epoch [9]
accuracy (validation): 0.9999584061226188
Epoch [10]
accuracy (validation): 0.9999584061226188
Epoch [11]
accuracy (validation): 0.9999584061226188
Epoch [12]


KeyboardInterrupt: 

In [68]:
# Save only the task-2 weights (state_dict), not the whole module object.
t.save(task2_model.state_dict(), 'task2_model.pth')

In [22]:
# Task 3 hyper-parameters. batchSize is used when the task-3 DataLoaders are
# built; learningRate and epochs are picked up by train() when it is called.
learningRate, epochs, batchSize = 1e-3, 300, 256

In [23]:
# Task-3 training split: reshuffled every epoch, incomplete last batch dropped.
task3_train_ds = LoadData(data=train_data, root=TRAIN_PATH, task='task3')
task3_train_dl = DataLoader(
    dataset=task3_train_ds,
    batch_size=batchSize,
    shuffle=True,
    drop_last=True,
    num_workers=2,
)

# Task-3 validation split: fixed order, every sample kept.
task3_val_ds = LoadData(data=val_data, root=TRAIN_PATH, task='task3')
task3_val_dl = DataLoader(
    dataset=task3_val_ds,
    batch_size=batchSize,
    shuffle=False,
    drop_last=False,
    num_workers=2,
)

In [24]:
# Build the task-3 network, move it to the selected device, and show its layers.
task3_model = Model(Residual, task='task3')
task3_model = task3_model.to(device)
print(task3_model)

Model(
  (conv1): Sequential(
    (0): Conv2d(3, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (layer1): Sequential(
    (0): Residual(
      (conv2_1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2_2): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_2): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): Residual(
      (conv2_1): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2_1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2_2): Conv2d(36, 36, kernel_s

In [39]:
# Train the task-3 model; validation accuracy is printed after every epoch.
# Uses the learningRate / epochs values set in the task-3 hyper-parameter cell.
train(task3_model, task3_train_dl, task3_val_dl)

Epoch [0]
accuracy (validation): 0.9717115721681001
Epoch [1]
accuracy (validation): 0.9869155830921615
Epoch [2]
accuracy (validation): 0.9826763950597155
Epoch [3]
accuracy (validation): 0.9855296946969389
Epoch [4]
accuracy (validation): 0.97990461826927
Epoch [5]
accuracy (validation): 0.9865079688582725
Epoch [6]
accuracy (validation): 0.9744425875351568
Epoch [7]
accuracy (validation): 0.9516977132841479
Epoch [8]
accuracy (validation): 0.9466025353605347
Epoch [9]
accuracy (validation): 0.989279745648718
Epoch [10]
accuracy (validation): 0.9788040598377695
Epoch [11]
accuracy (validation): 0.966616394244487
Epoch [12]
accuracy (validation): 0.9799861411160478
Epoch [13]
accuracy (validation): 0.9801899482329923
Epoch [14]
accuracy (validation): 0.9909102025842742
Epoch [15]
accuracy (validation): 0.9849590347694942
Epoch [16]
accuracy (validation): 0.9894427913422736
Epoch [17]
accuracy (validation): 0.9906656340439408
Epoch [18]
accuracy (validation): 0.9818204051685485
Epoch [

KeyboardInterrupt: 

In [40]:
# Save only the task-3 weights (state_dict), not the whole module object.
t.save(task3_model.state_dict(), 'task3_model.pth')