In [1]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random
import shutil
import copy

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
import time

import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

device = torch.device('cuda')

In [2]:
# Code for Reproduction

def seed_everything(seed: int = 2):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  

In [3]:
# Input your own directory

file_dir = 'D:/thon/DL/zindi'
img_dir = file_dir + '/Images'

In [9]:
train_df = pd.read_csv(file_dir + "/Train.csv")
test_df = pd.read_csv(file_dir + '/Test.csv')
train_labels = train_df["Label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

train_labels = [label_unique[k] for k in train_labels]

In [5]:
### You don't need to do it twice

# Generate new 'train', 'test' image folders in your file_directory

os.makedirs(img_dir + '/train')
os.makedirs(img_dir + '/test')

# Set train and test directories

train_dir = img_dir + '/train'
test_dir = img_dir + '/test'

### To move files according to train.csv and test.csv's 'Image_id' column

train_file_name = train_df["Image_id"]
train_file_label = train_df["Label"]
test_file_name = test_df["Image_id"]

# Move train images from 'Images' folder to 'train' folder

for i in train_file_name:
    file_source = img_dir + f'/{i}'
    file_destination = train_dir
    shutil.move(file_source, file_destination)


# Move test images from 'Images' folder to 'test' folder

for i in test_file_name:
    file_source = img_dir + f'/{i}'
    file_destination = test_dir
    shutil.move(file_source, file_destination)

FileExistsError: [WinError 183] 파일이 이미 있으므로 만들 수 없습니다: 'D:/thon/DL/zindi/Images/train'

In [4]:
train_dir = img_dir + '/train'
test_dir = img_dir + '/test'

train_jpg = sorted(glob(train_dir +'/*.jpg'))
test_jpg = sorted(glob(test_dir + '/*.jpg'))

In [5]:
img_size = 224 # input size for efficientnet_b0 : 224 * 224

def img_load(path):
    img = cv2.imread(path)[:,:,::-1]
    img = cv2.resize(img, (img_size, img_size))
    return img

In [6]:
train_imgs = [img_load(m) for m in tqdm(train_jpg)]
test_imgs = [img_load(n) for n in tqdm(test_jpg)]

100%|██████████| 1619/1619 [00:25<00:00, 64.71it/s]
100%|██████████| 1080/1080 [00:17<00:00, 60.05it/s]


In [7]:
class Custom_dataset(Dataset):
    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode=mode
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        img = self.img_paths[idx]
        if self.mode=='train':
            augmentation = random.randint(0,2)
            if augmentation==1:
                img = img[::-1].copy()
            elif augmentation==2:
                img = img[:,::-1].copy()
        img = transforms.ToTensor()(img)
        if self.mode=='test':
            pass
        
        label = self.labels[idx]
        return img, label
    
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=88)
        
    def forward(self, x):
        x = self.model(x)
        return x

In [10]:
batch_size = 32
epochs = 25

# Train
train_dataset = Custom_dataset(np.array(train_imgs), np.array(train_labels), mode='train')
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)

# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"]*len(test_imgs)), mode='test')
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [28]:
def score_function(real, pred):
    score = roc_auc_score(real, pred, average="macro")
    return score

model = Network().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler() 



best=0
for epoch in range(epochs):
    start=time.time()
    train_loss = 0
    train_pred=[]
    train_y=[]
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype=torch.float32, device=device)
        y = torch.tensor(batch[1], dtype=torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)


        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item()/len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
        
    
    train_auc = score_function(train_y, train_pred)

    TIME = time.time() - start
    print(f'epoch : {epoch+1}/{epochs}    time : {TIME:.0f}s/{TIME*(epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    AUC : {train_auc:.5f}')

epoch : 1/25    time : 14s/343s
TRAIN    loss : 0.48698    AUC : 0.90842
epoch : 2/25    time : 14s/326s
TRAIN    loss : 0.03627    AUC : 0.98826
epoch : 3/25    time : 14s/313s
TRAIN    loss : 0.04267    AUC : 0.98394
epoch : 4/25    time : 14s/301s
TRAIN    loss : 0.03606    AUC : 0.99012
epoch : 5/25    time : 14s/283s
TRAIN    loss : 0.01341    AUC : 0.99568
epoch : 6/25    time : 14s/267s
TRAIN    loss : 0.00784    AUC : 0.99877
epoch : 7/25    time : 14s/252s
TRAIN    loss : 0.01342    AUC : 0.99506
epoch : 8/25    time : 14s/239s
TRAIN    loss : 0.02273    AUC : 0.99444
epoch : 9/25    time : 14s/225s
TRAIN    loss : 0.02200    AUC : 0.99382
epoch : 10/25    time : 14s/211s
TRAIN    loss : 0.01210    AUC : 0.99691
epoch : 11/25    time : 14s/197s
TRAIN    loss : 0.00792    AUC : 0.99753
epoch : 12/25    time : 14s/183s
TRAIN    loss : 0.01974    AUC : 0.99629
epoch : 13/25    time : 14s/169s
TRAIN    loss : 0.00681    AUC : 0.99691
epoch : 14/25    time : 14s/154s
TRAIN    loss 

In [29]:
model.eval()
f_pred = []

with torch.no_grad():
    for batch in (test_loader):
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

In [30]:
label_decoder = {val:key for key, val in label_unique.items()}

f_result = [label_decoder[result] for result in f_pred]

In [31]:
submission = pd.read_csv("./zindi/SampleSubmission.csv")
submission["Label"] = f_result
submission

Unnamed: 0,Image_id,Label
0,id_00exusbkgzw1b.jpg,0
1,id_03dqinf6w0znv.jpg,0
2,id_046yl0cxn3ybz.jpg,1
3,id_04athdtx2abyg.jpg,0
4,id_062aauf9e9jk0.jpg,0
...,...,...
1075,id_zv5fvjnakvf1r.jpg,1
1076,id_zvpikh1z30arn.jpg,0
1077,id_zypilwkudljyz.jpg,0
1078,id_zz9lwehh5sxdp.jpg,1


In [32]:
submission.to_csv('Sub_effnet_seed2.csv', index = False)

In [33]:
perfect = pd.read_csv('./Perfect_AUC.csv')

print(np.sum(submission["Label"] == perfect['Label']), len(submission["Label"]))

1076 1080


In [26]:
# Seed 41 -> 1076 / 1080
# Seed 1 -> 1075 / 1080
# Seed 2 -> 1076 / 1080

# ResNet50

In [19]:
# load resnet18 with the pre-trained weights
from torchvision import models
import torch

resnet18 = models.resnet18(pretrained = True)
vgg16 = models.vgg16(pretrained = True)
efficientnet_b0 = models.efficientnet_b0(pretrained = True)
efficientnet_b4 = models.efficientnet_b4(pretrained = True)
efficientnet_b7 = models.efficientnet_b7(pretrained = True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to C:\Users\sujin/.cache\torch\hub\checkpoints\vgg16-397923af.pth
100%|██████████| 528M/528M [00:56<00:00, 9.82MB/s] 
Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-3dd342df.pth" to C:\Users\sujin/.cache\torch\hub\checkpoints\efficientnet_b0_rwightman-3dd342df.pth
100%|██████████| 20.5M/20.5M [00:03<00:00, 6.39MB/s]
Downloading: "https://download.pytorch.org/models/efficientnet_b4_rwightman-7eb33cd5.pth" to C:\Users\sujin/.cache\torch\hub\checkpoints\efficientnet_b4_rwightman-7eb33cd5.pth
100%|██████████| 74.5M/74.5M [00:09<00:00, 8.28MB/s]
Downloading: "https://download.pytorch.org/models/efficientnet_b7_lukemelas-dcc49843.pth" to C:\Users\sujin/.cache\torch\hub\checkpoints\efficientnet_b7_lukemelas-dcc49843.pth
100%|██████████| 255M/255M [00:26<00:00, 10.3MB/s] 


In [20]:
model_list = [resnet18, vgg16, efficientnet_b0]
print(model_list)

[ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
 

In [12]:
# change the output layer to 10 classes
num_classes = 2
num_ftrs = resnet18.fc.in_features
resnet18.fc = nn.Linear(num_ftrs, num_classes)

device = torch.device('cuda:0')
resnet18.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
# get the model summary
from torchsummary import summary
summary(resnet18, input_size=(3, 224, 224), device = device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [None]:
for epoch in range(epochs):
    start = time.time()
    train_loss = 0
    train_pred = []
    train_y = []
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        y = torch.tensor(batch[1], dtype = torch.long, device = device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)


        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
        
    
    train_auc = score_function(train_y, train_pred)

    TIME = time.time() - start
    print(f'epoch : {epoch + 1}/{epochs}    time : {TIME:.0f}s/{TIME * (epochs-epoch-1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    AUC : {train_auc:.5f}')