In [1]:
import warnings
warnings.filterwarnings('ignore')

from glob import glob
import pandas as pd
import numpy as np 
from tqdm import tqdm
import cv2

import os
import timm
import random
import shutil
import copy

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torchvision.transforms as transforms
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
import time

import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt

device = torch.device('cuda')

In [2]:
def seed_everything(seed: int = 110):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True  

In [3]:
# Input your own directory

file_dir = 'D:/thon/DL/zindi'
img_dir = file_dir + '/Images'

In [32]:
train_df = pd.read_csv(file_dir + "/highly_imbalanced_train.csv")
test_df = pd.read_csv(file_dir + '/Test.csv')
train_labels = train_df["Label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

train_labels = [label_unique[k] for k in train_labels]

In [34]:
train_df['Label'].value_counts()

0    810
1      9
Name: Label, dtype: int64

In [5]:
### You don't need to do it twice
# Set train and test directories

train_dir = img_dir + '/highly_imbalanced_train'
test_dir = img_dir + '/test'

### To move files according to train.csv and test.csv's 'Image_id' column

train_file_name = train_df["Image_id"]
train_file_label = train_df["Label"]
test_file_name = test_df["Image_id"]

# Load Image datasets
train_jpg = sorted(glob(train_dir +'/*.jpg'))
test_jpg = sorted(glob(test_dir + '/*.jpg'))

In [6]:
img_size = 224 # input size for efficientnet_b0 : 224 * 224

def img_load(path):
    img = cv2.imread(path)[:,:,::-1]
    img = cv2.resize(img, (img_size, img_size))
    return img

In [7]:
train_imgs = [img_load(m) for m in tqdm(train_jpg)]
test_imgs = [img_load(n) for n in tqdm(test_jpg)]

100%|██████████| 9/9 [00:00<00:00, 79.84it/s]
100%|██████████| 1080/1080 [00:15<00:00, 70.66it/s]


In [8]:
class Custom_dataset(Dataset):
    def __init__(self, img_paths, labels, mode='train'):
        self.img_paths = img_paths
        self.labels = labels
        self.mode=mode
    def __len__(self):
        return len(self.img_paths)
    def __getitem__(self, idx):
        img = self.img_paths[idx]
        if self.mode=='train':
            augmentation = random.randint(0,2)
            if augmentation==1:
                img = img[::-1].copy()
            elif augmentation==2:
                img = img[:,::-1].copy()
        img = transforms.ToTensor()(img)
        if self.mode=='test':
            pass
        
        label = self.labels[idx]
        return img, label
    
class Network(nn.Module):
    def __init__(self):
        super(Network, self).__init__()
        self.model = timm.create_model('efficientnet_b0', pretrained=True, num_classes=88)
        
    def forward(self, x):
        x = self.model(x)
        return x

In [9]:
batch_size = 32
epochs = 25

# Train
train_dataset = Custom_dataset(np.array(train_imgs), np.array(train_labels), mode = 'train')
train_loader = DataLoader(train_dataset, shuffle = True, batch_size = batch_size)

# Test
test_dataset = Custom_dataset(np.array(test_imgs), np.array(["tmp"] * len(test_imgs)), mode = 'test')
test_loader = DataLoader(test_dataset, shuffle = False, batch_size = batch_size)

In [22]:
def score_function(real, pred):
    score = f1_score(real, pred, average = "macro")
    return score

model = Network().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler() 



best = 0
for epoch in range(epochs):
    start = time.time()
    train_loss = 0
    train_pred = []
    train_y = []
    model.train()
    for batch in (train_loader):
        optimizer.zero_grad()
        x = torch.tensor(batch[0], dtype = torch.float32, device=device)
        y = torch.tensor(batch[1], dtype = torch.long, device=device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        loss = criterion(pred, y)


        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item() / len(train_loader)
        train_pred += pred.argmax(1).detach().cpu().numpy().tolist()
        train_y += y.detach().cpu().numpy().tolist()
        
    
    train_f1 = score_function(train_y, train_pred)

    TIME = time.time() - start
    print(f'epoch : {epoch + 1} / {epochs}    time : {TIME:.0f}s / {TIME*(epochs - epoch - 1):.0f}s')
    print(f'TRAIN    loss : {train_loss:.5f}    F1 : {train_f1:.5f}')

epoch : 1 / 25    time : 0s / 9s
TRAIN    loss : 5.39453    F1 : 0.00000
epoch : 2 / 25    time : 0s / 8s
TRAIN    loss : 5.25391    F1 : 0.00000
epoch : 3 / 25    time : 0s / 7s
TRAIN    loss : 3.05273    F1 : 0.14286
epoch : 4 / 25    time : 0s / 7s
TRAIN    loss : 1.25391    F1 : 1.00000
epoch : 5 / 25    time : 0s / 7s
TRAIN    loss : 0.44385    F1 : 1.00000
epoch : 6 / 25    time : 0s / 6s
TRAIN    loss : 0.18372    F1 : 1.00000
epoch : 7 / 25    time : 0s / 6s
TRAIN    loss : 0.10565    F1 : 1.00000
epoch : 8 / 25    time : 0s / 6s
TRAIN    loss : 0.01605    F1 : 1.00000
epoch : 9 / 25    time : 0s / 5s
TRAIN    loss : 0.17371    F1 : 1.00000
epoch : 10 / 25    time : 0s / 5s
TRAIN    loss : 0.00589    F1 : 1.00000
epoch : 11 / 25    time : 0s / 5s
TRAIN    loss : 0.00351    F1 : 1.00000
epoch : 12 / 25    time : 0s / 4s
TRAIN    loss : 0.00594    F1 : 1.00000
epoch : 13 / 25    time : 0s / 4s
TRAIN    loss : 0.00653    F1 : 1.00000
epoch : 14 / 25    time : 0s / 4s
TRAIN    loss

In [41]:
model.eval()
f_pred = []

with torch.no_grad():
    for batch in (test_loader):
        x = torch.tensor(batch[0], dtype = torch.float32, device = device)
        with torch.cuda.amp.autocast():
            pred = model(x)
        f_pred.extend(pred.argmax(1).detach().cpu().numpy().tolist())

In [45]:
pred_sum = 0
for i in f_pred:
    pred_sum = pred_sum + i
print(pred_sum)

12


In [36]:
label_decoder = {val:key for key, val in label_unique.items()}
label_decoder

{0: 0, 1: 1}

In [35]:
label_decoder = {val:key for key, val in label_unique.items()}

f_result = [label_decoder[result] for result in f_pred]

KeyError: 12

In [46]:
submission = pd.read_csv("./zindi/SampleSubmission.csv")
submission["Label"] = f_pred
submission

Unnamed: 0,Image_id,Label
0,id_00exusbkgzw1b.jpg,0
1,id_03dqinf6w0znv.jpg,0
2,id_046yl0cxn3ybz.jpg,0
3,id_04athdtx2abyg.jpg,0
4,id_062aauf9e9jk0.jpg,0
...,...,...
1075,id_zv5fvjnakvf1r.jpg,0
1076,id_zvpikh1z30arn.jpg,0
1077,id_zypilwkudljyz.jpg,0
1078,id_zz9lwehh5sxdp.jpg,0


In [48]:
np.sum(submission['Label'] == )

1079

In [14]:
submission.to_csv('Sub_imbal_b0.csv', index = False)

In [38]:
perfect = pd.read_csv('./zindi/submission_csv/Perfect_AUC.csv')

print(np.sum(submission["Label"] == perfect['Label']), len(submission["Label"]))

542 1080


In [39]:
542/1080

0.5018518518518519