In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import random
import numpy as np
import pandas as pd
import os
import timm

from libauc.losses import AUCMLoss, AUCM_MultiLabel
from libauc.optimizers import PESG

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from glob import glob
from PIL import Image
import torchvision
import torchvision.transforms as tfs
import sys

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# seed 고정 함수 및 seed 고정
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    print(f"SUCCES {seed} SEED FIXING")

seed_everything(42)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

SUCCES 42 SEED FIXING


In [3]:
data_dir = '/home/competition/dataset/Xray/'
meat_data = data_dir + 'meatdata.csv'
test_dir = data_dir + 'test'

In [4]:
def dataframe_set(meat_data):
    '''
    meatdata를 DataFrame으로 변환하고 train과 valid의 index를 미리 나눔
    seed image로부터 4가지 이미지(normal, pleural_effusion, pneumonia, pneumothorax)를 생성하므로
    seed 번호를 뽑음 (seed_list 함수에서 받아옴)
    '''
    meat_data = meat_data
    df = pd.read_csv(meat_data)
    data_length = int(len(df)/4)
    # index setting
    total_indices = [i for i in range(data_length)]
    random.shuffle(total_indices)
    train_indices = total_indices[:int(data_length*0.8)]
    valid_indices = list(set(total_indices) - set(train_indices))
    # get seed list
    train_seed_list = seed_list(df, train_indices)
    valid_seed_list = seed_list(df, valid_indices)

    return df, train_seed_list, valid_seed_list


def seed_list(df, indices):
    '''
    seed 번호 뽑는 함수
    '''
    # seed number list
    seed_list = df["Filename"].tolist()
    select_seed_list = []
    for i in seed_list:
        if int(i.strip('seed'))+1 in indices:
            select_seed_list.append(i)

    return select_seed_list

In [5]:
def dataframe_handling(df):
    df_normal = df[((df['Conditioned'] == 'normal') & (df['Pleural_effusion'] < 20)) &
                   ((df['Conditioned'] == 'normal') & (df['Pneumonia'] < 20)) &
                   ((df['Conditioned'] == 'normal') & (df['Pneumothorax'] < 20))].copy()
    df_abnormal = df[((df['Conditioned'] == 'pleural_effusion') & (df['Pleural_effusion'] > 20)) | 
                        ((df['Conditioned'] == 'pneumonia') & (df['Pneumonia'] > 20)) |
                      ((df['Conditioned'] == 'pneumothorax') & (df['Pneumothorax'] > 20))].copy()
    dataframe = pd.concat([df_normal, df_abnormal])
    dataframe = dataframe.sample(frac=1).reset_index(drop=True)
    train_df = dataframe[:int(len(dataframe)*0.8)]
    valid_df = dataframe[int(len(dataframe)*0.8):].reset_index(drop=True)
    return train_df, valid_df

In [7]:
class XrayDataset(Dataset):
    '''
    version 3
    '''
    def __init__(self, data_dir, meat_data, mode):
        self.data_dir = data_dir
        self.meat_data = meat_data
        self.mode = mode
        self.df, self.train_seed_list, self.valid_seed_list = dataframe_set(self.meat_data)
        self.train_df, self.valid_df = dataframe_handling(self.df)
    
    def __len__(self):
        if 'train' == self.mode:
            return len(self.train_df)
        elif 'valid' == self.mode:
            return len(self.valid_df)
    
    def __getitem__(self, idx):
        if 'train' == self.mode:
            self.dataframe = self.train_df
        elif 'valid' == self.mode:
            self.dataframe = self.valid_df
        img_path = self.dataframe["Path"].tolist()
        img_label = self.dataframe["Abnormality"].tolist()
        # to tensor, image
        image = Image.open(self.data_dir + img_path[idx]).resize((224,224))
        image = tfs.ToTensor()(image)
        img = tfs.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(image)
        label = torch.tensor(img_label[idx])

        return img, label

In [16]:
class XrayTestset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.img_lists = list(glob(os.path.join(self.data_dir, '*.png')))
        self.img_list_f = sorted(self.img_lists)

    def __len__(self):
        return len(self.img_lists)
    
    def __getitem__(self, idx):
        label = 0
        img_path = self.img_list_f[idx]
        im = Image.open(img_path).resize((224,224))
        img = torchvision.transforms.ToTensor()(im)
        # 1차원 image -> 3차원
        imgs = []
        for i in range(3):
            imgs.append(img)
        image = torch.cat(imgs)
        image = tfs.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(image)

        return image, img_path

In [17]:
train_dataset = XrayDataset(data_dir, meat_data, 'train')
valid_dataset = XrayDataset(data_dir, meat_data, 'valid')
test_dataset = XrayTestset(test_dir)

In [18]:
train_loader = DataLoader(train_dataset, batch_size=8, num_workers=2, drop_last=True, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=8, num_workers=2, drop_last=False, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, num_workers=2, drop_last=False, shuffle=False)

In [11]:
# model = timm.create_model('swsl_resnet18', pretrained=True, num_classes=2)
model = timm.create_model('efficientnet_b4', pretrained=True, num_classes=2)
# model = timm.create_model('swinv2_tiny_window16_256', pretrained=True, num_classes=2)
model.to(device)

EfficientNet(
  (conv_stem): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn1): BatchNormAct2d(
    48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
        (bn1): BatchNormAct2d(
          48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(48, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNormAct2d(
    

In [12]:
# criterion = AUCMLoss()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 5, gamma=0.7)

In [13]:
# training
print ('Start Training')
print ('-'*30)

best_val_auc = 0
early_stop_cnt = 0
for epoch in range(5):
    # if epoch > 0:
    #     optimizer.update_regularizer(decay_factor=10)    

    for idx, data in enumerate(train_loader):
        model.train()
        train_data, train_labels = data
        train_data, train_labels  = train_data.to(device), train_labels.to(device)
        y_pred = model(train_data)
        y_pred = torch.sigmoid(y_pred)
        loss = criterion(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # validation    
        if idx % 500 == 0:
            model.eval()
            with torch.no_grad():    
                test_pred = []
                test_true = [] 
                for jdx, data in enumerate(valid_loader):
                    test_data, test_labels = data
                    test_data = test_data.to(device)
                    y_pred = model(test_data)
                    y_pred = torch.sigmoid(y_pred)
                    y_pred = torch.argmax(y_pred, dim=1)
                    test_pred.append(y_pred.cpu().detach().numpy())
                    test_true.append(test_labels.numpy())

                test_true = np.concatenate(test_true)
                test_pred = np.concatenate(test_pred)
                val_auc = roc_auc_score(test_true, test_pred)

                if best_val_auc < val_auc:
                    best_val_auc = val_auc
                    early_stop_cnt = 0
                    torch.save(model.state_dict(), '../result_xray/xray_juppak_efficientnet_b4.pth')
                else:
                    early_stop_cnt += 1
                    if early_stop_cnt == 5:
                        print('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc, best_val_auc))
                        break

                print ('Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f'%(epoch, idx, val_auc, best_val_auc))

Start Training
------------------------------
Epoch=0, BatchID=0, Val_AUC=0.5027, Best_Val_AUC=0.5027
Epoch=0, BatchID=500, Val_AUC=0.9159, Best_Val_AUC=0.9159
Epoch=0, BatchID=1000, Val_AUC=0.9295, Best_Val_AUC=0.9295
Epoch=0, BatchID=1500, Val_AUC=0.9536, Best_Val_AUC=0.9536
Epoch=0, BatchID=2000, Val_AUC=0.9514, Best_Val_AUC=0.9536
Epoch=0, BatchID=2500, Val_AUC=0.9718, Best_Val_AUC=0.9718
Epoch=0, BatchID=3000, Val_AUC=0.9688, Best_Val_AUC=0.9718
Epoch=0, BatchID=3500, Val_AUC=0.9577, Best_Val_AUC=0.9718
Epoch=0, BatchID=4000, Val_AUC=0.9641, Best_Val_AUC=0.9718
Epoch=1, BatchID=0, Val_AUC=0.9700, Best_Val_AUC=0.9718
Epoch=1, BatchID=500, Val_AUC=0.9698, Best_Val_AUC=0.9718
Epoch=2, BatchID=500, Val_AUC=0.9665, Best_Val_AUC=0.9718
Epoch=2, BatchID=1000, Val_AUC=0.9759, Best_Val_AUC=0.9759
Epoch=2, BatchID=1500, Val_AUC=0.9691, Best_Val_AUC=0.9759
Epoch=2, BatchID=2000, Val_AUC=0.9663, Best_Val_AUC=0.9759
Epoch=2, BatchID=2500, Val_AUC=0.9716, Best_Val_AUC=0.9759
Epoch=2, BatchID=30

In [19]:
model = timm.create_model('efficientnet_b4', pretrained=True, num_classes=2)
model.to(device)
model.load_state_dict(torch.load('../result_xray/xray_juppak_efficientnet_b4.pth'))

# training
print ('Start Testing')
print ('-'*30)

model.eval()
with torch.no_grad():    
    test_pred = []
    data_path_lst = []
    for jdx, (test_data, data_path) in enumerate(test_loader):
        test_data = test_data.cuda()
        y_pred = model(test_data)
        y_pred = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        test_pred.append(y_pred.detach().cpu().numpy())
        data_path_lst.append(data_path)

    test_pred = np.concatenate(test_pred)
    data_path_lst = np.concatenate(data_path_lst)

pred_info = {}

for pred, img_path in zip(test_pred, data_path_lst):
    file_name = img_path.split('/')[-1]
    pred_info[file_name] = pred

sort_pred = dict(sorted(pred_info.items()))
print(sort_pred.items())


submission = pd.read_csv('../result_xray/1000_sample_submission.csv')
submission['result'] = sort_pred.values()

submission.to_csv('../result_xray/xray_juppak_efficientnet_b4.csv', index = False)
submission.head()

Start Testing
------------------------------


Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2ff544fdc0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():
  File "/usr/local/lib/python3.8/multiprocessing/process.py", line 160, in is_alive
    Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f2ff544fdc0>assert self._parent_pid == os.getpid(), 'can only test a child process'

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1510, in __del__
    self._shutdown_workers()AssertionError: 
can only test a child process  File "/usr/local/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1493, in _shutdown_workers
    if w.is_alive():

  File "/usr/loc

dict_items([('0000.png', 0), ('0001.png', 1), ('0002.png', 0), ('0003.png', 0), ('0004.png', 1), ('0005.png', 0), ('0006.png', 0), ('0007.png', 0), ('0008.png', 1), ('0009.png', 1), ('0010.png', 1), ('0011.png', 1), ('0012.png', 0), ('0013.png', 1), ('0014.png', 1), ('0015.png', 0), ('0016.png', 0), ('0017.png', 1), ('0018.png', 1), ('0019.png', 0), ('0020.png', 1), ('0021.png', 1), ('0022.png', 0), ('0023.png', 1), ('0024.png', 0), ('0025.png', 1), ('0026.png', 1), ('0027.png', 1), ('0028.png', 1), ('0029.png', 0), ('0030.png', 1), ('0031.png', 1), ('0032.png', 1), ('0033.png', 1), ('0034.png', 1), ('0035.png', 1), ('0036.png', 1), ('0037.png', 1), ('0038.png', 1), ('0039.png', 1), ('0040.png', 1), ('0041.png', 1), ('0042.png', 0), ('0043.png', 0), ('0044.png', 1), ('0045.png', 1), ('0046.png', 0), ('0047.png', 0), ('0048.png', 0), ('0049.png', 1), ('0050.png', 1), ('0051.png', 1), ('0052.png', 1), ('0053.png', 1), ('0054.png', 1), ('0055.png', 0), ('0056.png', 1), ('0057.png', 1), ('

Unnamed: 0,filename,result
0,0000.png,0
1,0001.png,1
2,0002.png,0
3,0003.png,0
4,0004.png,1
