In [None]:
# Install the pinned dependencies into the environment of the *running kernel*.
# `%pip` (unlike `!pip` / `!pip3`) always targets the interpreter behind this notebook,
# so the packages are importable by the cells below.
%pip install -r requirements.txt

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

import random
import numpy as np
import pandas as pd
import os
import timm

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from glob import glob
from PIL import Image
import torchvision
import sys
import albumentations as A
from albumentations.pytorch import ToTensorV2

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed-fixing helper (the seed itself is applied in a later cell).
def seed_everything(seed: int = 42):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), sets the ``PYTHONHASHSEED`` environment variable, and
    configures cuDNN for deterministic execution.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick convolution algorithms by timing them, and the
    # choice may differ between runs, so it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False
    print(f"SUCCESS {seed} SEED FIXING")

In [3]:
class CTDataset(Dataset):
    """Labelled CT image dataset read from ``<root>/<mode>/*/*.png``.

    The class index of a sample is derived from its path: the first keyword in
    ``CLASS_KEYWORDS`` that occurs in the path (relative to ``<root>/<mode>``)
    determines the label.

    Args:
        mode: Sub-directory of ``root`` to read, e.g. ``'Train'`` or ``'Validation'``.
        transforms: Callable invoked as ``transforms(image=ndarray)`` that returns a
            mapping with the transformed image under the ``'image'`` key.
        root: Directory that contains the ``mode`` sub-directories.
    """

    # (path keyword, class index) pairs, tested in this order.
    CLASS_KEYWORDS = (
        ('Normal', 0),
        ('Lung_Cancer', 1),
        ('ILD', 2),
        ('pneumonia', 3),
        ('pneumothorax', 4),
    )

    def __init__(self, mode, transforms, root='../data/CT'):
        self.base_path = os.path.join(root, mode)
        # glob() yields files in filesystem-dependent order; sorting keeps the
        # index -> file mapping identical across runs and machines.
        self.img_lists = sorted(glob(os.path.join(self.base_path, '*/*.png')))
        self.transforms = transforms

    def __len__(self):
        """Return the number of images found under ``base_path``."""
        return len(self.img_lists)

    @classmethod
    def _label_from_path(cls, img_path):
        """Return the class index of the first keyword contained in ``img_path``.

        Raises:
            ValueError: If no known class keyword occurs in the path.
        """
        for keyword, label in cls.CLASS_KEYWORDS:
            if keyword in img_path:
                return label
        raise ValueError(f"Cannot infer class label from path: {img_path!r}")

    def __getitem__(self, idx):
        """Return ``(transformed_image, label_tensor)`` for sample ``idx``.

        Raises:
            ValueError: If the sample's path matches no class keyword.
        """
        img_path = self.img_lists[idx]
        # Match only below base_path so that directory names in `root` cannot
        # accidentally be mistaken for a class keyword.
        label = self._label_from_path(os.path.relpath(img_path, self.base_path))
        # Close the file handle deterministically and force three channels so the
        # array always agrees with a 3-channel normalisation / 3-channel network input.
        with Image.open(img_path) as image:
            pixels = np.array(image.convert('RGB'))
        np_img = self.transforms(image=pixels)['image']
        return np_img, torch.tensor(label)

In [4]:
# Fix all RNG seeds before anything stochastic runs, then pick the first GPU
# when CUDA is usable and fall back to the CPU otherwise.
seed_everything(seed=42)
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

SUCCESS 42 SEED FIXING


In [5]:
# Per-channel normalisation statistics.
# NOTE(review): these match the commonly published ImageNet values, presumably
# chosen to suit the pretrained backbone — confirm.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]


def _normalize_to_tensor():
    """Build a fresh pipeline: normalise with `mean`/`std`, then convert to a tensor."""
    steps = [
        A.Normalize(mean=mean, std=std, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)


# Training and evaluation currently share the same (augmentation-free) preprocessing,
# but each gets its own pipeline object.
train_transforms = _normalize_to_tensor()
test_transforms = _normalize_to_tensor()

In [6]:
# Datasets for the two labelled splits.
train_dataset = CTDataset(mode='Train', transforms=train_transforms)
valid_dataset = CTDataset(mode='Validation', transforms=test_transforms)

# Training batches are shuffled and the incomplete last batch is dropped;
# validation keeps every sample in a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=2,
    drop_last=True,
)
valid_loader = DataLoader(
    dataset=valid_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
    drop_last=False,
)

In [7]:
# DenseNet-121 with pretrained weights and a 5-way classification head.
model = timm.create_model(
    model_name='densenet121',
    pretrained=True,
    num_classes=5,
)
# Moves the parameters to `device`; as the last expression it also displays the module.
model.to(device)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNormAct2d(
      64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (drop): Identity()
      (act): ReLU(inplace=True)
    )
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): DenseBlock(
      (denselayer1): DenseLayer(
        (norm1): BatchNormAct2d(
          64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNormAct2d(
          128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): ReLU(inplace=True)
        )
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
  

In [8]:
# Multi-class cross-entropy loss, Adam with weight decay, and a cosine learning-rate
# schedule whose period (T_max) is 10 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

In [9]:
# training
print ('Start Training')
print ('-'*30)

num_epochs = 10                                   # number of passes over train_loader
patience = 3                                      # epochs without a new best F1 before stopping
checkpoint_path = '../result_ct/high_score.pth'   # best-so-far weights are written here

# Make sure the output directory exists before the first checkpoint is saved.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

best_val_f1 = 0
early_stop_cnt = 0

for epoch in range(num_epochs):

    # ---- one training epoch ----
    model.train()
    for idx, (train_data, train_labels) in enumerate(train_loader):
        # Use the selected `device` so the loop also runs on CPU-only machines.
        train_data = train_data.to(device)
        train_labels = train_labels.to(device)
        y_pred = model(train_data)
        # CrossEntropyLoss consumes raw logits; no activation is applied beforehand.
        loss = criterion(y_pred, train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- validation ----
    model.eval()
    test_pred = []
    test_true = []
    with torch.no_grad():
        for test_data, test_labels in valid_loader:
            logits = model(test_data.to(device))
            # argmax over logits equals argmax over softmax(logits), so softmax is skipped.
            test_pred.append(torch.argmax(logits, dim=1).cpu().numpy())
            test_true.append(test_labels.cpu().numpy())

    test_true = np.concatenate(test_true)
    test_pred = np.concatenate(test_pred)
    val_f1 = f1_score(test_true, test_pred, average='macro')

    if best_val_f1 < val_f1:
        # New best macro-F1: reset the patience counter and checkpoint the weights.
        best_val_f1 = val_f1
        early_stop_cnt = 0
        torch.save(model.state_dict(), checkpoint_path)
    else:
        early_stop_cnt += 1

    print('Epoch=%s, BatchID=%s, Val_F1=%.4f, Best_Val_F1=%.4f'%(epoch, idx, val_f1, best_val_f1))

    if early_stop_cnt >= patience:
        # Early stopping: leave the loop instead of calling sys.exit(), which would
        # raise SystemExit inside the notebook kernel.
        break

    scheduler.step()

Start Training
------------------------------
Epoch=0, BatchID=499, Val_F1=0.9702, Best_Val_F1=0.9702
Epoch=1, BatchID=499, Val_F1=0.9548, Best_Val_F1=0.9702
Epoch=2, BatchID=499, Val_F1=0.9746, Best_Val_F1=0.9746
Epoch=3, BatchID=499, Val_F1=0.9910, Best_Val_F1=0.9910
Epoch=4, BatchID=499, Val_F1=0.9942, Best_Val_F1=0.9942
Epoch=5, BatchID=499, Val_F1=0.9926, Best_Val_F1=0.9942
Epoch=6, BatchID=499, Val_F1=0.9958, Best_Val_F1=0.9958
Epoch=7, BatchID=499, Val_F1=0.9974, Best_Val_F1=0.9974
Epoch=8, BatchID=499, Val_F1=0.9958, Best_Val_F1=0.9974
Epoch=9, BatchID=499, Val_F1=0.9990, Best_Val_F1=0.9990


In [10]:
class CTData_testset(Dataset):
    """Unlabelled CT test images read from ``<img_dir>/*.png``.

    Args:
        transforms: Callable invoked as ``transforms(image=ndarray)`` that returns a
            mapping with the transformed image under the ``'image'`` key.
        img_dir: Directory that directly contains the test ``.png`` files.
    """

    def __init__(self, transforms, img_dir='../data/CT/Test'):
        # glob() yields files in filesystem-dependent order; sort for a stable ordering.
        self.img_lists = sorted(glob(os.path.join(img_dir, '*.png')))
        self.transforms = transforms

    def __len__(self):
        """Return the number of test images found."""
        return len(self.img_lists)

    def __getitem__(self, idx):
        """Return ``(transformed_image, image_path)`` for sample ``idx``."""
        img_path = self.img_lists[idx]
        # Close the file handle deterministically and force three channels so the
        # array always agrees with a 3-channel normalisation / 3-channel network input.
        with Image.open(img_path) as image:
            pixels = np.array(image.convert('RGB'))
        img = self.transforms(image=pixels)['image']
        return img, img_path

In [11]:
# Unlabelled test split; order is kept fixed and no batch is dropped so every
# image receives exactly one prediction.
test_dataset = CTData_testset(transforms=test_transforms)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=2,
    drop_last=False,
)

In [12]:
# Rebuild the architecture only: every parameter and buffer is overwritten by the
# checkpoint below (strict key matching), so fetching pretrained weights is unnecessary.
model = timm.create_model('densenet121', pretrained=False, num_classes=5)
model.to(device)
# map_location lets a checkpoint saved on a GPU be restored on whichever device is in use.
model.load_state_dict(torch.load('../result_ct/high_score.pth', map_location=device))

<All keys matched successfully>

In [None]:
# inference on the unlabelled test set, followed by writing the submission file
print ('Start Testing')
print ('-'*30)

model.eval()
test_pred = []
data_path_lst = []
with torch.no_grad():
    for test_data, data_path in test_loader:
        # Use the selected `device` so inference also runs on CPU-only machines.
        logits = model(test_data.to(device))
        # argmax over logits equals argmax over softmax(logits), so softmax is skipped.
        test_pred.append(torch.argmax(logits, dim=1).cpu().numpy())
        # Each batch carries the paths of its images; keep them as a flat list of strings.
        data_path_lst.extend(data_path)

test_pred = np.concatenate(test_pred)

# Map file name -> predicted class index.
pred_info = {}

for pred, img_path in zip(test_pred, data_path_lst):
    file_name = os.path.basename(img_path)
    pred_info[file_name] = pred

# Order the predictions by file name.
sort_pred = dict(sorted(pred_info.items()))
# Show only a short preview instead of dumping every prediction into the output.
print(f'{len(sort_pred)} predictions, first 5: {list(sort_pred.items())[:5]}')


submission = pd.read_csv('../result_ct/1001_sample_submission.csv')
# Predictions are assigned by position.
# NOTE(review): this assumes the sample submission rows are in ascending file-name
# order — TODO confirm against the CSV.
if len(submission) != len(sort_pred):
    raise ValueError(
        f'Sample submission has {len(submission)} rows but {len(sort_pred)} predictions were produced'
    )
submission['result'] = list(sort_pred.values())

submission.to_csv('../result_ct/den121_high_score.csv', index = False)
submission.head()