## Import Libraries

In [1]:
import wandb

wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33myup[0m (use `wandb login --relogin` to force relogin)


True

In [1]:
import os, torch, copy, cv2, sys, random
# from datetime import datetime, timezone, timedelta
from PIL import Image
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
from torchvision import models

import matplotlib.pyplot as plt
%matplotlib inline

## Set Arguments & hyperparameters

In [2]:
# 시드(seed) 설정

RANDOM_SEED = 2022

torch.manual_seed(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [3]:
# parameters

### 데이터 디렉토리 설정 ###
DATA_DIR= './data'
NUM_CLS = 2

EPOCHS = 40
BATCH_SIZE = 32
LEARNING_RATE = 5*1e-4
EARLY_STOPPING_PATIENCE = 15
INPUT_SHAPE = 255

os.environ["CUDA_VISIBLE_DEVICES"]="0"
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Dataloader
- data augmetation : grayscale, invert, hotizonflip
- 384 pixel 데이터 resize to 255 : model 크기가 너무 커지는걸 방지

#### Train & Validation Set loader

In [5]:
class CustomDataset(Dataset):
    def __init__(self, data_dir, input_shape, invert=False, aug='normal'):
        self.data_dir = data_dir
        # self.mode = mode
        self.input_shape = input_shape
        self.invert = invert
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Dataset split
        # if self.mode == 'train':
        #     self.db = self.db[:int(len(self.db) * 0.9)]
        # elif self.mode == 'val':
        #     self.db = self.db[int(len(self.db) * 0.9):]
        #     self.db.reset_index(inplace=True)
        # else:
        #     print(f'!!! Invalid split {self.mode}... !!!')
            
        # Transform function
        if aug=='normal' :
            self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                                 transforms.Grayscale(),
                                                 transforms.ToTensor()]
                                                ) # [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
        elif aug=='horizon' :
            self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                                 transforms.RandomHorizontalFlip(p=1.0),
                                                 transforms.Grayscale(),
                                                 transforms.ToTensor()]
                                                )
    

    def data_loader(self):
        print('Loading dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        # (COVID : 1, No : 0)
        db = pd.read_csv(os.path.join(self.data_dir, 'train.csv'))
        
        return db

    def __len__(self):
        return len(self.db)

    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])

        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'train',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))
        if self.invert :
            trans_image = 1 - trans_image

        return trans_image, torch.FloatTensor([1,0]) if data['COVID']==0 else torch.FloatTensor([0,1])


## Model
- resnet50, resnet101, vgg11 사용해본 결과 resnet50이 제일 성능이 좋았음
- resnet50의 conv1 layer와 fc layer를 수정하여 사용

In [6]:
resnet = models.resnet50(pretrained=False)

In [36]:
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
resnet.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [8]:
resnet.fc = nn.Sequential(
            nn.Linear(2048,1024,bias=True),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(1024, 256),
            nn.ReLU(),
            nn.Dropout(0.7),
            nn.Linear(256,2),
            nn.Softmax(dim=1)
        )

## Utils
### EarlyStopper
- validation loss가 제일 작은 model 저장

In [9]:
class LossEarlyStopper():
    """Early stopper
    
    Attributes:
        patience (int): loss가 줄어들지 않아도 학습할 epoch 수
        patience_counter (int): loss 가 줄어들지 않을 때 마다 1씩 증가, 감소 시 0으로 리셋
        min_loss (float): 최소 loss
        stop (bool): True 일 때 학습 중단

    """

    def __init__(self, patience: int)-> None:
        self.patience = patience

        self.patience_counter = 0
        self.min_loss = np.Inf
        self.stop = False
        self.save_model = False

    def check_early_stopping(self, loss: float)-> None:
        """Early stopping 여부 판단"""  

        if self.min_loss == np.Inf:
            self.min_loss = loss
            return None

        elif loss > self.min_loss:
            self.patience_counter += 1
            self.save_model = False
            msg = f"Early stopping counter {self.patience_counter}/{self.patience}"

            if self.patience_counter == self.patience:
                self.stop = True
                
        elif loss <= self.min_loss:
            self.patience_counter = 0
            self.save_model = True
            msg = f"Validation loss decreased {self.min_loss} -> {loss}"
            self.min_loss = loss
        
        print(msg)

### Trainer

In [10]:
class Trainer():
    """ epoch에 대한 학습 및 검증 절차 정의"""
    
    def __init__(self, loss_fn, model, device, metric_fn, optimizer=None, scheduler=None):
        """ 초기화
        """
        self.loss_fn = loss_fn
        self.model = model
        self.device = device
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.metric_fn = metric_fn

    def train_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 학습 절차"""
        
        self.model.train()
        train_total_loss = 0
        target_lst = []
        pred_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.to(self.device)
            label = label.to(self.device).float()
            
            pred = self.model(img)
            
            loss = self.loss_fn(pred, label)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.scheduler.step()
            
            train_total_loss += loss.item()
            target_lst.extend(label.argmax(dim=1).cpu().tolist())
            pred_lst.extend(pred.argmax(dim=1).cpu().tolist())
        self.train_mean_loss = train_total_loss / batch_index
        self.train_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        wandb.log({'train_acc':self.train_score,'train_loss':self.train_mean_loss})
        msg = f'Epoch {epoch_index}, Train loss: {self.train_mean_loss}, Acc: {self.train_score}, F1-Macro: {f1}'
        print(msg)

    def validate_epoch(self, dataloader, epoch_index):
        """ 한 epoch에서 수행되는 검증 절차
        """
        self.model.eval()
        val_total_loss = 0
        target_lst = []
        pred_lst = []

        for batch_index, (img, label) in enumerate(dataloader):
            img = img.to(self.device)
            label = label.to(self.device).float()
            pred = self.model(img)
            
            loss = self.loss_fn(pred, label)
            val_total_loss += loss.item()
            target_lst.extend(label.argmax(dim=1).cpu().tolist())
            pred_lst.extend(pred.argmax(dim=1).cpu().tolist())
        self.val_mean_loss = val_total_loss / batch_index
        self.validation_score, f1 = self.metric_fn(y_pred=pred_lst, y_answer=target_lst)
        wandb.log({'validation_acc':self.validation_score,'validation_loss':self.val_mean_loss})
        msg = f'Epoch {epoch_index}, Val loss: {self.val_mean_loss}, Acc: {self.validation_score}, F1-Macro: {f1}'
        print(msg)

### Metrics

In [11]:
from sklearn.metrics import accuracy_score, f1_score

def get_metric_fn(y_pred, y_answer):
    """ 성능을 반환하는 함수"""
    
    assert len(y_pred) == len(y_answer), 'The size of prediction and answer are not same.'
    accuracy = accuracy_score(y_answer, y_pred)
    f1 = f1_score(y_answer, y_pred, average='macro')
    return accuracy, f1

## Train
### 학습을 위한 객체 선언

#### Load Dataset & Dataloader

In [12]:
augmentation = ['normal','horizon']
d_set = []
for c in augmentation:
    d = CustomDataset(data_dir=DATA_DIR, input_shape=INPUT_SHAPE, aug=c)
    d_invert = CustomDataset(data_dir=DATA_DIR, input_shape=INPUT_SHAPE, invert=True, aug=c)
    d_set.append(d)
    d_set.append(d_invert)

Loading dataset..
Loading dataset..
Loading dataset..
Loading dataset..


In [13]:
N = len(d_set[0])
n = len(d_set[0])//25
train_1= [i for i in range(N)]
train_2= [i for i in range(N)]
train_3= [i for i in range(N)]
train_4= [i for i in range(N)]
valid_1 = [i for i in range(n)]
valid_2 = [i for i in range(2*n,3*n)]
valid_3 = [i for i in range(5*n,6*n)]
valid_4 = [i for i in range(7*n,8*n)]

In [14]:
for i in range(25) :
    train_1.remove(valid_1[i])
    train_2.remove(valid_2[i])
    train_3.remove(valid_3[i])
    train_4.remove(valid_4[i])

In [15]:
train_d_set = []
valid_d_set = []
for i in range(4) :
    train_d_set.append(torch.utils.data.Subset(d_set[i],globals()[f'train_{i+1}']))
    valid_d_set.append(torch.utils.data.Subset(d_set[i],globals()[f'valid_{i+1}']))

In [16]:
train_dataset = torch.utils.data.ConcatDataset(train_d_set)
valid_dataset = torch.utils.data.ConcatDataset(valid_d_set)

In [17]:
# Load dataset & dataloader
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
validation_dataloader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True)
print('Train set samples:',len(train_dataset),  'Val set samples:', len(valid_dataset))

Train set samples: 2484 Val set samples: 100


In [71]:
from torchvision.utils import save_image
save_image(1-train_dataset[16][0],'bbb.png')

#### Load model and other utils

In [18]:
# Load Model
model = resnet.to(DEVICE)

# # Save Initial Model
# torch.save(model.state_dict(), 'initial.pt')

# Set optimizer, scheduler, loss function, metric function
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)
scheduler =  optim.lr_scheduler.OneCycleLR(optimizer=optimizer, pct_start=0.1, div_factor=1e5, max_lr=0.0001, epochs=EPOCHS, steps_per_epoch=len(train_dataloader))
loss_fn = nn.BCELoss()
metric_fn = get_metric_fn


# Set trainer
trainer = Trainer(loss_fn, model, DEVICE, metric_fn, optimizer, scheduler)

# Set earlystopper
early_stopper = LossEarlyStopper(patience=EARLY_STOPPING_PATIENCE)

### epoch 단위 학습 진행

In [None]:
wandb.init(entity='pnm-team',
           project='week1_Image',
           name='ResNet50_augall_v13',
           config={'learning_rate':5*1e-4,
                   'batch_size':32,
                   'epochs':40,
                   'input_size':255,
                   'Augmentation': ['Grayscale','Invert','RandomHorizontalFlip']},
           group='resnet50'
           )
for epoch_index in tqdm(range(EPOCHS)):

    trainer.train_epoch(train_dataloader, epoch_index)
    trainer.validate_epoch(validation_dataloader, epoch_index)

    # early_stopping check
    early_stopper.check_early_stopping(loss=trainer.val_mean_loss)

    if early_stopper.stop:
        print('Early stopped')
        break

    if early_stopper.save_model:
        check_point = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()
        }
        torch.save(check_point, 'v13_ResNet50.pt')
wandb.finish()

  0% 0/40 [00:00<?, ?it/s]

Epoch 0, Train loss: 0.7258137952197682, Acc: 0.5, F1-Macro: 0.49653743092975966


  2% 1/40 [05:15<3:25:21, 315.95s/it]

Epoch 0, Val loss: 0.9441876610120138, Acc: 0.49, F1-Macro: 0.34539853677319987
Epoch 1, Train loss: 0.709755898296059, Acc: 0.5237520128824477, F1-Macro: 0.5119046017373677


  5% 2/40 [09:58<3:13:45, 305.94s/it]

Epoch 1, Val loss: 0.9753856658935547, Acc: 0.5, F1-Macro: 0.4832575444398512
Early stopping counter 1/15
Epoch 2, Train loss: 0.6994163943575574, Acc: 0.5567632850241546, F1-Macro: 0.5471997629113557
Epoch 2, Val loss: 0.8776026566823324, Acc: 0.53, F1-Macro: 0.46438746438746437
Validation loss decreased 0.9441876610120138 -> 0.8776026566823324


  8% 3/40 [14:40<3:04:15, 298.79s/it]

Epoch 3, Train loss: 0.6921268546736086, Acc: 0.571658615136876, F1-Macro: 0.5633820407143749


 10% 4/40 [19:24<2:56:37, 294.38s/it]

Epoch 3, Val loss: 0.8853931228319804, Acc: 0.63, F1-Macro: 0.609333755675219
Early stopping counter 1/15
Epoch 4, Train loss: 0.6463371015988387, Acc: 0.6481481481481481, F1-Macro: 0.646290822072072
Epoch 4, Val loss: 0.8058781226476034, Acc: 0.58, F1-Macro: 0.5543293718166383
Validation loss decreased 0.8776026566823324 -> 0.8058781226476034


 12% 5/40 [23:59<2:48:16, 288.47s/it]

Epoch 5, Train loss: 0.6117360812503022, Acc: 0.6815619967793881, F1-Macro: 0.6788802638195821
Epoch 5, Val loss: 0.7436487972736359, Acc: 0.7, F1-Macro: 0.7000000000000001
Validation loss decreased 0.8058781226476034 -> 0.7436487972736359


 15% 6/40 [28:37<2:41:39, 285.28s/it]

Epoch 6, Train loss: 0.5190450471717042, Acc: 0.7471819645732689, F1-Macro: 0.7461286685536179
Epoch 6, Val loss: 0.5087483872969946, Acc: 0.82, F1-Macro: 0.8188405797101448
Validation loss decreased 0.7436487972736359 -> 0.5087483872969946


 18% 7/40 [33:44<2:40:29, 291.80s/it]

Epoch 7, Train loss: 0.47426263852552936, Acc: 0.7950885668276972, F1-Macro: 0.7940664512248374


 20% 8/40 [38:53<2:38:26, 297.07s/it]

Epoch 7, Val loss: 0.9700694680213928, Acc: 0.69, F1-Macro: 0.6846709388668497
Early stopping counter 1/15
Epoch 8, Train loss: 0.4100865437225862, Acc: 0.8293075684380032, F1-Macro: 0.828541846419327


 22% 9/40 [44:08<2:36:16, 302.48s/it]

Epoch 8, Val loss: 0.5426599681377411, Acc: 0.82, F1-Macro: 0.8197115384615385
Early stopping counter 2/15
Epoch 9, Train loss: 0.2963409696693544, Acc: 0.8828502415458938, F1-Macro: 0.8822658886177361


 25% 10/40 [49:13<2:31:31, 303.06s/it]

Epoch 9, Val loss: 0.6553780535856882, Acc: 0.78, F1-Macro: 0.7785829307568437
Early stopping counter 3/15
Epoch 10, Train loss: 0.22425770962780173, Acc: 0.9142512077294686, F1-Macro: 0.9139415636327262


 28% 11/40 [54:20<2:27:05, 304.33s/it]

Epoch 10, Val loss: 0.5178041358788809, Acc: 0.84, F1-Macro: 0.8376623376623377
Early stopping counter 4/15
Epoch 11, Train loss: 0.1936799128624526, Acc: 0.926731078904992, F1-Macro: 0.9263613974277114
Epoch 11, Val loss: 0.43829717735449475, Acc: 0.86, F1-Macro: 0.859775641025641
Validation loss decreased 0.5087483872969946 -> 0.43829717735449475


 30% 12/40 [59:07<2:19:39, 299.28s/it]

Epoch 12, Train loss: 0.13270739393381328, Acc: 0.9533011272141707, F1-Macro: 0.9531209428393861
Epoch 12, Val loss: 0.37762781977653503, Acc: 0.91, F1-Macro: 0.9097744360902256
Validation loss decreased 0.43829717735449475 -> 0.37762781977653503


 32% 13/40 [1:03:46<2:11:52, 293.07s/it]

Epoch 13, Train loss: 0.11167179310525005, Acc: 0.9577294685990339, F1-Macro: 0.9575684899916983


 35% 14/40 [1:08:29<2:05:43, 290.15s/it]

Epoch 13, Val loss: 1.6339832941691081, Acc: 0.72, F1-Macro: 0.6961805555555556
Early stopping counter 1/15
Epoch 14, Train loss: 0.1088668822202009, Acc: 0.962157809983897, F1-Macro: 0.9620155805485207


 38% 15/40 [1:13:40<2:03:24, 296.17s/it]

Epoch 14, Val loss: 0.6363657141725222, Acc: 0.82, F1-Macro: 0.8200000000000001
Early stopping counter 2/15
Epoch 15, Train loss: 0.0900808692389688, Acc: 0.9694041867954911, F1-Macro: 0.9692891927839102


 40% 16/40 [1:18:26<1:57:19, 293.33s/it]

Epoch 15, Val loss: 1.1283962031205494, Acc: 0.78, F1-Macro: 0.7688104245481294
Early stopping counter 3/15


## Inference
### 모델 로드

In [None]:
TRAINED_MODEL_PATH = 'v13_ResNet50.pt'

### Load dataset

In [None]:
class TestDataset(Dataset):
    def __init__(self, data_dir, input_shape):
        self.data_dir = data_dir
        self.input_shape = input_shape
        
        # Loading dataset
        self.db = self.data_loader()
        
        # Transform function
        self.transform = transforms.Compose([transforms.Resize(self.input_shape),
                                             transforms.Grayscale(),
                                             transforms.ToTensor()]
                                            )

    def data_loader(self):
        print('Loading test dataset..')
        if not os.path.isdir(self.data_dir):
            print(f'!!! Cannot find {self.data_dir}... !!!')
            sys.exit()
        
        db = pd.read_csv(os.path.join(self.data_dir, 'sample_submission.csv'))
        return db
    
    def __len__(self):
        return len(self.db)
    
    def __getitem__(self, index):
        data = copy.deepcopy(self.db.loc[index])
        
        # Loading image
        cvimg = cv2.imread(os.path.join(self.data_dir,'test',data['file_name']), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if not isinstance(cvimg, np.ndarray):
            raise IOError("Fail to read %s" % data['file_name'])

        # Preprocessing images
        trans_image = self.transform(Image.fromarray(cvimg))

        return trans_image, data['file_name']

In [None]:
# Load dataset & dataloader
test_dataset = TestDataset(data_dir=DATA_DIR, input_shape=INPUT_SHAPE)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

### 추론 진행

In [None]:
model.load_state_dict(torch.load(TRAINED_MODEL_PATH)['model'])

# Prediction
file_lst = []
pred_lst = []
model.eval()
with torch.no_grad():
    for batch_index, (img, file_num) in tqdm(enumerate(test_dataloader)):
        img = img.to(DEVICE)
        pred = model(img)
        print(pred)
        file_lst.extend(list(file_num))
        pred_lst.extend(pred.argmax(dim=1).tolist())

### 결과 저장

In [None]:
df = pd.DataFrame({'file_name':file_lst, 'COVID':pred_lst})
# df.sort_values(by=['file_name'], inplace=True)
df.to_csv('submission_v013.csv', index=False)

In [None]:
df