## 13.3 베이스라인 모델
* EfficientNet b0 이용

In [36]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from glob import glob
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cv2
import torch
import random
import os
from tqdm.auto import tqdm

### 13.3.1 시드값 고정 및 GPU 장비 설정

In [22]:
# 시드값 고정
seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

In [23]:
# GPU 장비 설정
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### 13.3.2 데이터 준비

In [24]:
data_path = '../../data/13_xray/chest_xray/'

train_path = data_path + 'train/'
valid_path = data_path + 'val/'
test_path = data_path + 'test/'

In [25]:
# 데이터 증강을 위한 이미지 변환기 정의
from torchvision import transforms
transform_train = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.CenterCrop(180), # 중앙 이미지 확대
    transforms.RandomHorizontalFlip(0.5), # 좌우 대칭
    transforms.RandomVerticalFlip(0.2), # 상하 대칭
    transforms.RandomRotation(20), # 이미지 회전
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
transform_valid = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.CenterCrop(180), # 중앙 이미지 확대
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

In [41]:
# 데이터셋 및 데이터 로더 생성
from torchvision.datasets import ImageFolder
datasets_train = ImageFolder(root=train_path, transform=transform_train)
datasets_valid = ImageFolder(root=valid_path, transform=transform_valid)

In [42]:
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2 ** 32
    np.random.seed(worker_seed)
    random.seed(worker_seed)
g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x29825580530>

In [51]:
from torch.utils.data import DataLoader

batch_size = 8
loader_train = DataLoader(dataset=datasets_train, batch_size=batch_size, shuffle=True, worker_init_fn=seed_worker, generator=g, num_workers=0)
loader_valid = DataLoader(dataset=datasets_valid, batch_size=batch_size, shuffle=False, worker_init_fn=seed_worker, generator=g, num_workers=0)

### 13.3.3 모델 생성

In [52]:
import timm
model = timm.create_model('efficientnet_b0', pretrained=True).to(device)

In [53]:
print(f'모델 파라미터 개수 : {sum(param.numel() for param in model.parameters())}')

모델 파라미터 개수 : 5288548


### 13.3.4 모델 훈련 및 성능 검증

In [54]:
# 손실 함수와 옵티마이저 설정
import torch.nn as nn
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [83]:
from sklearn.metrics import accuracy_score,f1_score,recall_score 

# 훈련 함수 작성
def train(model, loader_train, loader_valid, criterion, optimizer, scheduler=None, epochs=10, save_file='model_state_dict.pth'):
    valid_loss_min = np.inf
    for epoch in range(epochs):
        print(f'train epoch [{epoch+1}/{epochs}]\n-------------------------------')
        model.train()
        epoch_train_loss=0
        for images, labels in tqdm(loader_train):
            images = images.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            epoch_train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if scheduler !=None:
                scheduler.step()
        print(f'\t훈련 데이터 손실값 : {epoch_train_loss/len(loader_train):.4f}')
    
        print(f'valid epoch [{epoch+1}/{epochs}]\n-------------------------------')
        model.eval()
        epoch_valid_loss=0
        preds_list = []
        true_list = []
        with torch.no_grad():
            for images, labels in tqdm(loader_valid):
                images = images.to(device)
                labels = labels.to(device)
                
                outputs = model(images)
                loss = criterion(outputs, labels)
                epoch_valid_loss += loss.item()
                
                preds = torch.max(outputs.cpu(), dim=1)[1].numpy()
                true = labels.cpu().numpy()
                
                preds_list.extend(preds)
                true_list.extend(true)
                
            print(f'\tvalid 데이터 손실값 : {epoch_valid_loss/len(loader_valid):.4f}')
            val_accuracy = accuracy_score(true_list, preds_list)
            val_recall = recall_score(true_list, preds_list)
            val_f1_score = f1_score(true_list, preds_list)
            print(f'\t정확도 : {val_accuracy:.4f} / 재현률 : {val_recall:.4f} / F1-score : {val_f1_score:.4f}')
            
            if epoch_valid_loss <= valid_loss_min:
                print(f'\t valid 데이터 loss 감소({valid_loss_min:.4f}=>{epoch_valid_loss:.4f}). 모델 저장')
                torch.save(model.state_dict(), save_file)
                valid_loss_min = epoch_valid_loss
    
    return torch.load(save_file)

In [84]:
# 훈련 및 성능 검증
model_state_dict = train(model=model,loader_train=loader_train, loader_valid=loader_valid, criterion=criterion, optimizer=optimizer)

train epoch [1/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2132
valid epoch [1/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.5828
	정확도 : 0.8574 / 재현률 : 0.9513 / F1-score : 0.8929
	 valid 데이터 loss 감소(inf=>45.4574). 모델 저장
train epoch [2/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1919
valid epoch [2/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 4.1133
	정확도 : 0.6891 / 재현률 : 0.9949 / F1-score : 0.8000
train epoch [3/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1755
valid epoch [3/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.6792
	정확도 : 0.7596 / 재현률 : 0.9897 / F1-score : 0.8373
train epoch [4/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1815
valid epoch [4/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.4804
	정확도 : 0.7981 / 재현률 : 0.9436 / F1-score : 0.8538
	 valid 데이터 loss 감소(45.4574=>37.4750). 모델 저장
train epoch [5/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1608
valid epoch [5/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 1.2790
	정확도 : 0.6490 / 재현률 : 1.0000 / F1-score : 0.7808
train epoch [6/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1691
valid epoch [6/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.7847
	정확도 : 0.7901 / 재현률 : 0.9974 / F1-score : 0.8559
train epoch [7/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1506
valid epoch [7/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.7997
	정확도 : 0.7869 / 재현률 : 0.9821 / F1-score : 0.8521
train epoch [8/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1495
valid epoch [8/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.5790
	정확도 : 0.8253 / 재현률 : 0.9923 / F1-score : 0.8766
train epoch [9/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1245
valid epoch [9/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 0.4293
	정확도 : 0.8766 / 재현률 : 0.9821 / F1-score : 0.9087
	 valid 데이터 loss 감소(37.4750=>33.4870). 모델 저장
train epoch [10/10]
-------------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1291
valid epoch [10/10]
-------------------------------


  0%|          | 0/78 [00:00<?, ?it/s]

	valid 데이터 손실값 : 1.9292
	정확도 : 0.6795 / 재현률 : 1.0000 / F1-score : 0.7959


In [85]:
sample = torch.rand(4,2)
sample

tensor([[0.1644, 0.5569],
        [0.0744, 0.0333],
        [0.0491, 0.5233],
        [0.3611, 0.0170]])

In [86]:
torch.max(sample, dim=1)

torch.return_types.max(
values=tensor([0.5569, 0.0744, 0.5233, 0.3611]),
indices=tensor([1, 0, 1, 0]))

### 13.3.5 예측 및 평가 결과

In [87]:
datasets_test = ImageFolder(root=test_path, transform=transform_valid)
loader_test = DataLoader(
    dataset=datasets_test, 
    batch_size=batch_size, 
    shuffle=False, 
    worker_init_fn=seed_worker,
    generator=g,
    num_workers=0
)

In [88]:
def predict(model, loader_test, return_true=False):
    model.eval()
    preds_list = []
    true_list = []
    
    with torch.no_grad():
        for images, labels in loader_test:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            preds = torch.max(outputs.cpu(), dim=1)[1].numpy()
            true = labels.cpu().numpy()
            preds_list.extend(preds)
            true_list.extend(true)
    if return_true:
        return true_list, preds_list
    else:
        return preds_list

In [89]:
true_list, preds_list = predict(model=model, loader_test=loader_test, return_true=True)

In [90]:
# 평가 결과
accuracy = accuracy_score(true_list, preds_list)
recall = recall_score(true_list, preds_list)
f1_score = f1_score(true_list, preds_list)
print(f'\t정확도 : {accuracy:.4f} / 재현률 : {recall:.4f} / F1-score : {f1_score:.4f}')

	정확도 : 0.6795 / 재현률 : 1.0000 / F1-score : 0.7959
