### Fix seed value

In [1]:
import torch 
import random
import numpy as np
import os

seed = 50
os.environ['PYTHONHASHSEED'] = str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

### GPU Setting

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Data preparation

In [3]:
data_path = '/kaggle/input/chest-xray-pneumonia/chest_xray/'

train_path = data_path + 'train/'
valid_path = data_path + 'val/'
test_path = data_path + 'test/'

### Image transformer for data augmentation

In [4]:
from torchvision import transforms

transform_train = transforms.Compose([
                          transforms.Resize((250, 250)),      
                          transforms.CenterCrop(180),         
                          transforms.RandomHorizontalFlip(0.5), 
                          transforms.RandomVerticalFlip(0.2),
                          transforms.RandomRotation(20),      
                          transforms.ToTensor(),              
                          transforms.Normalize((0.485, 0.456, 0.406), 
                                               (0.229, 0.224, 0.225))]) 

transform_test = transforms.Compose([
                          transforms.Resize((250, 250)),
                          transforms.CenterCrop(180),
                          transforms.ToTensor(),
                          transforms.Normalize((0.485, 0.456, 0.406), 
                                               (0.229, 0.224, 0.225))])

### Dataset loader

In [5]:
from torchvision.datasets import ImageFolder


datasets_train = ImageFolder(root=train_path, transform=transform_train)
datasets_valid = ImageFolder(root=valid_path, transform=transform_test) 

In [6]:
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x7fe2edcf8ef0>

In [7]:
from torch.utils.data import DataLoader

batch_size = 8

loader_train = DataLoader(dataset=datasets_train, batch_size=batch_size, 
                          shuffle=True, worker_init_fn=seed_worker,
                          generator=g, num_workers=2)
loader_valid = DataLoader(dataset=datasets_valid, batch_size=batch_size, 
                          shuffle=False, worker_init_fn=seed_worker,
                          generator=g, num_workers=2)

## Create model

In [8]:
!pip install efficientnet-pytorch==0.7.1

Collecting efficientnet-pytorch==0.7.1
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l- \ done
[?25h  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16446 sha256=05c69eef7dc1f4b38c54ea8e64d6e292806dfc47d7f29981ad3cf95d56cffd6b
  Stored in directory: /root/.cache/pip/wheels/0e/cc/b2/49e74588263573ff778da58cc99b9c6349b496636a7e165be6
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [9]:
from efficientnet_pytorch import EfficientNet
model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=2) 
model = model.to(device)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth


  0%|          | 0.00/20.4M [00:00<?, ?B/s]

Loaded pretrained weights for efficientnet-b0


In [10]:
print('모델 파라미터 개수 :', sum(param.numel() for param in model.parameters()))

모델 파라미터 개수 : 4010110


## Model train and evaluation

### loss function and optimizer

In [11]:
import torch.nn as nn
criterion = nn.CrossEntropyLoss()

In [12]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

### Train function

In [13]:
from sklearn.metrics import accuracy_score 
from sklearn.metrics import recall_score   
from sklearn.metrics import f1_score       
from tqdm.notebook import tqdm             

def train(model, loader_train, loader_valid, criterion, optimizer, 
          scheduler=None, epochs=10, save_file='model_state_dict.pth'):
    
    valid_loss_min = np.inf 

    for epoch in range(epochs):
        print(f'에폭 [{epoch+1}/{epochs}] \n-----------------------------')

        model.train()      
        epoch_train_loss = 0 
 
        for images, labels in tqdm(loader_train):
          
            images = images.to(device)
            labels = labels.to(device)
            
            
            optimizer.zero_grad()
           
            outputs = model(images)
          
            loss = criterion(outputs, labels)
           
            epoch_train_loss += loss.item() 
            loss.backward()      
            optimizer.step()      
            if scheduler != None:  
                scheduler.step() 
        
        print(f'\t훈련 데이터 손실값 : {epoch_train_loss/len(loader_train):.4f}')
        
        model.eval()         
        epoch_valid_loss = 0 
        preds_list = []      
        true_list = []       
        
        with torch.no_grad(): 
            for images, labels in loader_valid:
                images = images.to(device)
                labels = labels.to(device)
                
                outputs = model(images)
                loss = criterion(outputs, labels)
                epoch_valid_loss += loss.item()
                
               
                preds = torch.max(outputs.cpu(), dim=1)[1].numpy() 
                true = labels.cpu().numpy() 
    
                preds_list.extend(preds)
                true_list.extend(true)
                
        val_accuracy = accuracy_score(true_list, preds_list)
        val_recall = recall_score(true_list, preds_list)
        val_f1_score = f1_score(true_list, preds_list)

        print(f'\t검증 데이터 손실값 : {epoch_valid_loss/len(loader_valid):.4f}')
        print(f'\t정확도 : {val_accuracy:.4f} / 재현율 : {val_recall:.4f} / F1 점수 : {val_f1_score:.4f}')

        if epoch_valid_loss <= valid_loss_min: 
            print(f'\t### 검증 데이터 손실값 감소 ({valid_loss_min:.4f} --> {epoch_valid_loss:.4f}). 모델 저장')
            torch.save(model.state_dict(), save_file) 
            valid_loss_min = epoch_valid_loss 
    return torch.load(save_file)

In [14]:
import torch 

sample = torch.rand(4, 2)
sample

tensor([[0.7347, 0.5148],
        [0.5171, 0.8200],
        [0.3602, 0.4330],
        [0.6107, 0.7663]])

In [15]:
torch.max(sample, dim=1)

torch.return_types.max(
values=tensor([0.7347, 0.8200, 0.4330, 0.7663]),
indices=tensor([0, 1, 1, 1]))

In [16]:
torch.max(sample, dim=1)[1]

tensor([0, 1, 1, 1])

### Train and evaluation

In [17]:
model_state_dict = train(model=model,
                         loader_train=loader_train, 
                         loader_valid=loader_valid,
                         criterion=criterion, 
                         optimizer=optimizer)

에폭 [1/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.4840
	검증 데이터 손실값 : 0.8669
	정확도 : 0.5000 / 재현율 : 0.7500 / F1 점수 : 0.6000
	### 검증 데이터 손실값 감소 (inf --> 1.7337). 모델 저장
에폭 [2/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2799
	검증 데이터 손실값 : 1.1663
	정확도 : 0.5000 / 재현율 : 0.8750 / F1 점수 : 0.6364
에폭 [3/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2409
	검증 데이터 손실값 : 2.0441
	정확도 : 0.5625 / 재현율 : 1.0000 / F1 점수 : 0.6957
에폭 [4/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2278
	검증 데이터 손실값 : 0.6337
	정확도 : 0.6875 / 재현율 : 0.8750 / F1 점수 : 0.7368
	### 검증 데이터 손실값 감소 (1.7337 --> 1.2673). 모델 저장
에폭 [5/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2221
	검증 데이터 손실값 : 12.3483
	정확도 : 0.5000 / 재현율 : 1.0000 / F1 점수 : 0.6667
에폭 [6/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2098
	검증 데이터 손실값 : 3.4247
	정확도 : 0.5625 / 재현율 : 1.0000 / F1 점수 : 0.6957
에폭 [7/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.2019
	검증 데이터 손실값 : 1.1218
	정확도 : 0.5625 / 재현율 : 1.0000 / F1 점수 : 0.6957
에폭 [8/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1748
	검증 데이터 손실값 : 3.5465
	정확도 : 0.6250 / 재현율 : 1.0000 / F1 점수 : 0.7273
에폭 [9/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1818
	검증 데이터 손실값 : 0.3808
	정확도 : 0.8750 / 재현율 : 1.0000 / F1 점수 : 0.8889
	### 검증 데이터 손실값 감소 (1.2673 --> 0.7617). 모델 저장
에폭 [10/10] 
-----------------------------


  0%|          | 0/652 [00:00<?, ?it/s]

	훈련 데이터 손실값 : 0.1657
	검증 데이터 손실값 : 0.5013
	정확도 : 0.6250 / 재현율 : 0.7500 / F1 점수 : 0.6667


In [18]:
model.load_state_dict(model_state_dict)

<All keys matched successfully>

## Prediction and evaluation result

In [19]:
datasets_test = ImageFolder(root=test_path, transform=transform_test)

loader_test = DataLoader(dataset=datasets_test, batch_size=batch_size, 
                         shuffle=False, worker_init_fn=seed_worker,
                         generator=g, num_workers=2)

### Prediction

In [20]:
def predict(model, loader_test, return_true=False):
    model.eval()    
    preds_list = [] 
    true_list = []  

    with torch.no_grad(): 
        for images, labels in loader_test:
            images = images.to(device)
            labels = labels.to(device)
            
            outputs = model(images)
            
            preds = torch.max(outputs.cpu(), dim=1)[1].numpy() # 예측값
            true = labels.cpu().numpy() # 실제값 

            preds_list.extend(preds)
            true_list.extend(true)

    if return_true:
        return true_list, preds_list
    else:
        return preds_list

In [21]:
true_list, preds_list = predict(model=model, 
                                loader_test=loader_test, 
                                return_true=True)

### Prediction result

In [22]:
print('#'*5, '최종 예측 결과 평가 점수', '#'*5)
print(f'정확도 : {accuracy_score(true_list, preds_list):.4f}')
print(f'재현율 : {recall_score(true_list, preds_list):.4f}')
print(f'F1 점수 : {f1_score(true_list, preds_list):.4f}')

##### 최종 예측 결과 평가 점수 #####
정확도 : 0.8734
재현율 : 0.9385
F1 점수 : 0.9026
