In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import torch
import random
import numpy as np
import os

seed = 50
os.environ['PYTHONHASHSEED']=str(seed)
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

In [3]:
device = torch.device('cuda' if torch.cuda.is_available else 'cpu')

In [7]:
data_path = '/home/baebro/nipa_ws/Pneumonia Binary Classification/chest_xray/'

train_path = data_path + 'train/'
valid_path = data_path +'val/'
test_path = data_path + 'test/'

In [8]:
from torchvision import transforms

transform_train = transforms.Compose([
    transforms.Resize((250, 250)),
    transforms.CenterCrop(180),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.2),
    transforms.RandomRotation(20),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

transform_test = transforms.Compose([
    transforms.Resize((250, 250)),
    transforms.CenterCrop(180),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

In [9]:
from torchvision.datasets import ImageFolder

datasets_train = ImageFolder(root=train_path, transform=transform_train)
datasets_valid = ImageFolder(root=valid_path, transform=transform_test)

In [10]:
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x7fb0393fdcb0>

In [11]:
from torch.utils.data import DataLoader

batch_size = 8

loader_train = DataLoader(dataset=datasets_train, batch_size=batch_size,
                         shuffle=True, worker_init_fn=seed_worker,
                         generator=g, num_workers=2)
loader_valid = DataLoader(dataset=datasets_valid, batch_size=batch_size,
                         shuffle=False, worker_init_fn=seed_worker,
                         generator=g, num_workers=2)

In [12]:
from efficientnet_pytorch import EfficientNet

model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=2)

model.to(device)

Loaded pretrained weights for efficientnet-b0


EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d((0, 1, 0, 1))
  )
  (_bn0): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        32, 32, kernel_size=(3, 3), stride=[1, 1], groups=32, bias=False
        (static_padding): ZeroPad2d((1, 1, 1, 1))
      )
      (_bn1): BatchNorm2d(32, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        32, 8, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        8, 32, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2dStaticSamePadding(
        32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
    

In [17]:
print('model parameter :', sum(param.numel() for param in model.parameters()))

model parameter : 4010110


In [18]:
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

In [19]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [21]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from tqdm.notebook import tqdm

def train(model, loader_train, loader_valid, criterion, optimizer, scheduler=None, epochs=10, save_file='model_state_dict.pth'):

    valid_loss_min = np.inf

    for epoch in range(epochs):

        print(f'epoch [{epoch+1}/{epochs}]')
        model.train()
        epoch_train_loss = 0

        # mini batch train loop
        for images, labels in loader_train:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            epoch_train_loss + loss.item()
            loss.backward()
            optimizer.step()

            if scheduler != None:
                scheduler.step()

        print(f'\ttrain loss : {epoch_train_loss/len(loader_train):4f}')

        model.eval()
        epoch_valid_loss = 0
        preds_list = []
        true_list = []

        # mini batch validation loop
        with torch.no_grad():
            for images, labels in loader_valid:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)
                epoch_valid_loss += loss.item()

                preds = torch.max(outputs.cpu(), dim=1)[1].numpy()
                true = labels.cpu().numpy()

                preds_list.extend(preds)
                true_list.extend(true)
        print(f'\tvalid loss : {epoch_valid_loss/len(loader_valid):4f}')

        val_accuracy_score = accuracy_score(true_list, preds_list)
        val_recall = recall_score(true_list, preds_list)
        val_f1_score = f1_score(true_list, preds_list)

        print(f'accuracy : {val_accuracy_score:.4f} / recall : {val_recall:.4f} / f1 score : {val_f1_score:.4f}')

        if epoch_valid_loss <= valid_loss_min:
            print(f'\t### valid loss decrease ({valid_loss_min:.4f} --> {epoch_valid_loss:.4f}). model saved')
            torch.save(model.state_dict(), save_file)
            valid_loss_min = epoch_valid_loss

    return torch.load(save_file)

In [22]:
model_state_dict = train(model=model, loader_train=loader_train, loader_valid=loader_valid,criterion=criterion, optimizer=optimizer, 
                        save_file='/home/baebro/nipa_ws/Pneumonia Binary Classification/model_state_dict.pth')

epoch [1/10]
	train loss : 0.000000
	valid loss : 0.756567
accuracy : 0.7500 / recall : 0.6250 / f1 score : 0.7143
	### valid loss decrease (inf --> 1.5131). model saved
epoch [2/10]
	train loss : 0.000000
	valid loss : 1.077170
accuracy : 0.3750 / recall : 0.6250 / f1 score : 0.5000
epoch [3/10]
	train loss : 0.000000
	valid loss : 0.985954
accuracy : 0.5625 / recall : 0.7500 / f1 score : 0.6316
epoch [4/10]
	train loss : 0.000000
	valid loss : 0.991221
accuracy : 0.5625 / recall : 0.1250 / f1 score : 0.2222
epoch [5/10]
	train loss : 0.000000
	valid loss : 0.881279
accuracy : 0.6875 / recall : 1.0000 / f1 score : 0.7619
epoch [6/10]
	train loss : 0.000000
	valid loss : 6.374924
accuracy : 0.5000 / recall : 1.0000 / f1 score : 0.6667
epoch [7/10]
	train loss : 0.000000
	valid loss : 1.539275
accuracy : 0.5625 / recall : 1.0000 / f1 score : 0.6957
epoch [8/10]
	train loss : 0.000000
	valid loss : 0.699769
accuracy : 0.6875 / recall : 0.8750 / f1 score : 0.7368
	### valid loss decrease 

  return torch.load(save_file)


In [23]:
model.load_state_dict(model_state_dict)

<All keys matched successfully>

In [24]:
datasets_test = ImageFolder(root=test_path, transform=transform_test)

loader_test = DataLoader(dataset=datasets_test, batch_size=batch_size,
                        shuffle=False, worker_init_fn=seed_worker, generator=g, num_workers=2)

In [25]:
def predict(model, loader_test, return_true=False):
    model.eval()
    preds_list = []
    true_list = []

    with torch.no_grad():
        for images, labels in loader_test:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)

            preds = torch.max(outputs.cpu(), dim=1)[1].numpy()
            true = labels.cpu().numpy()

            preds_list.extend(preds)
            true_list.extend(true)

    if return_true:
        return true_list, preds_list
    else:
        preds_list

In [26]:
true_list, preds_list = predict(model=model, loader_test=loader_test, return_true=True)

In [27]:
print('#'*5, 'final pred score', '#'*5)
print(f'accuracy score : {accuracy_score(true_list,preds_list):.4f}')
print(f'recall score : {recall_score(true_list, preds_list):.4f}')
print(f'f1 score : {f1_score(true_list, preds_list):.4f}')

##### final pred score #####
accuracy score : 0.8510
recall score : 0.9359
f1 score : 0.8870
