In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.model_selection import StratifiedKFold
from PIL import Image
from tqdm import tqdm
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch
from sklearn.metrics import roc_auc_score

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Root folder of the SIIM-ACR pneumothorax dataset; its contents are listed
# right away as a quick check that the input is mounted where expected.
main_path = '/kaggle/input/pneumothorax-chest-xray-images-and-masks/siim-acr-pneumothorax'
print(os.listdir(path=main_path))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import sys
sys.path.append('../input/pretrainedmodels/pretrainedmodels-0.7.4')
import pretrainedmodels
from pneumothorax_dataset import ClassificationDataset
from pneumothorax_model import get_model
import pneumothorax_engine as engine

import albumentations

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Number of train/evaluate rounds executed by the training loop.
epochs = 5

In [None]:
# Build the network with pretrained weights requested and move it to `device`.
model = get_model(pretrained=True).to(device)

In [None]:
# Read the stage-1 train and test image listings from the dataset root.
train_data = pd.read_csv(os.path.join(main_path, 'stage_1_train_images.csv'))
test_data = pd.read_csv(os.path.join(main_path, 'stage_1_test_images.csv'))

# Turn every file name into a full path inside the shared png_images folder.
train_data['images'] = train_data['new_filename'].map(
    lambda fname: os.path.join(main_path, 'png_images', fname)
)
test_data['images'] = test_data['new_filename'].map(
    lambda fname: os.path.join(main_path, 'png_images', fname)
)

# Plain Python lists of image paths and has_pneumo labels for the split below.
images = train_data['images'].to_list()
targets = train_data['has_pneumo'].to_list()

In [None]:
# Per-channel normalisation constants.
# NOTE(review): these match the widely used ImageNet statistics, presumably
# chosen to suit the pretrained backbone -- confirm against get_model.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# Only normalisation is applied; the same pipeline is shared by train and validation.
aug = albumentations.Compose([albumentations.Normalize(mean, std, max_pixel_value=255, always_apply=True)])

# Stratified split so both subsets keep the same positive/negative ratio.
# NOTE(review): train_size=0.25 trains on 25% of the data and validates on the
# remaining 75% -- confirm this is intentional and not meant to be test_size=0.25.
train_images, val_images, train_targets, val_targets = train_test_split(images, targets, stratify=targets, train_size=0.25)

train_dataset = ClassificationDataset(train_images, train_targets, resize=(224, 224), augmentations=aug)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4)

# Validation must pair val_images with val_targets; the previous code passed
# train_images here, so the labels did not correspond to the images evaluated.
val_dataset = ClassificationDataset(val_images, val_targets, resize=(224, 224), augmentations=aug)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4)

In [None]:
# Adam over all model parameters with a fixed learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)

# Each round: one engine.train call on the training loader, then
# engine.evaluate on the validation loader, scored with ROC AUC.
for epoch in range(epochs):
    engine.train(train_loader, model, optimizer, device)
    predictions, actual = engine.evaluate(val_loader, model, device)

    roc_auc = roc_auc_score(y_true=actual, y_score=predictions)

    print(f'Epoch={epoch}, ROC score={roc_auc}')