In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision.models import resnext50_32x4d
from torch import nn
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import albumentations
from tqdm.notebook import tqdm
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Annotation table for the training images; later cells read its `image_id`
# and `class_name` columns.
df_train = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
# Sample submission; its `image_id` column is used later to enumerate the test images.
df_submission = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/sample_submission.csv')

In [None]:
# Collapse the annotation table to one binary label per image:
#   Finding = 0 -> every annotation of the image is "No finding"
#   Finding = 1 -> at least one annotation reports something else
# Taking the per-image maximum (instead of drop_duplicates on the pair) guarantees
# exactly one row per image_id, so an image with mixed annotations cannot end up
# in both the training and validation split.
# The guard makes the cell safe to re-run: after the first run `class_name` is gone
# and `df_train` already has the (image_id, Finding) layout.
if "class_name" in df_train.columns:
    df_train = (
        df_train
        .assign(Finding = np.where(df_train.class_name == "No finding", 0, 1))
        .groupby("image_id", as_index = False, sort = False)["Finding"]
        .max()
    )
df_train.head()

In [None]:
df_submission.head()

In [None]:
df_train.Finding.value_counts()

In [None]:
df_train.Finding.nunique()

In [None]:
# Stratified 80/20 split so both parts keep the same Finding / No-finding ratio.
# A fixed random_state makes the partition (and therefore every reported metric)
# reproducible under Restart & Run All.
train, val = train_test_split(
    df_train,
    test_size = 0.20,
    shuffle = True,
    stratify = df_train['Finding'],
    random_state = 42,
)
# Positional indices are used later to pair paths with targets, so start both at 0.
train = train.reset_index(drop = True)
val = val.reset_index(drop = True)
train.shape, val.shape

In [None]:
import os
os.path.exists('../input/vinbigdata-chest-xray-resized-png-1024x1024/train/9a5094b2563a1ef3ff50dc5c7ff71345.png')

In [None]:
# Directory holding the resized PNG version of every training image.
TRAIN_PATH = '../input/vinbigdata-chest-xray-resized-png-1024x1024/train/'

# Build one file path per row, in the same order as the split frames, so that
# position i in a path list matches position i in the corresponding targets.
train_image_paths = [
    ''.join((TRAIN_PATH, image_id, '.png')) for image_id in train['image_id'].values
]
val_image_paths = [
    ''.join((TRAIN_PATH, image_id, '.png')) for image_id in val['image_id'].values
]
print(train_image_paths[:5])
len(train_image_paths), len(val_image_paths)

In [None]:
# Binary labels as plain arrays, aligned row-for-row with the split frames.
train_targets = train['Finding'].values
val_targets = val['Finding'].values
print(len(train_targets), len(val_targets))
train_targets[:5]

In [None]:
class VinBigDataDataset:
    """Map-style dataset pairing image files on disk with per-image targets.

    Parameters
    ----------
    image_paths : sequence of str
        File paths readable by ``cv2.imread``.
    targets : sequence
        One target per path, in the same order as ``image_paths``.
    augmentations : callable, optional
        Called as ``augmentations(image=<HWC array>)`` and expected to return a
        mapping whose ``'image'`` entry is the transformed HWC array.
    """

    def __init__(self, image_paths, targets, augmentations = None):
        self.image_paths = image_paths
        self.targets = targets
        self.augmentations = augmentations

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load one sample.

        Returns
        -------
        dict
            ``"image"``: float32 tensor in channel-first (C, H, W) layout;
            pixel values are not rescaled here.
            ``"targets"``: the target at position ``item`` as a tensor.

        Raises
        ------
        FileNotFoundError
            If the image file is missing or cannot be decoded.
        """
        targets = self.targets[item]
        path = self.image_paths[item]
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None instead of raising; fail here
        # with the offending path rather than with an opaque error further down.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.augmentations is not None:
            image = self.augmentations(image = image)['image']
        # HWC -> CHW, the layout the convolutional model consumes.
        image = np.transpose(image, (2,0,1)).astype(np.float32)
        return {
            "image" : torch.tensor(image),
            "targets" : torch.tensor(targets)
        }

In [None]:
# Single preprocessing step: resize every image to 64x64.
# Despite its name, this pipeline is also passed to the validation and test datasets.
train_aug = albumentations.Compose([albumentations.Resize(height = 64, width = 64)])

In [None]:
BATCH_SIZE = 32

# Both splits share the same resize-only preprocessing pipeline.
train_dataset = VinBigDataDataset(train_image_paths, train_targets, train_aug)
val_dataset = VinBigDataDataset(val_image_paths, val_targets, train_aug)

# Only the training loader shuffles. Validation metrics do not depend on sample
# order, so the validation loader iterates deterministically.
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
val_dataloader = DataLoader(val_dataset, batch_size = BATCH_SIZE, shuffle = False)

# Sanity check: shape of one preprocessed sample.
train_dataset[0]['image'].shape

In [None]:
def plot_images(image_dict):
    """Print a sample's target and draw its image.

    ``image_dict`` is a sample as produced by the dataset: ``'image'`` is a
    channel-first tensor and ``'targets'`` its label.
    """
    print(image_dict['targets'])
    # Channel-first -> channel-last for matplotlib, then divide by 255 for display.
    channels_last = image_dict['image'].permute(1, 2, 0)
    plt.imshow(channels_last / 255)

In [None]:
plot_images(train_dataset[10])

In [None]:
class VinBigDataModel(nn.Module):
    """ResNeXt-50 (32x4d) backbone with its final layer replaced by a new classifier.

    Parameters
    ----------
    num_classes : int
        Number of output logits produced by the replacement ``fc`` layer.
    pretrained : bool, default True
        Forwarded to ``resnext50_32x4d``; ``False`` builds the backbone without
        loading pretrained weights.
    """

    def __init__(self, num_classes, pretrained = True):
        super().__init__()
        # Honour the caller's choice instead of always loading pretrained weights.
        self.convnet = resnext50_32x4d(pretrained = pretrained)
        # Size the new head from the layer it replaces rather than hard-coding 2048.
        self.convnet.fc = nn.Linear(self.convnet.fc.in_features, num_classes)

    def loss(self, outputs, classes):
        """Return the cross-entropy of ``outputs`` against ``classes``.

        Returns ``None`` when ``classes`` is ``None``.
        """
        if classes is None:
            return None
        return CrossEntropyLoss()(outputs, classes)

    def forward(self, images, classes = None):
        """Run the network on ``images``.

        Returns
        -------
        tuple
            ``(outputs, loss)`` where ``outputs`` are the raw class logits and
            ``loss`` is the cross-entropy against ``classes``, or ``None`` when
            no ``classes`` are given.
        """
        outputs = self.convnet(images)
        return outputs, self.loss(outputs, classes)

In [None]:
model = VinBigDataModel(num_classes = df_train.Finding.nunique(), pretrained = True)

In [None]:
opt = Adam(model.parameters(), lr = 1e-3)

In [None]:
EPOCHS = 50
PATIENCE = 5                      # epochs without validation improvement before stopping
CHECKPOINT_PATH = 'checkpoint.pt'

# Per-epoch history, consumed by the plotting cell.
loss_list_train = []
f1_list_train = []
loss_list_val = []
f1_list_val = []

min_val_loss = np.inf
# Initialised up front so the `else` branch below is safe even if the very first
# validation loss is not smaller than `min_val_loss` (e.g. it is NaN).
epochs_no_improve = 0

model.to(device)
for epoch in range(EPOCHS):
    # Predictions and labels are collected per epoch; keeping them across epochs
    # would make each epoch's F1 an average over all previous epochs as well.
    train_preds, train_actuals = [], []
    val_preds, val_actuals = [], []
    tr_loss = 0.0
    tr_examples = 0
    val_loss = 0.0
    val_examples = 0

    # ---- training pass ----
    model.train()
    for batch in train_dataloader:
        images, classes = batch['image'].to(device), batch['targets'].to(device)
        opt.zero_grad()
        outputs, loss = model(images, classes)
        loss.backward()
        opt.step()
        # The loss is the batch mean (CrossEntropyLoss default reduction); weight it
        # by the batch size so dividing by the example count gives a per-sample mean.
        n_in_batch = len(batch['targets'])
        tr_loss += loss.item() * n_in_batch
        tr_examples += n_in_batch
        _, predicted = torch.max(outputs.detach().cpu(), 1)
        train_preds.extend(list(predicted.numpy()))
        train_actuals.extend(list(classes.cpu().numpy()))
    f1_score_train = f1_score(train_actuals, train_preds)
    train_avg_loss = tr_loss/tr_examples

    # ---- validation pass (no gradient tracking, eval-mode layers) ----
    model.eval()
    with torch.no_grad():
        for batch in val_dataloader:
            images, classes = batch['image'].to(device), batch['targets'].to(device)
            outputs, loss = model(images, classes)
            n_in_batch = len(batch['targets'])
            val_loss += loss.item() * n_in_batch
            val_examples += n_in_batch
            _, predicted = torch.max(outputs.cpu(),1)
            val_preds.extend(list(predicted.numpy()))
            val_actuals.extend(list(classes.cpu().numpy()))
    f1_score_val = f1_score(val_actuals, val_preds)
    val_avg_loss = val_loss/val_examples

    loss_list_train.append(train_avg_loss)
    loss_list_val.append(val_avg_loss)
    f1_list_train.append(f1_score_train)
    f1_list_val.append(f1_score_val)

    print("Epoch: {} TrainLoss: {:.3f} Val Loss: {:.3f} TrainF1: {:.3f} ValF1: {:.3f}".format(epoch, train_avg_loss, val_avg_loss, f1_score_train, f1_score_val))

    # ---- checkpointing / early stopping on the mean validation loss ----
    if val_avg_loss < min_val_loss:
        torch.save(model.state_dict(), CHECKPOINT_PATH)
        epochs_no_improve = 0
        min_val_loss = val_avg_loss
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print('Early stopping!' )
            break

# Restore the best weights whether training stopped early or ran all epochs.
# A finite `min_val_loss` means a checkpoint was written during this run.
if np.isfinite(min_val_loss):
    model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location = device))

In [None]:
# Learning curves: losses on the left y-axis, F1 scores on a twin right y-axis.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()

loss_lines = (
    ax1.plot(loss_list_train, '-r', label = "loss_train")
    + ax1.plot(loss_list_val, '-g',label = "loss_val")
)
f1_lines = (
    ax2.plot(f1_list_train, label = 'f1_train')
    + ax2.plot(f1_list_val, label = 'f1_val')
)

ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax2.set_ylabel('F1-Score')

# The lines live on two different axes, so gather all handles and their labels
# by hand to get a single combined legend.
all_lines = loss_lines + f1_lines
ax1.legend(all_lines, [line.get_label() for line in all_lines], loc=0)

In [None]:
# Directory holding the resized PNG version of every test image.
TEST_PATH = '../input/vinbigdata-chest-xray-resized-png-1024x1024/test/'

# One path per submission row, in submission order.
test_image_paths = [
    ''.join((TEST_PATH, image_id, '.png')) for image_id in df_submission['image_id'].values
]
# The dataset class requires targets; the test set has none, so use placeholder zeros.
test_targets = [0] * len(test_image_paths)
test_dataset = VinBigDataDataset(test_image_paths, test_targets, train_aug)
# No shuffling: predictions must stay aligned with the rows of df_submission.
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = False)
test_dataset[0]['image'].shape

In [None]:
predictions = []
# Set eval mode explicitly instead of relying on whatever mode an earlier cell left.
model.eval()
# Inference only: no gradients are needed.
with torch.no_grad():
    # Iterate over the test data in submission order.
    for batch in tqdm(test_dataloader):
        # The placeholder targets are not needed for prediction, so only move images.
        images = batch['image'].to(device)
        # Without targets the model returns (logits, None).
        logits, _ = model(images)
        # Predicted class = index of the largest logit.
        _, predicted = torch.max(logits.cpu(), 1)
        predictions.extend(list(predicted.numpy()))

In [None]:
# Attach the predicted class index per test image (rows are in the same order the
# test loader iterated, since it was built from df_submission without shuffling).
df_submission['Finding'] = predictions
# Write the table to the working directory, without the DataFrame index column.
df_submission.to_csv('submission_temp.csv', index = False)