In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import numpy as np
import pandas as pd
import os
import time
import math
import timm

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score
from PIL import Image

# Fix the NumPy and PyTorch RNG seeds so repeated runs behave the same.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Target columns of the submission file, in output order.
TARGET_COLUMNS = [
    'ETT - Abnormal', 'ETT - Borderline', 'ETT - Normal',
    'NGT - Abnormal', 'NGT - Borderline', 'NGT - Incompletely Imaged', 'NGT - Normal',
    'CVC - Abnormal', 'CVC - Borderline', 'CVC - Normal',
    'Swan Ganz Catheter Present',
]

# Set to True for a quick run on a small sample of the test set.
DEBUG = False

if not DEBUG:
    BATCH_SIZE = 3
    EPOCHS = 3
    AVERAGING_SIZE = 100
else:
    BATCH_SIZE = 4
    EPOCHS = 2
    AVERAGING_SIZE = 20

ROOT_DIR = '/kaggle/input/ranzcr-clip-catheter-line-classification/test'
OUTPUT_DIR = './'
MODEL_PATH = '../input/efficientnet-b3-epoch-4-loss-01446-roc-09160pth/efficientnet_b3_epoch_4_loss_0.1446_roc_0.9160.pth'
MODEL_NAME = 'efficientnet_b3'

# Image side length in pixels. 672 * 1.5 evaluates to the float 1008.0, but the
# resize transform needs an integer pixel size, so cast explicitly.
IMG_SIZE = int(672 * 1.5)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

# Load and configure data 

### Build a placeholder DataFrame listing the test images

In [None]:
# Collect the file name of every test image under ROOT_DIR. Sorting makes the
# row order (and therefore the DEBUG sample) independent of the file system's
# directory listing order.
imgs = []
for dirname, _, filenames in os.walk(ROOT_DIR):
    imgs.extend(filenames)
imgs.sort()

# Placeholder frame: one row per image and one column per target. The test set
# has no labels, so every target starts at 0.0 (float, so that predicted
# probabilities can later be written into these columns without a dtype change).
cols = {"StudyInstanceUID": imgs}
for col in TARGET_COLUMNS:
    cols[col] = [0.0] * len(imgs)
test_set_df = pd.DataFrame(cols)
print(test_set_df.columns)

if DEBUG:
    # Cap the sample size so small folders do not make `sample` raise.
    test_set_df = test_set_df.sample(min(200, len(test_set_df)))


test_set_df.shape

### Create custom PyTorch dataset

We use a custom dataset instead of `ImageFolder` for two reasons: the Kaggle input folder is flat (no per-class subdirectories) and we don't want to reorganize it, and each image can have more than one label, which `ImageFolder` does not support.

In [None]:
class RanzcrClipTestDataset(torch.utils.data.Dataset):
    """Unlabeled image dataset used for inference.

    Each item is a single image converted to RGB (and optionally transformed);
    no target is returned.
    """

    def __init__(self, labels_df, transform=None, root_dir=None):
        """
        Args:
            labels_df (pandas.DataFrame): Frame whose ``StudyInstanceUID``
                column holds the image file names, relative to ``root_dir``.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            root_dir (string, optional): Directory with all the images.
                Defaults to the module-level ``ROOT_DIR``.
        """
        if root_dir is None:
            root_dir = ROOT_DIR
        self.file_paths = [os.path.join(root_dir, uid) for uid in labels_df["StudyInstanceUID"].values]
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.file_paths)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` as RGB and apply the transform, if any."""

        # Read image as PIL. The context manager closes the underlying file
        # as soon as the pixel data has been copied by `convert`.
        with Image.open(self.file_paths[idx]) as img:
            sample = img.convert('RGB')

        # Run all given transformations on image
        if self.transform is not None:
            sample = self.transform(sample)

        return sample

### Transforms

In [None]:
# Pre-processing for every test image: resize to an IMG_SIZE x IMG_SIZE square,
# convert to a tensor, then normalize each channel with the commonly used
# ImageNet mean/std values.
test_transforms = transforms.Compose(
    [
        transforms.Resize((IMG_SIZE, IMG_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

### Create the test dataset and DataLoader

In [None]:
# Wrap the test frame in the dataset and report how many images it holds.
test_set = RanzcrClipTestDataset(transform=test_transforms, labels_df=test_set_df)

print(f'Test size: {len(test_set)}')

# Keep the original order and keep the last partial batch, so that the
# predictions line up one-to-one with the rows of the test frame.
test_loader = torch.utils.data.DataLoader(
    dataset=test_set,
    batch_size=2 * BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    drop_last=False,
)

## Visualize some images

In [None]:
def imshow(inp, title=None):
    """Show a normalized channel-first image tensor with matplotlib.

    Args:
        inp: Tensor laid out as (channels, height, width) that was normalized
            with the mean/std values hard-coded below.
        title: Optional title drawn above the image.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Channels-first -> channels-last, which is what matplotlib expects.
    image = np.transpose(inp.numpy(), (1, 2, 0))

    # Undo the normalization and clamp to the displayable [0, 1] range.
    image = np.clip(channel_std * image + channel_mean, 0, 1)

    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Pull the first batch of images from the test loader
inputs = next(iter(test_loader))

# Tile the whole batch into one image grid
out = torchvision.utils.make_grid(inputs)

# Display the grid
imshow(inp=out)


# Inference Code

In [None]:
def inference(net, loader=None, run_device=None):
    """Run ``net`` over every batch of a loader and collect sigmoid probabilities.

    Args:
        net: ``torch.nn.Module`` that maps an input batch to per-target logits.
        loader: Iterable yielding input batches (tensors). Defaults to the
            module-level ``test_loader``.
        run_device: Device each batch is moved to before the forward pass.
            Defaults to the module-level ``device``.

    Returns:
        tuple: ``(y_pred, y_prob)`` NumPy arrays with one row per sample.
        ``y_prob`` holds the sigmoid of the network outputs and ``y_pred`` is
        ``y_prob`` rounded to the nearest integer value. Both are empty arrays
        when the loader yields no batches.
    """
    if loader is None:
        loader = test_loader
    if run_device is None:
        run_device = device

    batch_probs = []

    # switch to evaluation mode
    net.eval()

    start_time = time.time()

    batch = probs = None
    with torch.no_grad():
        for batch in loader:
            probs = net(batch.to(run_device)).sigmoid()
            # One device-to-host copy per batch instead of two per sample.
            batch_probs.append(probs.cpu().numpy())

    if batch_probs:
        y_prob = np.concatenate(batch_probs, axis=0)
    else:
        # Nothing to predict: return empty arrays instead of failing on vstack.
        y_prob = np.empty((0, 0), dtype=np.float32)
    y_pred = np.round(y_prob)

    # Drop the last references to device tensors before releasing cached memory.
    batch = probs = None
    torch.cuda.empty_cache()

    elapsed = time.time() - start_time
    # Report the number of batches processed.
    print(f'[{len(batch_probs)}] Elapsed {elapsed:.4f} ')

    return y_pred, y_prob


In [None]:
class CustomPretrainedmModel(nn.Module):
    """timm backbone whose ``classifier`` layer is replaced by a linear head
    with one output per entry of ``TARGET_COLUMNS``."""

    def __init__(self, model_name=MODEL_NAME, pretrained=False):
        """
        Args:
            model_name: Architecture name passed to ``timm.create_model``.
            pretrained: Forwarded to ``timm.create_model``.
        """
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)

        # Swap the stock classifier for a head sized to our targets.
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(head_inputs, len(TARGET_COLUMNS))

        self.model = backbone

    def forward(self, x):
        """Return the raw (pre-sigmoid) outputs of the backbone for batch ``x``."""
        return self.model(x)

In [None]:
# Build the network and place it on the selected device.
model = CustomPretrainedmModel()
model.to(device)

# Restore the saved weights; the checkpoint keeps them under the 'model' key.
checkpoint = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(checkpoint['model'])

# Predict on the whole test set.
y_pred, y_prob = inference(net=model)

In [None]:
# Replace the placeholder target columns with the predicted probabilities.
# Assigning by column label swaps in new float columns, so it does not depend
# on the dtype the placeholders were created with.
test_set_df[TARGET_COLUMNS] = y_prob

# Strip the ".jpg" extension so that only the study UID remains. The dot is
# escaped and the pattern anchored at the end, so only a literal ".jpg" suffix
# is removed.
test_set_df["StudyInstanceUID"] = test_set_df["StudyInstanceUID"].str.replace(r'\.jpg$', '', regex=True)

In [None]:
# dtypes = {col: 'int' for col in TARGET_COLUMNS}
# test_set_df = test_set_df.astype(dtypes)

In [None]:
# Show up to 20 random rows; capping the size keeps `sample` from raising on
# frames with fewer than 20 rows.
test_set_df.sample(min(20, len(test_set_df)))

In [None]:
# Write the submission file into the configured output directory.
test_set_df.to_csv(os.path.join(OUTPUT_DIR, 'submission.csv'), index=False)