__Competition Goal__

Detect the presence and position of catheters and lines on chest X-rays.

__Competition Metric__

Submissions are evaluated on __area under the ROC curve__ between the predicted probability and the observed target.

__Competition Rules__

- CPU Notebook <= 9 hours run-time
- GPU Notebook <= 9 hours run-time
- TPUs will not be available for making submissions to this competition. You are still welcome to use them for training models. For a walk-through on how to train on TPUs and run inference/submit on GPUs, see our TPU Docs.
- No internet access enabled on submission
- External data, freely & publicly available, is allowed. This includes pre-trained models.
- Submission file must be named submission.csv

__Target Labels__

- ETT - Abnormal - endotracheal tube placement abnormal
- ETT - Borderline - endotracheal tube placement borderline abnormal
- ETT - Normal - endotracheal tube placement normal
- NGT - Abnormal - nasogastric tube placement abnormal
- NGT - Borderline - nasogastric tube placement borderline abnormal
- NGT - Incompletely Imaged - nasogastric tube placement inconclusive due to imaging
- NGT - Normal - nasogastric tube placement normal
- CVC - Abnormal - central venous catheter placement abnormal
- CVC - Borderline - central venous catheter placement borderline abnormal
- CVC - Normal - central venous catheter placement normal
- Swan Ganz Catheter Present

In [None]:
%matplotlib inline

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os, json, re, math

from tqdm import tqdm
from glob import glob
import gc

import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

# Notebook-wide matplotlib defaults: figure size and axes-title font size.
plt.rcParams["figure.figsize"] = (12,8)
plt.rcParams['axes.titlesize'] = 16

from kaggle_datasets import KaggleDatasets

from time import time, strftime, gmtime
# Wall-clock start of the run; the last cell subtracts it to report elapsed time.
start = time()
import datetime
# Log the current date and time at the start of the run.
print(str(datetime.datetime.now()))

In [None]:
# Print the contents of the Kaggle input root, then of the competition
# dataset folder.
for input_dir in ('/kaggle/input/',
                  '/kaggle/input/ranzcr-clip-catheter-line-classification/'):
    print(os.listdir(input_dir))

In [None]:
# Root folder of the competition dataset; CSV and image paths below are built from it.
base_dir = '/kaggle/input/ranzcr-clip-catheter-line-classification/'

In [None]:
# Load train.csv; later cells read its PatientID, StudyInstanceUID and target columns.
train = pd.read_csv(base_dir + 'train.csv')
# Bare expression so the notebook renders the DataFrame.
train

In [None]:
# Names of the 11 target columns of `train`, grouped by device type
# (ETT, NGT, CVC, Swan Ganz).
targets = [
    'ETT - Abnormal',
    'ETT - Borderline',
    'ETT - Normal',
    'NGT - Abnormal',
    'NGT - Borderline',
    'NGT - Incompletely Imaged',
    'NGT - Normal',
    'CVC - Abnormal',
    'CVC - Borderline',
    'CVC - Normal',
    'Swan Ganz Catheter Present',
]

In [None]:
# Number of distinct PatientID values in the training table.
n_patients = train['PatientID'].nunique()
print('Number of unique PatientID: ', n_patients)

In [None]:
# Train: distinct StudyInstanceUID values in train.csv.
n_train_images = train['StudyInstanceUID'].nunique()
print('Number of unique Images in Train: ', n_train_images)
# Test: number of entries in the test image folder.
test_files = os.listdir(base_dir + 'test')
print('Number of unique Images in Test: ', len(test_files))

__Labels Count__

In [None]:
# Column-wise sum over the target columns (rendered by the notebook).
train[targets].sum()

In [None]:
# Grouped bar chart of value counts per target column. pd.melt reshapes the
# target columns into long form ("variable" = column name, "value" = cell
# value), so every label gets one bar per distinct value.
ax = sns.countplot(x = "variable", hue = "value", data = pd.melt(train[targets]))
ax.grid()
# Rotate the tick labels seaborn derived from the data rather than overwriting
# them with a hard-coded list, which would silently mislabel the bars if the
# category order ever differed from `targets`.
ax.tick_params(axis = 'x', labelrotation = 90)

__Visualization__

In [None]:
def display_images(UID, lbl, n_images = 12, n_cols = 4):
    """Show a grid of randomly chosen training images for one label.

    Parameters
    ----------
    UID : array-like of str
        StudyInstanceUID values to sample from; each one is read from
        ``base_dir + 'train/' + uid + '.jpg'``.
    lbl : str
        Label name, used as the title of every subplot and in the figure title.
    n_images : int, optional
        Maximum number of images to show (default 12).
    n_cols : int, optional
        Number of columns in the grid (default 4).

    Returns
    -------
    None
        The figure is shown as a side effect. If ``UID`` is empty a message is
        printed and nothing is plotted.
    """
    if len(UID) == 0:
        # np.random.choice raises ValueError on an empty pool.
        print(f'No images to display for: {lbl}')
        return

    # Sample without replacement so the same image never appears twice, and
    # shrink the sample when fewer than n_images are available.
    n_show = min(n_images, len(UID))
    files = np.random.choice(UID, n_show, replace = False)
    n_rows = math.ceil(n_show / n_cols)

    # 4 inches per row keeps the original 16x12 size for the default 3x4 grid.
    plt.figure(figsize = (16, 4 * n_rows))

    for i, im in enumerate(files):
        plt.subplot(n_rows, n_cols, i + 1)
        img = cv2.imread(base_dir + 'train/' + im + '.jpg')
        if img is None:
            # cv2.imread returns None (it does not raise) for a missing or
            # unreadable file; mark the cell instead of failing in cvtColor.
            plt.title(f'{lbl} (image not found)', fontsize = 10)
            plt.axis('off')
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (512, 512))
        plt.imshow(img)
        plt.title(f'{lbl}', fontsize = 10)
    plt.suptitle(f'Train Images: {lbl}', fontsize = 16)
    plt.show()

In [None]:
# For every target label, plot a random sample of the images flagged with it.
for lbl in targets:
    temp = train.loc[train[lbl] == 1, 'StudyInstanceUID']
    display_images(temp.values, lbl)

__Visualization with annotations__

In [None]:
# Load the annotation table; later cells read its StudyInstanceUID, label and data columns.
train_annot = pd.read_csv(base_dir + 'train_annotations.csv')
# Bare expression so the notebook renders the DataFrame.
train_annot

In [None]:
import ast

def display_images_annot(temp, lbl, n_images = 3):
    """Show random training images overlaid with their annotation points.

    Parameters
    ----------
    temp : pandas.DataFrame
        Annotation rows to draw from, expected to be already filtered to
        ``lbl``. Must contain the columns ``StudyInstanceUID`` and ``data``,
        where ``data`` is a string literal of a list of ``[x, y]`` points.
    lbl : str
        Label name, used in the subplot and figure titles.
    n_images : int, optional
        Maximum number of images to show side by side (default 3).

    Returns
    -------
    None
        The figure is shown as a side effect. If ``temp`` is empty a message
        is printed and nothing is plotted.
    """
    uids = temp['StudyInstanceUID'].unique()
    if len(uids) == 0:
        # np.random.choice raises ValueError on an empty pool.
        print(f'No annotated images to display for: {lbl}')
        return

    # Sample distinct images so the same one is never shown twice.
    n_show = min(n_images, len(uids))
    files = np.random.choice(uids, n_show, replace = False)

    plt.figure(figsize = (12, 8))

    for i, uid in enumerate(files):
        plt.subplot(1, n_images, i + 1)
        img = cv2.imread(base_dir + 'train/' + uid + '.jpg')
        if img is None:
            # cv2.imread returns None (it does not raise) for a missing or
            # unreadable file; mark the cell instead of failing in cvtColor.
            plt.title(f'{lbl} (image not found)', fontsize = 10)
            plt.axis('off')
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.imshow(img)

        # Read the points from `temp` (the rows for this label), not from the
        # first row of the full annotation table for this image: one image can
        # carry annotations for several labels, and the first one is not
        # necessarily `lbl`. Every matching row is drawn.
        for points in temp.loc[temp['StudyInstanceUID'] == uid, 'data']:
            annot = np.array(ast.literal_eval(points))
            plt.scatter(annot[:, 0], annot[:, 1])
        plt.title(f'{lbl}', fontsize = 10)
    plt.suptitle(f'{lbl} with Annotations', fontsize = 16, y = 0.75)
    plt.show()

In [None]:
# For every target label, plot annotated samples drawn from that label's rows.
for lbl in targets:
    temp = train_annot.loc[train_annot['label'].eq(lbl)]
    display_images_annot(temp, lbl)

__Images with more than one label__

In [None]:
# Row-wise sum over the target columns, computed once and reused for both
# subsets.
labels_per_image = train[targets].sum(axis = 1)
train_more = train.loc[labels_per_image > 1]
train_no = train.loc[labels_per_image == 0]
print('Number of Images with more than one labels: ', len(train_more))
print('Number of Images with NO labels: ', len(train_no))

In [None]:
#print(train_more[targets].loc[5].eq(1).astype(int).sum())

In [None]:
def display_images_more(df, n_images = 12, n_cols = 4):
    """Show a grid of random images from ``df``, titled with their label count.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to sample from; must contain ``StudyInstanceUID`` and every
        column listed in the module-level ``targets``.
    n_images : int, optional
        Maximum number of images to show (default 12).
    n_cols : int, optional
        Number of columns in the grid (default 4).

    Returns
    -------
    None
        The figure is shown as a side effect. If ``df`` is empty a message is
        printed and nothing is plotted.
    """
    if len(df) == 0:
        # np.random.choice raises ValueError on an empty pool.
        print('No images with more than one label to display')
        return

    # Sample row positions without replacement so no image is shown twice;
    # positions (not index labels) also keep this correct for any index.
    n_show = min(n_images, len(df))
    positions = np.random.choice(len(df), n_show, replace = False)
    n_rows = math.ceil(n_show / n_cols)

    # 4 inches per row keeps the original 16x12 size for the default 3x4 grid.
    plt.figure(figsize = (16, 4 * n_rows))

    for i, pos in enumerate(positions):
        plt.subplot(n_rows, n_cols, i + 1)
        # Read everything from the sampled row of `df` itself: the previous
        # code looked the image up in the global `train_more` and counted the
        # labels of the fixed row 5, so every title showed the same number.
        row = df.iloc[pos]
        lbl_num = int(row[targets].eq(1).sum())
        img = cv2.imread(base_dir + 'train/' + row['StudyInstanceUID'] + '.jpg')
        if img is None:
            # cv2.imread returns None (it does not raise) for a missing or
            # unreadable file; mark the cell instead of failing in cvtColor.
            plt.title(f'Number of Labels: {lbl_num} (image not found)', fontsize = 10)
            plt.axis('off')
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (512, 512))
        plt.imshow(img)
        plt.title(f'Number of Labels: {lbl_num}', fontsize = 10)
    plt.suptitle(f'Train Images with more than one label', fontsize = 16)
    plt.show()

In [None]:
# Plot a random sample of the images that carry more than one label.
display_images_more(train_more)

__Train Images with no Labels__

In [None]:
def display_images_no(df, n_images = 12, n_cols = 4):
    """Show a grid of random images from ``df`` and print all of its image IDs.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to sample from; must contain a ``StudyInstanceUID`` column.
    n_images : int, optional
        Maximum number of images to show (default 12).
    n_cols : int, optional
        Number of columns in the grid (default 4).

    Returns
    -------
    None
        The figure is shown and the StudyInstanceUID values of ``df`` are
        printed as side effects. If ``df`` is empty a message is printed and
        nothing is plotted.
    """
    if len(df) == 0:
        # np.random.choice raises ValueError on an empty pool.
        print('No images without labels to display')
        return

    # Sample row positions without replacement so no image is shown twice.
    n_show = min(n_images, len(df))
    positions = np.random.choice(len(df), n_show, replace = False)
    n_rows = math.ceil(n_show / n_cols)

    # 4 inches per row keeps the original 16x12 size for the default 3x4 grid.
    plt.figure(figsize = (16, 4 * n_rows))

    for i, pos in enumerate(positions):
        plt.subplot(n_rows, n_cols, i + 1)
        # Use the `df` argument, not the global `train_no`, so the function
        # works for whatever frame the caller passes in.
        im = df['StudyInstanceUID'].iloc[pos]
        img = cv2.imread(base_dir + 'train/' + im + '.jpg')
        if img is None:
            # cv2.imread returns None (it does not raise) for a missing or
            # unreadable file; mark the cell instead of failing in cvtColor.
            plt.title('image not found', fontsize = 10)
            plt.axis('off')
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (512, 512))
        plt.imshow(img)
    plt.suptitle(f'Train Images with NO labels', fontsize = 16)
    plt.show()
    print(df['StudyInstanceUID'].values)

In [None]:
# Plot a random sample of the images that carry no label at all.
display_images_no(train_no)

In [None]:
# Report total notebook run time as HH:MM:SS, measured from `start`.
finish = time()
elapsed = gmtime(finish - start)
print(strftime("%H:%M:%S", elapsed))