In [None]:
import os
import ast
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import random


### Before starting there are some important terms

- ETT Abnormal (endotracheal tube placement abnormal)
- ETT Borderline (endotracheal tube placement borderline abnormal)
- ETT Normal (endotracheal tube placement normal)
- NGT Abnormal (nasogastric tube placement abnormal)
- NGT Borderline (nasogastric tube placement borderline abnormal)
- NGT Incompletely Imaged (nasogastric tube placement inconclusive due to imaging)
- NGT Normal (nasogastric tube placement normal)
- CVC Abnormal (central venous catheter placement abnormal)
- CVC Borderline (central venous catheter placement borderline abnormal)
- CVC Normal (central venous catheter placement normal)
- Swan Ganz Catheter Present

In [None]:
# Root folder of the competition data. The cells below read train.csv and
# train_annotations.csv from it, and the plotting helpers load images from
# its "train" subfolder.
BASE_DIR = "../input/ranzcr-clip-catheter-line-classification"

In [None]:
# index_col=0 turns the first CSV column into the index; later cells pass
# those index values to display_image, which uses them as image file stems.
train = pd.read_csv(f"{BASE_DIR}/train.csv", index_col=0)
# disp_image_with_annotate reads the "StudyInstanceUID", "label" and "data"
# columns of this frame.
train_annotations = pd.read_csv(f"{BASE_DIR}/train_annotations.csv")

In [None]:
# Size of the train table as (rows, columns).
train.shape

In [None]:
# Preview the first rows of the train table.
train.head()

In [None]:
# Column-wise sums over every column except the last one.
# NOTE(review): presumably the last column is not a 0/1 label and is dropped
# for that reason — confirm against train.head().
train.iloc[:, :-1].sum()

In [None]:
def display_image(img_ids, n_cols=4):
    """Show training images in a grid, one panel per image id.

    Parameters
    ----------
    img_ids : iterable of str
        File stems of the JPEG images stored in ``<BASE_DIR>/train``.
    n_cols : int, optional
        Number of grid columns. The default of 4 reproduces the original
        3x4 layout for up to 12 images.

    Raises
    ------
    ValueError
        If ``n_cols`` is smaller than 1.
    FileNotFoundError
        If an image cannot be read from disk.
    """
    if n_cols < 1:
        raise ValueError(f"n_cols must be >= 1, got {n_cols}")

    img_ids = list(img_ids)
    if not img_ids:
        # Nothing to draw; do not emit an empty figure.
        return

    # Keep at least three rows (the original layout) and add rows only when
    # more images are requested than the grid can hold, instead of letting
    # the subplot index run past the grid.
    n_rows = max(3, -(-len(img_ids) // n_cols))
    fig = plt.figure(figsize=(14, 10 * n_rows / 3))

    for index, img_id in enumerate(img_ids):
        img_path = os.path.join(BASE_DIR, "train", f"{img_id}.jpg")
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread reports failure by returning None; fail here with
            # the offending path rather than inside cvtColor.
            plt.close(fig)
            raise FileNotFoundError(f"Could not read image: {img_path}")

        ax = fig.add_subplot(n_rows, n_cols, index + 1)
        # OpenCV loads pixels as BGR; matplotlib expects RGB.
        ax.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        ax.axis("off")

    plt.show()
    

def disp_describe(df, col):
    """Print the value counts of ``df[col]`` and the share of its mean as a percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``col``.
    col : str
        Column to summarise. The percentage line is the column mean, which
        equals the share of ones only if the column holds 0/1 values.
    """
    print("Distribution:")
    print(df[col].value_counts())
    print()
    # The mean is a fraction in [0, 1] for a 0/1 column; the "%" format type
    # multiplies by 100 so the printed number matches the "Percent" label.
    print(f"Percent of 1: {df[col].mean():.2%}")
    

def disp_image_with_annotate(df, row_ind):
    """Plot one training image twice: plain, and with its annotation points.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``StudyInstanceUID``, ``label`` and ``data``.
    row_ind : int
        Positional index (``iloc``) of the row to display.

    Raises
    ------
    FileNotFoundError
        If the image for the selected row cannot be read from disk.
    """
    row = df.iloc[row_ind]
    img_path = os.path.join(BASE_DIR, "train", row["StudyInstanceUID"] + ".jpg")
    label = row["label"]
    # "data" is stored as the text of a Python literal; after parsing, columns
    # 0 and 1 are used as the x and y coordinates of the scatter plot.
    data = np.array(ast.literal_eval(row["data"]))

    # Load before creating the figure so a bad path leaves no empty figure.
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # OpenCV loads pixels as BGR; matplotlib expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    fig, (ax_plain, ax_marked) = plt.subplots(1, 2, figsize=(10, 5))
    ax_plain.imshow(img)
    ax_marked.imshow(img)
    ax_marked.scatter(data[:, 0], data[:, 1])
    fig.suptitle(label, fontsize=14)

    # Render now, as display_image does, so repeated calls in a loop do not
    # accumulate open figures.
    plt.show()

In [None]:
# Size of the annotation table as (rows, columns).
train_annotations.shape

In [None]:
# Preview the first rows of the annotation table.
train_annotations.head()

In [None]:
# Show the annotated image for one fixed row (positional index 10).
disp_image_with_annotate(train_annotations, 10)

In [None]:
# Show ten randomly chosen annotated images. The row index is drawn from the
# actual length of the table rather than a hard-coded 0..1500 range, so every
# row can be picked and the index can never run past the end of the frame.
for i in range(10):
    disp_image_with_annotate(train_annotations, random.randrange(len(train_annotations)))

In [None]:
# List the column names; the per-class cells below select columns by name.
train.columns

### ETT Abnormal

In [None]:
col_name = "ETT - Abnormal"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### ETT Borderline

In [None]:
col_name = "ETT - Borderline"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### ETT Normal

In [None]:
col_name = "ETT - Normal"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### NGT Abnormal

In [None]:
col_name = "NGT - Abnormal"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### NGT Borderline

In [None]:
col_name = "NGT - Borderline"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### NGT Incompletely Imaged

In [None]:
col_name = "NGT - Incompletely Imaged"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### NGT Normal

In [None]:
col_name = "NGT - Normal"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### CVC Abnormal

In [None]:
col_name = "CVC - Abnormal"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### CVC Borderline

In [None]:
col_name = "CVC - Borderline"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### CVC Normal

In [None]:
col_name = "CVC - Normal"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))

### Swan Ganz Catheter Present

In [None]:
col_name = "Swan Ganz Catheter Present"
disp_describe(train, col_name)
# Keep only the rows flagged with 1 for this class.
tmp_df = train[train[col_name] == 1]
# Show up to 12 random positives. random.sample raises ValueError when asked
# for more items than exist, so the request is capped at the row count.
display_image(random.sample(tmp_df.index.tolist(), min(12, len(tmp_df))))