In [None]:
import os
import ast

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as ex
from matplotlib_venn import venn2,venn3


import torch 
import torch.nn as nn
import cv2

<h2 style="color:purple"> 
Details of all the columns
</h2>

StudyInstanceUID - unique ID for each image

ETT - Abnormal - endotracheal tube placement abnormal

ETT - Borderline - endotracheal tube placement borderline abnormal

ETT - Normal - endotracheal tube placement normal

NGT - Abnormal - nasogastric tube placement abnormal

NGT - Borderline - nasogastric tube placement borderline abnormal

NGT - Incompletely Imaged - nasogastric tube placement inconclusive due to imaging

NGT - Normal - nasogastric tube placement normal

CVC - Abnormal - central venous catheter placement abnormal

CVC - Borderline - central venous catheter placement borderline abnormal

CVC - Normal - central venous catheter placement normal

Swan Ganz Catheter Present - a Swan-Ganz catheter is present in the image

PatientID - unique ID for each patient in the dataset

In [None]:
# Root folder of the dataset, given relative to the notebook's working directory.
DIR = '../input/ranzcr-clip-catheter-line-classification'
# List the folder's entries to see which files and sub-folders are available.
os.listdir(DIR)

In [None]:
# Load the training table from train.csv and preview its first rows.
df_train = pd.read_csv(os.path.join(DIR,'train.csv'))
df_train.head()

In [None]:
# Keep every column except the first and the last
# (presumably the StudyInstanceUID / PatientID identifiers — confirm against train.csv).
df_plot1 = df_train.iloc[:, 1:-1]

In [None]:
# Inspect the dtype of each remaining column.
df_plot1.dtypes

In [None]:
# Draw one histogram per integer-valued column of df_plot1.
# `dtype == int` is deliberately avoided: with NumPy < 2 on Windows `int` maps to
# int32, so the int64 columns pandas reads from a CSV would be silently skipped.
for col in df_plot1.columns:
    if pd.api.types.is_integer_dtype(df_plot1[col]):
        df_plot1.hist(col, color='#008294')

In [None]:
# Sum each label column (presumably 0/1 indicators, so the sum is the number of
# images carrying that label) and draw the totals as horizontal bars.
df_plot2 = df_train.iloc[:, 1:-1].sum()

plt.figure(figsize=(8, 8))
sns.barplot(x=df_plot2.values, y=df_plot2.index, color='#008294')
# Same tick-label size on both axes.
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=14)
plt.xlabel("Number of images", fontsize=15)
plt.title("Distribution of labels", fontsize=16);

In [None]:
# Wrap the per-label sums in a DataFrame (note: this rebinds the same name).
df_plot2 = pd.DataFrame(df_plot2)

In [None]:
# Load the annotation table and preview it. Judging by how it is used further down,
# its `data` column holds the point coordinates traced along each tube/catheter.
df_annot = pd.read_csv(os.path.join(DIR,'train_annotations.csv'))
df_annot.head()

In [None]:
def label_plotting(name):
    """Plot the first annotated training image for ``name``, plain and with its annotation.

    Looks up the first row of ``df_annot`` whose ``label`` equals ``name`` (the
    first occurrence, not a random sample), loads
    ``<DIR>/train/<StudyInstanceUID>.jpg`` and shows it twice side by side:
    the left panel is the image alone, the right panel adds the annotation
    points parsed from the row's ``data`` column.

    Parameters
    ----------
    name : str
        Value to match against ``df_annot['label']``.

    Raises
    ------
    IndexError
        If no annotation row carries this label.
    FileNotFoundError
        If the image file cannot be read.
    """
    matches = df_annot.loc[df_annot['label'] == name, :]
    if matches.empty:
        # Same exception type the bare ``.iloc[0]`` would raise, but with a useful message.
        raise IndexError(f"no annotation rows with label {name!r}")
    row = matches.iloc[0, :]

    image_path = os.path.join(DIR, "train", row["StudyInstanceUID"] + ".jpg")
    # ``data`` is stored as the text of a Python literal; parse it safely into an array.
    points = np.array(ast.literal_eval(row["data"]))

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"could not read image: {image_path}")
    # OpenCV loads channels as BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    fig, (ax_plain, ax_annotated) = plt.subplots(1, 2, figsize=(10, 5))
    ax_plain.imshow(image)
    ax_annotated.imshow(image)
    # First column is used as x, second as y.
    ax_annotated.scatter(points[:, 0], points[:, 1])

    fig.suptitle(row["label"], fontsize=15)

In [None]:
# First annotated example labelled 'CVC - Borderline'.
label_plotting('CVC - Borderline')

In [None]:
# First annotated example labelled 'NGT - Borderline'.
label_plotting('NGT - Borderline')

In [None]:
# First annotated example labelled 'CVC - Abnormal'.
label_plotting('CVC - Abnormal')

In [None]:
# First annotated example labelled 'NGT - Abnormal'.
label_plotting('NGT - Abnormal')

In [None]:
# First annotated example labelled 'ETT - Abnormal'.
label_plotting('ETT - Abnormal')

In [None]:
# First annotated example labelled 'CVC - Normal'.
label_plotting('CVC - Normal')

In [None]:
# First annotated example labelled 'NGT - Normal'.
label_plotting('NGT - Normal')

In [None]:
# First annotated example labelled 'ETT - Borderline'.
label_plotting('ETT - Borderline')

In [None]:
# First annotated example labelled 'ETT - Normal'.
label_plotting('ETT - Normal')

In [None]:
# Let's see how much the label columns in the training data overlap.

def venn2_drawer(col1,col2):
    """Draw a two-set Venn diagram of the rows flagged in ``col1`` and ``col2``.

    A row belongs to a set when its value in the corresponding ``df_train``
    column equals 1.

    ``venn2`` expects the sizes of the three *disjoint* regions, in the order
    (col1 only, col2 only, both). Passing the full column totals instead would
    count the overlap twice and overstate the exclusive regions.

    Parameters
    ----------
    col1, col2 : str
        Names of the ``df_train`` columns to compare; also used as set labels.
    """
    in_first = df_train[col1] == 1
    in_second = df_train[col2] == 1

    venn2(
        subsets=(
            int((in_first & ~in_second).sum()),   # col1 only
            int((~in_first & in_second).sum()),   # col2 only
            int((in_first & in_second).sum()),    # both
        ),
        set_colors='bg', normalize_to=1, alpha=0.3,
        set_labels=(col1, col2),
    )

In [None]:
# Overlap between the 'ETT - Abnormal' and 'NGT - Abnormal' labels.
venn2_drawer('ETT - Abnormal','NGT - Abnormal')

In [None]:
def plot_venn3(col_1, col_2, col_3):
    """Draw a three-set Venn diagram of the rows flagged in three ``df_train`` columns.

    A row belongs to a set when its value in the corresponding column equals 1.

    ``venn3`` expects the sizes of the seven *mutually exclusive* regions in the
    order (100, 010, 110, 001, 101, 011, 111), where each digit says whether the
    region lies inside col_1 / col_2 / col_3. Each region is therefore counted
    with the other sets explicitly excluded; using full column totals or plain
    pairwise intersections would count the same rows in several regions.

    Parameters
    ----------
    col_1, col_2, col_3 : str
        Names of the ``df_train`` columns to compare; also used as set labels.
    """
    plt.figure(figsize=(6, 6))

    in_1 = df_train[col_1] == 1
    in_2 = df_train[col_2] == 1
    in_3 = df_train[col_3] == 1

    area_100 = int((in_1 & ~in_2 & ~in_3).sum())
    area_010 = int((~in_1 & in_2 & ~in_3).sum())
    area_110 = int((in_1 & in_2 & ~in_3).sum())
    area_001 = int((~in_1 & ~in_2 & in_3).sum())
    area_101 = int((in_1 & ~in_2 & in_3).sum())
    area_011 = int((~in_1 & in_2 & in_3).sum())
    area_111 = int((in_1 & in_2 & in_3).sum())

    venn3(
        subsets=(area_100, area_010, area_110, area_001, area_101, area_011, area_111),
        set_labels=(col_1, col_2, col_3), set_colors='ybg',
        alpha=0.3
    )


In [None]:
# Overlap between the three "Abnormal" labels.
plot_venn3(
    "ETT - Abnormal",
    "NGT - Abnormal",
    "CVC - Abnormal",
)

In [None]:
# Overlap between the three "Normal" labels.
plot_venn3(
    "ETT - Normal",
    "NGT - Normal",
    "CVC - Normal",
)

In [None]:
# Overlap between the three "Borderline" labels.
plot_venn3(
    "ETT - Borderline",
    "NGT - Borderline",
    "CVC - Borderline",
)

In [None]:
# Display the annotations DataFrame.
df_annot