In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
'''for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
'''
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# What files do I have?

- train.csv - contains image IDs, binary labels, and patient IDs.
- sample_submission.csv - a sample submission file in the correct format
- test - test images
- train - training images


# What do the columns mean?


- StudyInstanceUID - unique ID for each image
- ETT - Abnormal - endotracheal tube placement abnormal
- ETT - Borderline - endotracheal tube placement borderline abnormal
- ETT - Normal - endotracheal tube placement normal
- NGT - Abnormal - nasogastric tube placement abnormal
- NGT - Borderline - nasogastric tube placement borderline abnormal
- NGT - Incompletely Imaged - nasogastric tube placement inconclusive due to imaging
- NGT - Normal - nasogastric tube placement normal
- CVC - Abnormal - central venous catheter placement abnormal
- CVC - Borderline - central venous catheter placement borderline abnormal
- CVC - Normal - central venous catheter placement normal
- Swan Ganz Catheter Present
- PatientID - unique ID for each patient in the dataset


# Some domain knowledge

- Endotracheal Tube: An endotracheal tube is a flexible plastic tube that is placed **through the mouth** into the trachea (windpipe) to help a patient breathe. The endotracheal tube is then connected to a ventilator, which delivers oxygen to the lungs.

- Nasogastric Tube: A nasogastric (NG) tube is a flexible tube of rubber or plastic that is passed **through the nose**, down through the esophagus, and into the stomach.

- Central Venous Catheter: A central venous catheter is a thin, flexible tube that is **inserted into a vein**, usually below the right collarbone, and guided (threaded) into a large vein above the right side of the heart called the superior vena cava.

- Swan-Ganz Catheter: Swan-Ganz catheterization is the passing of a thin tube (catheter) **into the right side of the heart and the arteries leading to the lungs**. It is done to monitor the heart's function and blood flow and pressures in and around the heart. This test is most often done in people who are very ill.


# Normal-Borderline-Abnormal

- Normal: Catheter placement was proper
- Borderline: Catheter placement needed repositioning but worked normally
- Abnormal: Completely abnormal

In [None]:
import pandas as pd

# Load the training CSV from the competition input directory and preview its first rows.
train = pd.read_csv("../input/ranzcr-clip-catheter-line-classification/train.csv")
train.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
def plot_categorical(df, cat_names, ncols=4):
    '''Draws one count plot per categorical column on a shared grid of axes.

    Input: df (pd.DataFrame): Frame holding the columns to plot
           cat_names (iterable of str): Column names, one count plot each
           ncols (int): Number of plots per row (default 4, as before)

    Output: Shows the figure; returns None. Nothing is drawn when
            cat_names is empty.
    '''
    cat_names = list(cat_names)
    if not cat_names:
        return

    # Size the grid from the input instead of assuming a fixed 3x4 layout,
    # so more than 12 columns no longer raises IndexError.
    nrows = -(-len(cat_names) // ncols)  # ceiling division
    # 3 wide by 8/3 tall per cell reproduces the original (12, 8) for a 3x4 grid.
    _, axes = plt.subplots(nrows, ncols, figsize=(3 * ncols, 8 * nrows / 3),
                           squeeze=False)
    flat_axes = axes.ravel()  # row-major, same order as [ii // ncols][ii % ncols]

    for axis, feature in zip(flat_axes, cat_names):
        sns.countplot(data=df, x=feature, ax=axis)

    # Hide the grid cells that did not receive a plot.
    for axis in flat_axes[len(cat_names):]:
        axis.set_visible(False)

    plt.tight_layout()
    plt.show()

In [None]:
# Display the column names of the training frame.
train.columns

In [None]:
# Count plots for the 11 label columns listed below.
plot_categorical(train,["ETT - Abnormal","ETT - Borderline","ETT - Normal","NGT - Abnormal","NGT - Borderline",
                        "NGT - Incompletely Imaged","NGT - Normal","CVC - Abnormal","CVC - Borderline","CVC - Normal",
                       "Swan Ganz Catheter Present"])

We can see that there is a heavy class imbalance in almost all of the label columns.

Now let's plot some X-Rays and see what data we have

In [None]:
import cv2
import matplotlib.image as mpimg
def read_n_plot_image(filenames, rows, cols, figsize=(10, 10),
                      image_dir="../input/ranzcr-clip-catheter-line-classification/train/"):
    '''Reads training images by ID and shows them on a rows x cols grid.

    Input: filenames (iterable of str): Image IDs (file names without ".jpg")
           rows (int): Number of grid rows
           cols (int): Number of grid columns
           figsize (tuple): Figure size passed to plt.subplots
           image_dir (str): Directory prefix (with trailing slash) holding the images

    Output: Draws the images on the current figure; returns None.
            Raises IndexError when more than rows*cols filenames are given.
    '''
    # squeeze=False keeps the axes array 2-D even when rows == 1 or cols == 1,
    # which previously broke the ax[r][c] indexing.
    fig, axes = plt.subplots(rows, cols, figsize=figsize, squeeze=False)
    flat_axes = axes.ravel()  # row-major, same order as [ii // cols][ii % cols]

    used = 0
    for ii, filename in enumerate(filenames):
        img = mpimg.imread(image_dir + filename + ".jpg")
        flat_axes[ii].imshow(img, cmap="gray")
        used = ii + 1

    # Hide the grid cells that did not receive an image.
    for axis in flat_axes[used:]:
        axis.set_visible(False)

    plt.tight_layout()


In [None]:
# Show the first 12 training images on a 4x3 grid.
var_df = train["StudyInstanceUID"].values[:12]
read_n_plot_image(var_df,4,3,(15,15))

It can be clearly seen in just the first 12 images that there are images in which the catheters cannot be seen because of a haze or foggy effect. This usually happens due to improper illumination. There are several techniques to solve this problem, some of which are implemented below.

In [None]:
import cv2
def sharpen_image(image):
    '''Sharpens an image by convolving it with a 3x3 kernel.

    Input: image (np.ndarray): Image to filter
    Output: (np.ndarray): Filtered image, same depth as the input
    '''
    # 3x3 kernel: -1 everywhere with 9 in the centre (weights sum to 1).
    kernel = np.array([
        [-1, -1, -1],
        [-1,  9, -1],
        [-1, -1, -1],
    ])
    # ddepth=-1 keeps the output depth equal to the input depth.
    return cv2.filter2D(image, -1, kernel)



# A. Histogram Equalization

In this method we transform the image so that the distribution of intensity values becomes more uniform throughout the image. As a result, the foggy effect produced by the high number of pixels in the white range does not overpower the image after the transformation, which gives us a clear view of the catheters in this case. Wikipedia gives a great explanation of the method: https://en.wikipedia.org/wiki/Histogram_equalization

# B. Contrast Limited Adaptive Histogram Equalization (CLAHE)

If histogram equalization is done globally, some areas which have a higher contrast than others may get unnecessarily dark. CLAHE instead applies histogram equalization to small windows, and is thus a form of local histogram equalization. As a result, each area is equalized appropriately. You can read more about it in OpenCV's documentation: https://docs.opencv.org/master/d5/daf/tutorial_py_histogram_equalization.html

In [None]:
def dehaze_img(img_name, clip_limit=15.0, tile_grid_size=(10, 10)):
    '''Applies histogram equalization and CLAHE to an image


    Input: img_name (str): Path of the image file to read
           clip_limit (float): CLAHE clip limit (default 15.0, as before)
           tile_grid_size (tuple): CLAHE tile grid size (default (10, 10), as before)
    Output: dst1 (np.ndarray): CLAHE applied image
            dst2 (np.ndarray): Global HE applied image
    Raises: FileNotFoundError if the image cannot be read
    '''
    image = cv2.imread(img_name)
    # cv2.imread signals failure by returning None rather than raising;
    # fail here with a clear message instead of inside cvtColor.
    if image is None:
        raise FileNotFoundError("Could not read image: " + str(img_name))

    # Both equalizers below operate on a single-channel image.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    dst1 = clahe.apply(image)
    dst2 = cv2.equalizeHist(image)
    return dst1, dst2




def dehaze_df(filenames,rows):
    '''Applies the above two algorithms to a series

        Input: filenames(pd.Series): Series consisting of study instance ids
               rows (int): Number of rows for plotting

        Output: Plots of each of the filenames (original, CLAHE, global HE)

        Raises: FileNotFoundError if an image cannot be read
    '''
    # squeeze=False keeps the axes array 2-D even when rows == 1,
    # which previously broke the ax[ii][col] indexing.
    fig, ax = plt.subplots(rows, 3, figsize=(30, 30), squeeze=False)
    for ii, img_name in enumerate(filenames):
        img_path = "../input/ranzcr-clip-catheter-line-classification/train/"+img_name+".jpg"

        original = cv2.imread(img_path)
        if original is None:
            raise FileNotFoundError("Could not read image: " + img_path)
        # cv2.imread returns BGR channel order, while imshow expects RGB.
        ax[ii, 0].imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))

        clahe_img, global_img = dehaze_img(img_path)
        ax[ii, 1].imshow(clahe_img, cmap="gray")
        ax[ii, 2].imshow(global_img, cmap="gray")

        # Column titles only on the first row.
        if ii == 0:
            ax[ii, 0].set_title("Original")
            ax[ii, 1].set_title("Dehazed CLAHE")
            ax[ii, 2].set_title("Dehazed Global")
    plt.tight_layout()
    plt.show()
# Compare the original, CLAHE and global-equalization views for the first five images.
filenames = train["StudyInstanceUID"][:5]
dehaze_df(filenames,5)

As we can see, histogram equalization makes the catheters much clearer to the eye, and it will probably make them clearer for the networks too.

# Retinex Based Methods

- In these methods, the image is considered to be composed of two different components, a reflection component and an illumination component.
- The hazy effect is a part of the illumination component. Hence, if we get rid of this component, we have practically removed the fog.
- More about it here: http://html.rhhz.net/ieee-jas/html/2017-3-410.htm
- The below code is an implementation of Retinex based image restoration. The credit for the code goes to Masato Tamura. You can check out his repo here: https://github.com/dongb5/Retinex


In [None]:
import numpy as np
import cv2

def singleScaleRetinex(img, sigma):
    """Return log10(img) minus log10 of its Gaussian-blurred copy.

    img:   array to process (callers in this file pass float64 data shifted by +1).
    sigma: Gaussian standard deviation; kernel size (0, 0) lets OpenCV derive
           the kernel size from sigma.
    """
    log_image = np.log10(img)
    log_blurred = np.log10(cv2.GaussianBlur(img, (0, 0), sigma))
    return log_image - log_blurred

def multiScaleRetinex(img, sigma_list):
    """Average the single-scale retinex of `img` over every sigma in `sigma_list`.

    The accumulator takes the shape and dtype of `img`; the result is the
    accumulated sum divided by the number of sigmas.
    """
    accumulated = np.zeros_like(img)
    for scale in sigma_list:
        accumulated += singleScaleRetinex(img, scale)

    return accumulated / len(sigma_list)

def colorRestoration(img, alpha, beta):
    """Compute beta * (log10(alpha * img) - log10(sum of img over axis 2)).

    img:   3-D array; the sum is taken over axis 2 and broadcast back.
    alpha: multiplier applied to img inside the logarithm.
    beta:  overall multiplier applied to the log difference.
    """
    channel_total = np.sum(img, axis=2, keepdims=True)
    log_scaled = np.log10(alpha * img)
    log_total = np.log10(channel_total)
    return beta * (log_scaled - log_total)

def simplestColorBalance(img, low_clip, high_clip):
    """Clamp each channel of `img` to values chosen from its own histogram.

    For every channel the distinct pixel values are sorted, and for each one
    the fraction of pixels strictly below it is computed. The lower bound is
    the largest value whose fraction is < low_clip, and the upper bound is the
    largest value whose fraction is < high_clip. When no value qualifies
    (a clip fraction <= 0) the smallest value in the channel is used, which
    previously raised UnboundLocalError.

    img:       3-D array (rows, cols, channels); modified in place.
    low_clip:  fraction used to pick the lower clamp value.
    high_clip: fraction used to pick the upper clamp value.

    Returns the same `img` object after clamping.
    """
    total = img.shape[0] * img.shape[1]
    if total == 0:
        # No pixels to balance.
        return img

    for i in range(img.shape[2]):
        channel = img[:, :, i]
        unique, counts = np.unique(channel, return_counts=True)

        # Fraction of pixels strictly below each unique value. It is
        # non-decreasing, so the number of entries under a threshold gives
        # the index of the last qualifying value.
        below = (np.cumsum(counts) - counts) / float(total)
        low_idx = max(int(np.count_nonzero(below < low_clip)) - 1, 0)
        high_idx = max(int(np.count_nonzero(below < high_clip)) - 1, 0)
        low_val = unique[low_idx]
        high_val = unique[high_idx]

        # Keep the original min-then-max order so that low_val wins if the
        # bounds ever cross.
        img[:, :, i] = np.maximum(np.minimum(channel, high_val), low_val)

    return img

def MSRCR(img, sigma_list, G, b, alpha, beta, low_clip, high_clip):
    """Multi-scale retinex combined with the colour-restoration term.

    Steps, in order:
      1. shift the input to float64 and add 1.0 (so the logarithms never see 0),
      2. compute G * (multiScaleRetinex * colorRestoration + b),
      3. min-max rescale every channel to [0, 255],
      4. cast to uint8 and apply simplestColorBalance with the clip fractions.

    Returns the uint8 image produced by simplestColorBalance.
    """
    shifted = np.float64(img) + 1.0

    retinex_term = multiScaleRetinex(shifted, sigma_list)
    color_term = colorRestoration(shifted, alpha, beta)
    combined = G * (retinex_term * color_term + b)

    for ch in range(combined.shape[2]):
        plane = combined[:, :, ch]
        plane_min = np.min(plane)
        plane_max = np.max(plane)
        combined[:, :, ch] = (plane - plane_min) / (plane_max - plane_min) * 255

    as_bytes = np.uint8(np.minimum(np.maximum(combined, 0), 255))
    return simplestColorBalance(as_bytes, low_clip, high_clip)

def automatedMSRCR(img, sigma_list):
    """Multi-scale retinex with per-channel clipping bounds picked from the histogram.

    For every channel the retinex values are bucketed by int32(value * 100).
    Buckets whose count is below 10% of the zero bucket's count set the clip
    bounds: the last such negative bucket gives the lower bound and the first
    such positive bucket gives the upper bound. The channel is then clamped
    to those bounds and min-max rescaled to [0, 255].

    img:        image array with a channel axis at index 2.
    sigma_list: Gaussian sigmas forwarded to multiScaleRetinex.

    Returns the processed image as uint8.
    """
    img = np.float64(img) + 1.0

    img_retinex = multiScaleRetinex(img, sigma_list)

    for i in range(img_retinex.shape[2]):
        unique, count = np.unique(np.int32(img_retinex[:, :, i] * 100), return_counts=True)

        # Reset per channel: previously a channel without a zero bucket either
        # raised UnboundLocalError (first channel) or silently reused the
        # count from the previous channel. With 0 no bucket qualifies below,
        # so the bounds stay at the channel's min/max.
        zero_count = 0
        for u, c in zip(unique, count):
            if u == 0:
                zero_count = c
                break

        # Default bounds: the full observed range of the channel.
        low_val = unique[0] / 100.0
        high_val = unique[-1] / 100.0
        for u, c in zip(unique, count):
            if u < 0 and c < zero_count * 0.1:
                low_val = u / 100.0
            if u > 0 and c < zero_count * 0.1:
                high_val = u / 100.0
                break

        img_retinex[:, :, i] = np.maximum(np.minimum(img_retinex[:, :, i], high_val), low_val)

        # Min-max rescale the clamped channel to [0, 255].
        img_retinex[:, :, i] = (img_retinex[:, :, i] - np.min(img_retinex[:, :, i])) / \
                               (np.max(img_retinex[:, :, i]) - np.min(img_retinex[:, :, i])) \
                               * 255

    img_retinex = np.uint8(img_retinex)

    return img_retinex

def MSRCP(img, sigma_list, low_clip, high_clip):
    """Multi-scale retinex applied to the intensity channel, then mapped back to all channels.

    The mean over the channel axis is used as intensity. Its multi-scale
    retinex is colour-balanced and rescaled to [1, 256], and every pixel is
    multiplied by a gain of min(256 / max channel value, new / old intensity),
    so no channel exceeds 256 before the final -1 shift.

    img:        image array with a channel axis at index 2.
    sigma_list: Gaussian sigmas forwarded to multiScaleRetinex.
    low_clip, high_clip: clip fractions forwarded to simplestColorBalance.

    Returns the processed image as uint8, same shape as `img`.
    """
    img = np.float64(img) + 1.0

    intensity = np.sum(img, axis=2) / img.shape[2]

    retinex = multiScaleRetinex(intensity, sigma_list)

    intensity = np.expand_dims(intensity, 2)
    retinex = np.expand_dims(retinex, 2)

    intensity1 = simplestColorBalance(retinex, low_clip, high_clip)

    intensity1 = (intensity1 - np.min(intensity1)) / \
                 (np.max(intensity1) - np.min(intensity1)) * \
                 255.0 + 1.0

    # Vectorized form of the former per-pixel Python loop: the same gain is
    # computed for every pixel and broadcast over the channel axis. This also
    # removes the hard-coded assumption of exactly three channels.
    peak = np.max(img, axis=2, keepdims=True)
    gain = np.minimum(256.0 / peak, intensity1 / intensity)
    img_msrcp = gain * img

    img_msrcp = np.uint8(img_msrcp - 1.0)

    return img_msrcp


In [None]:
# Settings for the retinex functions. amsrcr_df reads "sigma_list"; the other
# keys carry the same names as the remaining parameters of MSRCR.
config = dict(
    sigma_list=[15, 80, 250],
    G=5.0,
    b=25.0,
    alpha=125.0,
    beta=46.0,
    low_clip=0.01,
    high_clip=0.99,
)

In [None]:

def amsrcr_df(filenames,config):

    '''Plots retinex based cleaned image and the original image

    Input: filenames (pd.Series): Series of filenames to be plotted
           config: Config dict; only config['sigma_list'] is read

    Output: Plots of retinex based filtered image and original image

    Raises: FileNotFoundError if an image cannot be read
    '''

    # squeeze=False keeps the axes array 2-D even for a single filename,
    # which previously broke the ax[ii][col] indexing.
    fig, ax = plt.subplots(len(filenames), 2, figsize=(15, 15), squeeze=False)
    for ii, img_name in enumerate(filenames):
        img_path = "../input/ranzcr-clip-catheter-line-classification/train/"+img_name+".jpg"
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError("Could not read image: " + img_path)

        img_amsrcr = automatedMSRCR(
            img,
            config['sigma_list']
        )

        # cv2.imread returns BGR channel order, while imshow expects RGB;
        # convert both panels for display only.
        ax[ii, 0].imshow(cv2.cvtColor(img_amsrcr, cv2.COLOR_BGR2RGB))
        ax[ii, 1].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

        # Column titles only on the first row.
        if ii == 0:
            ax[ii, 0].set_title("Retinex")
            ax[ii, 1].set_title("Normal")

    plt.show()


In [None]:
## UNCOMMENT THE LINE BELOW TO RUN THE RETINEX METHOD; IT IS CURRENTLY VERY SLOW
#amsrcr_df(filenames,config)