<h1 style="border-style: outset;border-color: red;text-align: center;">SIIM-FISABIO-RSNA COVID-19 Detection</h1>

<img src="https://www.news-medical.net/image.axd?picture=2020%2F11%2Fshutterstock_1707538084_(1).jpg" height="500" width="500" style="display: block;margin-left: auto;margin-right: auto;"> 

<h2 style="font-weight: bold; color:blue; font-family:verdana; text-align: center;"> Identify and localize COVID-19 abnormalities on chest radiographs </h2>

<h2 style="text-align: center;border-style: double;border-color: red;">About SIIM</h2>
<img src="https://siim.org/resource/resmgr/SIIM_logo-600x315.png" width="200" style="display: block;margin-left: auto;margin-right: auto;">
<p> <b>Society for Imaging Informatics in Medicine</b> (<a href="https://siim.org/">SIIM</a>) is the leading healthcare professional organization for those interested in the current and future use of informatics in medical imaging. The society's mission is to advance medical imaging informatics across the enterprise through education, research, and innovation in a multi-disciplinary community.</p>

<h2 style="text-align: center;border-style: double;border-color: red;"> What we need to do</h2>
<img src="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQtew5pZk4BkEcy8v7TCXDaTAam9qj144eiy6K5ShjkeDNPVeZohAKIvdepF0HQxgcQzUU&usqp=CAU" width="300" height="300" style="display: block;margin-left: auto;margin-right: auto;">
<p> <b>Identify and localize COVID-19 abnormalities on chest radiographs</b>. In particular, you'll categorize each radiograph as negative for pneumonia, or as typical, indeterminate, or atypical for COVID-19.</p>

In [None]:
# Install the GDCM package from conda-forge; it is imported below as `gdcm`.
# NOTE(review): presumably needed so pydicom can decode compressed pixel data,
# and the version is not pinned — confirm both.
!conda install -c conda-forge gdcm -y

<h1 style = "font-family:'Courier New';font-weight: bold;margin-top: 0px;margin-bottom: 1px;text-align: center;">Import libraries</h1>

In [None]:
import tensorflow as tf
from kaggle_datasets import KaggleDatasets

import numpy as np
import pandas as pd
import ast

import gdcm

import matplotlib.pyplot as plt
import matplotlib.patches as patches

import PIL
import cv2
import pydicom 
from pydicom.pixel_data_handlers.util import apply_voi_lut
import matplotlib.pyplot as plt

import os
import warnings
# Suppress every Python warning for the rest of the session. Note that this
# also hides deprecation and numerical (e.g. divide-by-zero) warnings.
warnings.filterwarnings('ignore')

<h2 style = "font-family:'Courier New';font-weight: bold;margin-top: 0px;margin-bottom: 1px;text-align: center;">Checking TPU access</h2>

In [None]:
# Shorthand for tf.data's autotune sentinel.
AUTO = tf.data.experimental.AUTOTUNE

# Probe for a TPU. A ValueError from the resolver is treated as
# "no TPU available" and leaves `tpu` set to None.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU: fall back to the strategy TensorFlow currently has in scope.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

<h1 style = "font-family:'Courier New';font-weight: bold;margin-top: 0px;margin-bottom: 1px;text-align: center;">Loading Data</h1>

In [None]:
# Local Kaggle input directories. To read from the dataset's GCS bucket
# instead, derive both paths from KaggleDatasets().get_gcs_path():
#   GCS_DS_PATH = KaggleDatasets().get_gcs_path()
#   TRAIN_PATH = GCS_DS_PATH + "/train"
#   TEST_PATH = GCS_DS_PATH + "/test"
TRAIN_PATH = "../input/siim-covid19-detection/train"
TEST_PATH = "../input/siim-covid19-detection/test"

# Every .dcm file located exactly two directory levels below each root.
TRAIN_FILES, TEST_FILES = (
    tf.io.gfile.glob(root + "/*/*/*.dcm") for root in (TRAIN_PATH, TEST_PATH)
)

# Class index -> human-readable study-level label.
classes_dict = dict(enumerate([
    "Negative for Pneumonia",
    "Typical Appearance",
    "Indeterminate Appearance",
    "Atypical Appearance",
]))

HEIGHT = WIDTH = 512

<h1 style = "font-family:'Courier New';font-weight: bold;margin-top: 0px;margin-bottom: 1px;text-align: center;">Preprocessing CSV files</h1>

In [None]:
def get_path(file_id,main_path,id_type):
    """Return the path of one DICOM file for a study id or an image id.

    Parameters
    ----------
    file_id : str
        Identifier such as "<name>_study" or "<name>_image"; only the part
        before the first "_" is used.
    main_path : str
        Root directory (or bucket prefix) to search under.
    id_type : str
        "study" searches ``main_path/<name>/*/*.dcm``; any other value
        searches ``main_path/*/*/<name>.dcm``.

    Returns
    -------
    str
        The first match reported by ``tf.io.gfile.glob``. When a study
        contains several images, only that first one is returned.

    Raises
    ------
    IndexError
        If no file matches the pattern. The message names the pattern and
        the id so the offending row can be found.
    """
    name = file_id.split("_")[0]
    if id_type == "study":
        pattern = main_path + f"/{name}/*/*.dcm"
    else:
        pattern = main_path + f"/*/*/{name}.dcm"

    matches = tf.io.gfile.glob(pattern)
    if not matches:
        # Same exception type as indexing the empty list, but with context.
        raise IndexError(f"no DICOM file matches {pattern!r} (file_id={file_id!r})")
    return matches[0]

In [None]:
# Load the image-level and study-level annotation tables.
img_df = pd.read_csv("../input/siim-covid19-detection/train_image_level.csv")
study_df = pd.read_csv("../input/siim-covid19-detection/train_study_level.csv")

# The four study-level indicator columns, listed in class-index order
# (the same order as the values of classes_dict).
label_cols = ["Negative for Pneumonia","Typical Appearance","Indeterminate Appearance","Atypical Appearance"]
label_matrix = study_df[label_cols].to_numpy()

# Collapse the indicator columns into one array per study and name the
# class whose indicator is largest.
study_df["one_hot"] = list(label_matrix)
study_df["label_id"] = [classes_dict[idx] for idx in label_matrix.argmax(axis=1)]
study_df = study_df.drop(columns=label_cols)

# Resolve one DICOM file per study, then derive that file's image id
# ("<file stem>_image") so the study rows can be joined to img_df.
study_df["filepath"] = [get_path(study_id, TRAIN_PATH, "study") for study_id in study_df["id"]]
study_df = study_df.rename(columns={"id":"study_id"})
study_df["image_id"] = study_df["filepath"].str.split("/").str[-1].str.split(".").str[0] + "_image"

img_df = img_df.rename(columns={"id":"image_id"})

# NOTE(review): get_path keeps only the first file of each study and this is
# an inner join on image_id, so any other image of a multi-image study does
# not appear in train_df — confirm this is intended.
train_df = study_df.merge(img_df, on="image_id")
cols = ["filepath","study_id","image_id","StudyInstanceUID","one_hot","label_id","label","boxes"]
train_df = train_df[cols]
train_df.head()

In [None]:
# Count the rows per study-level label once and reuse the result for both axes.
label_counts = train_df["label_id"].value_counts()
x = list(label_counts.index)
y = list(label_counts.values)

fig, ax = plt.subplots(figsize=(10,5))
ax.bar(x,y)
ax.set_xlabel("Label")
ax.set_ylabel("Count")
ax.set_title("Labels Distribution")
plt.show()

<h2 style = "font-family:'Courier New';font-weight: bold;margin-top: 0px;margin-bottom: 1px;text-align: center;">DICOM to Array</h2>

In [None]:
#Ref : https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
def dicom2arr(path, voi_lut = True, fix_monochrome = True):
    """Read a DICOM file and return its pixels as a uint8 array scaled to 0-255.

    Parameters
    ----------
    path : str
        Location of the DICOM file.
    voi_lut : bool, default True
        When True, pass the raw pixels through ``apply_voi_lut`` using the
        file's own dataset; otherwise use the raw pixel array.
    fix_monochrome : bool, default True
        When True and the file's PhotometricInterpretation is "MONOCHROME1",
        invert the intensities (max - value).

    Returns
    -------
    numpy.ndarray
        uint8 array with the minimum mapped to 0 and the maximum to 255.
        A constant image yields an all-zero array.
    """
    # dcmread is the current reader entry point; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Min-max scale to [0, 1]. Skip the division for a constant image, whose
    # shifted maximum is 0 and would otherwise produce NaNs before the cast.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak

    return (data * 255).astype(np.uint8)

<h2 style = "font-family:'Courier New';font-weight: bold;margin-top: 0px;margin-bottom: 1px;text-align: center;">Image Visualization</h2>

In [None]:
# Class index -> matplotlib colour used for that class's bounding boxes.
colors_dict = dict(enumerate(["blue", "orange", "green", "red"]))

def displayImg(filepath_ls):
    """Show the given DICOM images in a three-column grid with their boxes.

    Each image is titled with its study-level class taken from ``train_df``.
    Every bounding box listed in the image's ``boxes`` entry is drawn in the
    class colour and annotated with the class name.

    Parameters
    ----------
    filepath_ls : iterable of str
        File paths that occur in ``train_df["filepath"]``. Up to nine paths
        use a 3x3 grid; more paths add rows.
    """
    filepath_ls = list(filepath_ls)
    n_cols = 3
    # At least three rows, so up to nine images keep the 18x18 layout;
    # more rows are added as needed (ceiling division).
    n_rows = max(3, (len(filepath_ls) + n_cols - 1) // n_cols)

    plt.figure(figsize=(18, 6 * n_rows))
    for i,filepath in enumerate(filepath_ls):
        ax = plt.subplot(n_rows, n_cols, i+1)
        arr = dicom2arr(filepath)
        req = train_df[train_df["filepath"]==filepath]

        class_idx = np.argmax(req["one_hot"].values[0])
        label = classes_dict[class_idx]
        color = colors_dict[class_idx]
        plt.imshow(arr,cmap="gray")
        plt.axis("off")
        plt.title(label)

        for boxes in req["boxes"].values:
            # Rows without annotations hold NaN instead of a box list.
            if pd.isna(boxes):
                continue
            # The cell is the string form of a list of box dicts; draw all
            # of them, not only the first.
            for box in ast.literal_eval(boxes):
                x,y,width,height = box["x"], box["y"], box["width"], box["height"]
                rect = patches.Rectangle((x,y),width,height,edgecolor=color,linewidth=1, facecolor='none')
                ax.add_patch(rect)
                ax.text(x+15,y-50,label)
    plt.show()

# Draw nine random row positions from the whole frame (np.random.randint may
# repeat a position) and show the corresponding images.
rand = np.random.randint(0, len(train_df), 9)
filepath_ls = train_df["filepath"].iloc[rand].tolist()
displayImg(filepath_ls)