<h1 style = "text-align :center; color:black; background-image:url(https://cdn.pixabay.com/photo/2014/06/16/23/40/blue-370128__340.png)"> About Competition </h1>

<h2 style="border-style: outset;border-color: red;text-align: center;">SIIM-FISABIO-RSNA COVID-19 Study Level Predictions</h2>

<img src="https://content.presspage.com/uploads/2110/gettyimages-1214942330.jpg" height="500" width="500" style="display: block;margin-left: auto;margin-right: auto;"> 

<h2 style="text-align: center;border-style: double;text-align: center;border-color: red; ">About SIIM</h2>
<img src="https://siim.org/resource/resmgr/SIIM_logo-600x315.png" width="200" style="display: block;margin-left: auto;margin-right: auto;">
<p> <b>Society for Imaging Informatics in Medicine</b> (<a href="https://siim.org/">SIIM</a>) is the leading healthcare professional organization for those interested in the current and future use of informatics in medical imaging. The society's mission is to advance medical imaging informatics across the enterprise through education, research, and innovation in a multi-disciplinary community.</p>

<a href = "https://www.kaggle.com/shanmukh05/siim-covid19-dataset-256px-jpg" style="font-weight:'bold'; color:blue; font-family:monospace; "><h3>My Dataset</h3></a>

<a href = "https://www.kaggle.com/shanmukh05/siim-covid-19-data-preparation-for-detectron2" style="font-weight:'bold'; color:blue; font-family:monospace; "><h3>My Data Preparation Notebook</h3></a> 

<a href = "https://www.kaggle.com/shanmukh05/siim-covid-19-yolo-v5-image-level-predictions/output" style="font-weight:'bold'; color:blue; font-family:monospace; "><h3>YOLO v5 Image Level Training Notebook</h3></a> 

<a href = "https://www.kaggle.com/shanmukh05/siim-covid-19-study-level-predictions/" style="font-weight:'bold'; color:blue; font-family:monospace; "><h3>Study Level Training Notebook</h3></a> 

<a href = "https://www.kaggle.com/shanmukh05/siim-covid-19-detection-detectron2-training" style="font-weight:'bold'; color:blue; font-family:monospace; "><h3> My Detectron2 Training Notebook</h3></a> 
To be updated

<h2 style = "text-align :center; font-family:verdana; color:red; background-image: url(https://hookagency.com/wp-content/uploads/2015/11/miracle-grow-light-green-gradient.jpg); ">Install Requirements</h2>

In [1]:
!cp /kaggle/input/gdcm-conda-install/gdcm.tar .
!tar -xvzf gdcm.tar
!conda install --offline ./gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
!rm -rf ./gdcm.tar

!pip install /kaggle/input/kerasapplications -q
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

gdcm/
gdcm/conda-4.8.4-py37hc8dfbb8_2.tar.bz2
gdcm/gdcm-2.8.9-py37h71b2a6d_0.tar.bz2
gdcm/libjpeg-turbo-2.0.3-h516909a_1.tar.bz2

Downloading and Extracting Packages
######################################################################## | 100% 
Preparing transaction: - done
Verifying transaction: | done
Executing transaction: - done


<h2 style = "text-align :center; font-family:verdana; color:red; background-image: url(https://hookagency.com/wp-content/uploads/2015/11/miracle-grow-light-green-gradient.jpg); ">Importing Dependencies</h2>

In [2]:
import tensorflow as tf
import efficientnet.tfkeras as efn
import tensorflow_addons as tfa

import torch

import pandas as pd
import numpy as np

from PIL import Image
import os
import shutil

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2

In [3]:
HEIGHT,WIDTH = 224,224
CHANNELS = 3
BATCH_SIZE = 32

AUTO = tf.data.experimental.AUTOTUNE

ORIG_TEST_PATH = "../input/siim-covid19-detection/test"
TEST_FILES = tf.io.gfile.glob(ORIG_TEST_PATH + "/*/*/*.dcm")

FINAL_TEST_PATH = "./test"

<h1 style = "text-align :center; color:black; background-image:url(https://cdn.pixabay.com/photo/2014/06/16/23/40/blue-370128__340.png)"> Data Preparation </h1>

In [4]:
##---------------------------------------
#dicom to pixel array converting function
##---------------------------------------

# Ref : https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
def dicom2arr(path, voi_lut = True, fix_monochrome = True):
    dicom = pydicom.read_file(path)
    
    if voi_lut:
        arr = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        arr = dicom.pixel_array
               
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        arr = np.amax(arr) - arr
        
    arr = arr - np.min(arr)
    arr = arr / np.max(arr)
    arr = (arr * 255).astype(np.uint8)
        
    return arr

##-----------------------
#resizing the pixel array
##-----------------------

def resizeArr(arr):
    im = Image.fromarray(arr)
    test_w.append(im.size[0])
    test_h.append(im.size[1]) 
    im = im.resize((HEIGHT,WIDTH),resample= Image.LANCZOS)
    return im

##------------------------------
#Filename for resized jpg images 
##------------------------------

def getFilename(filepath):
    '''
        Fromat = '{STUDY-ID}_{SUB-STUDY-ID}_{IMAGE-ID}.jpg'
    '''
    ls = filepath.split("/")
    filename = ls[-3]+'_'+ls[-2]+'_'+ls[-1].split(".")[0]+".jpg"
    test_id.append(ls[-1].split(".")[0])
    return filename

##-------------------
#Finally saving image
##-------------------

def saveImage(filepath,mainpath=FINAL_TEST_PATH):
    arr = dicom2arr(filepath)
    arr = resizeArr(arr)
    path = os.path.join(mainpath,getFilename(filepath))
    
    arr.save(path)

In [5]:
test_id, test_h, test_w = [],[],[]
os.makedirs(FINAL_TEST_PATH, exist_ok=True)

for filepath in TEST_FILES:
    saveImage(filepath,mainpath=FINAL_TEST_PATH)

meta_test = pd.DataFrame.from_dict({
    "ImageInstanceUID" : test_id,
    "width" : test_w,
    "height" : test_h
})
meta_test.to_csv("./meta_test.csv",index=False)

<h2 style = "text-align :center; font-family:verdana; color:red; background-image: url(https://hookagency.com/wp-content/uploads/2015/11/miracle-grow-light-green-gradient.jpg); ">Preprocessing Images</h2>

In [6]:
def process_img(filepath):
    image = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(image, channels=CHANNELS)
    image = tf.image.convert_image_dtype(image, tf.float32) 
    image = tf.image.resize(image, [HEIGHT,WIDTH])
    return image


files_ls = tf.io.gfile.glob(FINAL_TEST_PATH + '/*.jpg')
test_ds = tf.data.Dataset.from_tensor_slices(files_ls)
test_ds = test_ds.map(process_img,num_parallel_calls=AUTO)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTO)

<h1 style = "text-align :center; color:black; background-image:url(https://cdn.pixabay.com/photo/2014/06/16/23/40/blue-370128__340.png)"> Study Level Predictions </h1>

In [7]:
MODEL_PATH = "../input/siim-covid-19-study-level-predictions/"

pred_arr = 0 
count = 0
classes_dict = {
    0 : "negative",
    1 : "typical",
    2 : "indeterminate",
    3 : "atypical",
}


for i,file in enumerate(os.listdir(MODEL_PATH)):
    if file.split(".")[-1] == "h5":
        count+=1
        with tf.device('/device:GPU:0'):
            model = tf.keras.models.load_model(os.path.join(MODEL_PATH,file))
            pred_arr += model.predict(test_ds)

pred_arr = pred_arr/count
pred_ls = [classes_dict[np.argmax(i)]+ f" {i[np.argmax(i)]} " +"0 0 1 1" for i in pred_arr]

In [8]:
df = pd.DataFrame.from_dict({"filepath":files_ls})
df["filename"] = df["filepath"].map(lambda x : x.split("/")[-1].split(".")[0])
df["StudyInstanceUID"] = df["filename"].map(lambda x : x.split("_")[0] + "_study")
df["ImageInstanceUID"] = df["filename"].map(lambda x : x.split("_")[-1] + "_image")
df["preds"] = pred_ls
df = df.drop(["filename","filepath"],axis=1)
df.head(3)

Unnamed: 0,StudyInstanceUID,ImageInstanceUID,preds
0,1b59d0b0ce5e_study,50243b2cdb56_image,negative 0.39392977952957153 0 0 1 1
1,2f8d9f7afedb_study,cb3eb8d1abd3_image,negative 0.5544598698616028 0 0 1 1
2,a1d7141d8672_study,c00bae0e7714_image,negative 0.6540222764015198 0 0 1 1


In [9]:
study_id = list(set(df["StudyInstanceUID"]))
strings = [] 
for sid in study_id:
    tmp = df[df["StudyInstanceUID"] == sid]
    string = ""
    for row in tmp.values:
        string += row[2]
        string += " "
    strings.append(string)

study_pred_df = pd.DataFrame.from_dict({
    "Id" : study_id,
    "PredictionString" : strings
})
study_pred_df.head()

Unnamed: 0,Id,PredictionString
0,aac24f8be440_study,typical 0.7929897308349609 0 0 1 1
1,164b05333dd6_study,typical 0.2818778157234192 0 0 1 1
2,9b284d2b8e81_study,typical 0.8898690342903137 0 0 1 1
3,fe3fa13c059c_study,typical 0.33014267683029175 0 0 1 1
4,3c46f027da8c_study,typical 0.7342738509178162 0 0 1 1


<h1 style = "text-align :center; color:black; background-image:url(https://cdn.pixabay.com/photo/2014/06/16/23/40/blue-370128__340.png)"> Image Level Predictions </h1>

In [10]:
!pip install ../input/pytorchthop/thop-0.0.31.post2005241907-py3-none-any.whl

Processing /kaggle/input/pytorchthop/thop-0.0.31.post2005241907-py3-none-any.whl
Installing collected packages: thop
Successfully installed thop-0.0.31.post2005241907


In [11]:
'''mainpath = "./test"
filenames = os.listdir(mainpath)
filepaths = [os.path.join(mainpath,i)  for i in filenames]
result_ls = []

MODEL_PATH = "../input/siim-covid-19-yolo-v5-image-level-predictions/best_yolov5.pt"
YOLO_PATH = "../input/siim-covid19-dataset-256px-jpg/yolov5"
model = torch.hub.load(YOLO_PATH,'custom',path=MODEL_PATH,force_reload=True,source="local")


for path in filepaths:
    arr = cv2.imread(path)[:,:,::-1]
    result = model([arr],size=HEIGHT)
    result_ls.append(result.pandas().xyxy[0])
    
    
id_ls = [i.split("_")[-1].split(".")[0]+"_image" for i in filenames]
pred_ls = []

for i in result_ls:
    if len(i) == 0:
        pred_ls.append("none 1 0 0 1 1")
    else:
        res = ""
        for f in i.values:
            f = list(map(str,f))
            tmp =  " ".join([f[6],f[4],f[0],f[1],f[2],f[3]])
            tmp += " "
            res+=tmp
        pred_ls.append(res)

image_pred_df = pd.DataFrame.from_dict({
    'Id' : id_ls,
    'PredictionString' : pred_ls
})
image_pred_df.head()'''

print("USING TORCH HUB")

USING TORCH HUB


In [12]:
%cd ../input/siim-covid19-dataset-256px-jpg/yolov5
%cd ../../../

/kaggle/input/siim-covid19-dataset-256px-jpg/yolov5
/kaggle


In [13]:
os.makedirs("./working/siim_covid19", exist_ok=True)
shutil.copytree("./input/siim-covid19-dataset-256px-jpg/yolov5","./working/siim_covid19/yolov5")
os.makedirs("./working/siim_covid19/yolov5/runs/train/yolov5_training/weights", exist_ok=True)
shutil.copyfile("./input/siim-covid-19-yolo-v5-image-level-predictions/best_yolov5.pt","./working/siim_covid19/yolov5/runs/train/yolov5_training/weights/best_yolov5.pt")

'./working/siim_covid19/yolov5/runs/train/yolov5_training/weights/best_yolov5.pt'

In [14]:
TEST_PATH = "/kaggle/working/test"
BEST_MODEL_PATH = "./runs/train/yolov5_training/weights/best_yolov5.pt"

In [15]:
%cd ./working/siim_covid19/yolov5
!python detect.py --weights {BEST_MODEL_PATH} \
                  --source {TEST_PATH} \
                  --img {HEIGHT} \
                  --conf 0.3 \
                  --iou-thres 0.5 \
                  --max-det 3 \
                  --save-txt \
                  --save-conf \
                  --name yolov5_testing \
                  --device cpu

/kaggle/working/siim_covid19/yolov5
Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.3, device='cpu', exist_ok=False, hide_conf=False, hide_labels=False, img_size=224, iou_thres=0.5, line_thickness=3, max_det=3, name='yolov5_testing', nosave=False, project='runs/detect', save_conf=True, save_crop=False, save_txt=True, source='/kaggle/working/test', update=False, view_img=False, weights=['./runs/train/yolov5_training/weights/best_yolov5.pt'])
image 1/1263 /kaggle/working/test/00188a671292_3eb5a506ccf3_3dcdfc352a06.jpg: 224x224 Done. (0.101s)
image 2/1263 /kaggle/working/test/004bd59708be_e7d024ea82d7_c39146cbda47.jpg: 224x224 2 opacitys, Done. (0.048s)
image 3/1263 /kaggle/working/test/00508faccd39_d39fc1121992_951211f8e1bb.jpg: 224x224 Done. (0.040s)
image 4/1263 /kaggle/working/test/006486aa80b2_fe138b3d009e_5e0e7acd9c7d.jpg: 224x224 1 opacity, Done. (0.040s)
image 5/1263 /kaggle/working/test/00655178fdfc_2e01129e9043_5b8ee5baa1d5.jpg: 224x224 1 opacity, Do

In [16]:
PREDICTIONS_PATH = "runs/detect/yolov5_testing/labels/"
PRED_FILES = os.listdir(PREDICTIONS_PATH)

print("Sample prediction (in txt file) : \n")

with open(PREDICTIONS_PATH + PRED_FILES[0], "r") as f:
    ls = f.read().strip("\n").split(" ")
    print(f"LABEL : {ls[0]} \nX_CENTER : {ls[1]} \nY_CENTER : {ls[2]} \nWIDTH : {ls[3]} \nHEIGHT : {ls[4]} \nCONFIDENCE : {ls[5]}")
print("Number of Prediction file originally present : ",len(PRED_FILES))

Sample prediction (in txt file) : 

LABEL : 1 
X_CENTER : 0.310268 
Y_CENTER : 0.671875 
WIDTH : 0.191964 
HEIGHT : 0.254464 
CONFIDENCE : 0.499815
Number of Prediction file originally present :  699


In [17]:
IMAGE_FILES = tf.io.gfile.glob("runs/detect/yolov5_testing/*.jpg")
ACTUAL_FILES = []
for path in IMAGE_FILES:
    ls = path.split("/")
    name = ls[-1].split(".")[0] + ".txt"
    ACTUAL_FILES.append(PREDICTIONS_PATH + name)
    
for filepath in ACTUAL_FILES:
    if not os.path.exists(filepath):
        with open(filepath,"w") as f:
            f.write("0 0 0 1 1 1")
            f.close()

In [18]:
##------------------------------------------
#getting string of predictions from txt file
##------------------------------------------
def get_string(filepath,out= "bbox"):
    probs = []
    bboxes = []
    labels = []
    with open(filepath, "r") as f:
        for line in f:
            ls = line.strip("\n").split(" ")
            ls = list(map(float, ls))
            labels.append(ls[0])
            bboxes.append(ls[1:-1])
            probs.append(ls[-1]) 
    if out == "bbox":
        return bboxes
    elif out == "label":
        return labels
    else:
        return probs

##------------------------------------------
#Scaling-up bounding box co-ordinates
##------------------------------------------
def scale_bbox(row):
    scale_x = row[5]
    scale_y = row[6]
    scale_bboxes = []
    for box in row[1]:
        if row[2][0] != 0.0:
            xc,yc = box[0]*scale_x,box[1]*scale_y
            w,h = box[2]*scale_x,box[3]*scale_y
            xmin,ymin = int(xc - w/2),int(yc - h/2)
            xmax,ymax = int(xc + w/2),int(yc + h/2)
            scale_bboxes.append([xmin,ymin,xmax,ymax])
        else:
            return [[0,0,1,1]]
    return scale_bboxes

In [19]:
pred_df = pd.DataFrame.from_dict({"filepath" : ACTUAL_FILES})
pred_df["bboxes"] = pred_df["filepath"].map(lambda x: get_string(x,out="bbox"))
pred_df["label"] = pred_df["filepath"].map(lambda x: get_string(x,out="label"))
pred_df["confidence"] = pred_df["filepath"].map(lambda x: get_string(x,out="conf"))

pred_df["ImageInstanceUID"] = pred_df["filepath"].map(lambda x : x.split("/")[-1].split("_")[-1][:-4])

pred_df = pred_df.merge(meta_test, on = "ImageInstanceUID")

box_ls = []
for i,row in enumerate(pred_df.values):
    box_ls.append(scale_bbox(row))
    
pred_df["scale_bboxes"] = box_ls
    
pred_df.head(3)

Unnamed: 0,filepath,bboxes,label,confidence,ImageInstanceUID,width,height,scale_bboxes
0,runs/detect/yolov5_testing/labels/1b59d0b0ce5e...,"[[0.252232, 0.551339, 0.174107, 0.290179]]",[1.0],[0.434306],50243b2cdb56,2846,2330,"[[470, 946, 965, 1622]]"
1,runs/detect/yolov5_testing/labels/2f8d9f7afedb...,"[[0.0, 0.0, 1.0, 1.0]]",[0.0],[1.0],cb3eb8d1abd3,2828,2320,"[[0, 0, 1, 1]]"
2,runs/detect/yolov5_testing/labels/a1d7141d8672...,"[[0.0, 0.0, 1.0, 1.0]]",[0.0],[1.0],c00bae0e7714,2846,2330,"[[0, 0, 1, 1]]"


In [20]:
%cd ../../../
image_dict = {
    0. : "none",
    1. : "opacity"
}

def get_string(row):
    string = ""
    if row[2][0] == 0.:
        string += "none 1 0 0 1 1"
        return string
    for i in range(len(row[2])):
        string += "opacity "
        string += str(row[3][i])
        string += " "
        bbox = map(str,row[7][i])
        tmp = " ".join(bbox)
        string += tmp
        string += " "
    return string
        
strings = []
for row in pred_df.values:
    strings.append(get_string(row))
    
    
image_pred_df = pd.DataFrame.from_dict({"Id" : pred_df["ImageInstanceUID"]})
image_pred_df["Id"] = image_pred_df['Id'].map(lambda x : x+"_image")
image_pred_df["PredictionString"] = strings
image_pred_df.head()

/kaggle


Unnamed: 0,Id,PredictionString
0,50243b2cdb56_image,opacity 0.434306 470 946 965 1622
1,cb3eb8d1abd3_image,none 1 0 0 1 1
2,c00bae0e7714_image,none 1 0 0 1 1
3,fe501aa91e43_image,opacity 0.508845 1901 549 2708 2174 opacity 0....
4,7f090fbd8e7c_image,opacity 0.332968 1893 644 2375 1882


<h1 style = "text-align :center; color:black; background-image:url(https://cdn.pixabay.com/photo/2014/06/16/23/40/blue-370128__340.png)"> Final Submission </h1>

In [21]:
dfs = [study_pred_df, image_pred_df]

test_df = pd.concat(dfs)
test_df.to_csv("./working/submission.csv",index = False) #./submission.csv
test_df.head()

Unnamed: 0,Id,PredictionString
0,aac24f8be440_study,typical 0.7929897308349609 0 0 1 1
1,164b05333dd6_study,typical 0.2818778157234192 0 0 1 1
2,9b284d2b8e81_study,typical 0.8898690342903137 0 0 1 1
3,fe3fa13c059c_study,typical 0.33014267683029175 0 0 1 1
4,3c46f027da8c_study,typical 0.7342738509178162 0 0 1 1


In [22]:
test_df.tail()

Unnamed: 0,Id,PredictionString
1258,d91b58caa74d_image,none 1 0 0 1 1
1259,cee98095dd28_image,opacity 0.447499 1317 676 1948 1804 opacity 0....
1260,2be1c227dbf2_image,opacity 0.508658 1585 1099 2424 2478 opacity 0...
1261,b09b20b44640_image,none 1 0 0 1 1
1262,63173909bed5_image,opacity 0.621288 417 427 1164 1866 opacity 0.6...


In [23]:
'''shutil.rmtree("./gdcm")
shutil.rmtree("./test")
os.remove("./meta_test.csv")'''
shutil.rmtree("./working/siim_covid19")
shutil.rmtree("./working/gdcm")
shutil.rmtree("./working/test")
os.remove("./working/meta_test.csv")