In [None]:
# Install the gdcm package from the conda-forge channel (-y answers prompts automatically).
# NOTE(review): presumably needed so pydicom can decode compressed pixel data - confirm.
!conda install gdcm -c conda-forge -y

In [None]:
import os

from PIL import Image
import pandas as pd
from tqdm.auto import tqdm

import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

import matplotlib.pyplot as plt

import cv2
from IPython.display import  clear_output

clear_output()

In [None]:
def readXray(path, voi_lut = True, fix_monochrome = True):
    '''
    Read an image in dcm (DICOM) format and return it as an 8-bit grayscale array.

    Parameters
    ----------
    path : str or path-like
        Location of the .dcm file.
    voi_lut : bool
        When True, apply the VOI LUT stored in the file to the raw pixel data.
    fix_monochrome : bool
        When True, invert images whose PhotometricInterpretation is
        "MONOCHROME1" so that they are not displayed inverted.

    Returns
    -------
    numpy.ndarray
        uint8 array with the pixel values min-max scaled to 0..255.
        A constant image (no contrast at all) is returned as all zeros.
    '''
    # Original from: https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way
    # dcmread is the supported reader; read_file is only a deprecated alias of it.
    dicom = pydicom.dcmread(path)

    # VOI LUT (if available by DICOM device) is used to transform raw DICOM data to
    # "human-friendly" view
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array

    # depending on this value, X-ray may look inverted - fix that:
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    # Shift so the darkest pixel is 0, then scale the brightest pixel to 255.
    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Every pixel has the same value: dividing would give 0/0 = NaN and an
        # undefined uint8 cast, so return a black image instead.
        return np.zeros(data.shape, dtype=np.uint8)

    data = data / peak
    data = (data * 255).astype(np.uint8)

    return data

In [None]:
# Image-level annotations; later cells read its `id`, `boxes`, `label` and
# `StudyInstanceUID` columns.
train = pd.read_csv('../input/siim-covid19-detection/train_image_level.csv')

In [None]:
# Study-level labels; later cells read its `id` column and the four class
# columns ('Negative for Pneumonia', 'Typical Appearance', ...).
train_study = pd.read_csv('../input/siim-covid19-detection/train_study_level.csv') 

In [None]:
# Preview the first rows of the image-level annotations.
train.head()

In [None]:
# Load one training X-ray as a uint8 array; `dicom` is reused by later cells
# as the sample image.
dicom = readXray("../input/siim-covid19-detection/train/0051d9b12e72/152f6ec68d86/bb4b1da810f3.dcm")

# Show the sample image in grayscale.
plt.figure(figsize=(15,10)) 
plt.imshow(dicom, cmap='gray')

In [None]:
# Select the annotation rows of the study the sample image was loaded from.
bbox_txt = train[train.StudyInstanceUID =='0051d9b12e72']
print(bbox_txt.index)
# NOTE(review): 4605 is hard-coded; presumably it is the index printed above
# for this study - confirm, since it depends on the row order of the CSV.
print(train.boxes[4605])
print(train.label[4605])


In [None]:
# Display the annotation rows selected for the sample study.
bbox_txt.head()

In [None]:
import ast

# The `boxes` cell holds a Python-literal list of dicts with the keys
# 'x', 'y', 'width' and 'height'; parse it for the sample row.
bbox = ast.literal_eval(train.boxes[4605])

# Re-express every box by its two corners: [x1, y1, x2, y2].
bboxs = [
    [b['x'], b['y'], b['x'] + b['width'], b['y'] + b['height']]
    for b in bbox
]
print(bboxs)

In [None]:
def getBboxs(txt_bboxs):
    '''
    Extract the bounding boxes from their text format.

    Parameters
    ----------
    txt_bboxs : str
        Python-literal list of dicts, each with the keys 'x', 'y', 'width'
        and 'height'. Anything else (NaN, None, malformed text, entries with
        missing keys) is treated as "no boxes".

    Returns
    -------
    list
        One [x1, y1, x2, y2] list per box, where x2 = x + width and
        y2 = y + height; an empty list when nothing could be parsed.
    '''
    # Imported here so the function works even if no earlier cell imported ast;
    # a missing import must not be mistaken for "no boxes".
    import ast

    try:
        parsed = ast.literal_eval(txt_bboxs)
        return [
            [b['x'], b['y'], b['x'] + b['width'], b['y'] + b['height']]
            for b in parsed
        ]
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Best effort on bad data only: unparsable input, a non-iterable value,
        # or a box missing one of its keys. Other errors propagate.
        return []

In [None]:
def drawBoxes(boxes):
    '''
    Draw the outline of every bounding box with plt.plot.

    boxes: iterable of boxes indexed as [x1, y1, x2, y2].
    '''
    edge_style = dict(color='#ff8838', linewidth=2)

    for box in boxes:
        x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

        # One (xs, ys) pair per edge, drawn as: top, bottom, left, right.
        edges = (
            ([x1, x2], [y1, y1]),
            ([x1, x2], [y2, y2]),
            ([x1, x1], [y1, y2]),
            ([x2, x2], [y1, y2]),
        )
        for xs, ys in edges:
            plt.plot(xs, ys, **edge_style)

In [None]:
# Corner-format boxes of the sample row; `bboxs_to_darw` is reused by later cells.
# NOTE(review): 4605 is the same hard-coded row index used in the earlier cells.
bboxs_to_darw = getBboxs(train.boxes[4605])

# Overlay the boxes on the full-resolution sample image.
plt.figure(figsize=(15,10)) 
drawBoxes(bboxs_to_darw)
plt.imshow(dicom, cmap='gray')

In [None]:
def newBbox(boxes, img_o_x, img_o_y, img_x=512, img_y=512):
    '''
    Rescale bounding boxes from the original image size to a new image size.

    boxes: iterable of boxes indexed as [x1, y1, x2, y2]
    img_o_x, img_o_y: size of the original image along x and y
    img_x, img_y: size of the resized image along x and y (512 by default)

    Returns a new list with one scaled [x1, y1, x2, y2] per input box.
    '''
    scale_x = img_x / img_o_x
    scale_y = img_y / img_o_y

    return [
        [box[0] * scale_x, box[1] * scale_y, box[2] * scale_x, box[3] * scale_y]
        for box in boxes
    ]

In [None]:
# Downscale the sample image to 512x512.
img = cv2.resize(dicom, (512,512), interpolation = cv2.INTER_AREA)
# Original array dimensions; dim1 is passed to newBbox as the x size and dim0
# as the y size (presumably shape is (rows, columns) - confirm).
# NOTE(review): the conversion cell further down rebinds dim0/dim1 to lists.
dim0 = dicom.shape[0]
dim1 = dicom.shape[1]
# Draw the rescaled boxes over the resized image to check they still line up.
plt.figure(figsize=(15,10)) 
drawBoxes(newBbox(bboxs_to_darw, dim1, dim0))
plt.imshow(img, cmap='gray')

In [None]:
def convert(boxs, class_val=0, size=(512,512)):
    '''
    Convert bounding boxes to the yolov5 label format.

    boxs: iterable of boxes indexed as [x1, y1, x2, y2]
    class_val: class value written at the start of every line
    size: image size; size[0] normalises x values and size[1] normalises y values

    Returns a string with one "class x_center y_center width height" line
    (newline-terminated) per box, or "" when there are no boxes.
    '''
    def _yolo_line(box):
        # Normalisation factors are computed per box, exactly where they are needed.
        dw = 1./size[0]
        dh = 1./size[1]
        x_center = ((box[0] + box[2])/2.0)*dw
        y_center = ((box[1] + box[3])/2.0)*dh
        width = (box[2] - box[0])*dw
        height = (box[3] - box[1])*dh
        fields = [class_val, x_center, y_center, width, height]
        return " ".join(str(field) for field in fields) + "\n"

    return "".join(_yolo_line(box) for box in boxs)
# Demo: yolov5 label text for the sample boxes after rescaling them to 512x512.
print(convert(newBbox(bboxs_to_darw, dim1, dim0)))

In [None]:
# Display the sample boxes rescaled to the default 512x512 target size.
newBbox(bboxs_to_darw, dim1, dim0)

In [None]:
def convertRev(boxs, size=(512,512)):
    '''
    Convert bounding boxes from the yolov5 format back to corner format.

    Parameters
    ----------
    boxs : iterable
        Boxes indexed as [x_center, y_center, width, height], normalised by
        the image size (no leading class value).
    size : tuple
        Image size; size[0] scales x values and size[1] scales y values.

    Returns
    -------
    list
        One [x1, y1, x2, y2] list per box, in pixels.
    '''
    img_w, img_h = size[0], size[1]

    bboxs = []
    for box in boxs:
        box_w = box[2] * img_w
        # The height must be scaled by the y size; scaling it by the x size
        # gives a wrong y2 for non-square images.
        box_h = box[3] * img_h
        x1 = box[0] * img_w - box_w / 2
        y1 = box[1] * img_h - box_h / 2
        bboxs.append([x1, y1, x1 + box_w, y1 + box_h])

    return bboxs

# Demo: convert one normalised yolov5 box back to corner coordinates (default 512x512).
convertRev([[ 0.7753488311001411, 0.6818237628424657, 0.1590697566995768, 0.19198785958904108]])

In [None]:
# Convert all the images to .png format with a resolution of 512x512, saving
# the original size of each one of them so boxes can be rescaled afterwards.
# NOTE: dim0 and dim1 are rebound to lists here; an earlier cell used the same
# names for the sample image's two dimensions.
image_id = []
dim0 = []
dim1 = []
splits = []

for split in ['test', 'train']:
    save_dir = f'images/{split}/'
    os.makedirs(save_dir, exist_ok=True)

    for dirname, _, filenames in tqdm(os.walk(f'../input/siim-covid19-detection/{split}')):
        for file in filenames:
            # Only DICOM files can be decoded; skip anything else in the tree
            # instead of failing in the middle of the conversion.
            if not file.endswith('.dcm'):
                continue

            # Split off the extension rather than replacing the substring
            # 'dcm', which could also occur elsewhere in a file name.
            stem = os.path.splitext(file)[0]

            xray = readXray(os.path.join(dirname, file))

            im = cv2.resize(xray, (512,512), interpolation = cv2.INTER_AREA)
            cv2.imwrite(os.path.join(save_dir, stem + '.png'), im)

            # Keep the pre-resize dimensions next to the image id.
            image_id.append(stem)
            dim0.append(xray.shape[0])
            dim1.append(xray.shape[1])
            splits.append(split)

In [None]:
# Archive the converted PNGs into images.zip, then clear the cell output.
!zip -r images.zip ./images
clear_output()

In [None]:
# One row per converted image: its id, its original dimensions and its split.
meta = pd.DataFrame.from_dict({'image_id': image_id, 'dim0': dim0, 'dim1': dim1, 'split': splits})
# Persist the metadata next to the images.
meta.to_csv('meta.csv', index=False)
meta.head()

In [None]:
# Separate the metadata rows by the split they were converted from.
meta_test = meta[meta.split == 'test']
meta_train = meta[meta.split == 'train']

In [None]:
# Number of test images (rows) and metadata columns.
meta_test.shape

In [None]:
# Look at the image-level annotations again before adding the join key.
train.head()

In [None]:
# Number of annotation rows and columns.
train.shape

In [None]:
# Keep the part of `id` before the first underscore as `image_id`, the key
# used to join with the converted-image metadata.
train['image_id'] = train.id.apply(lambda x: x.split('_')[0])
train.head()

In [None]:
# Attach the original image dimensions to every annotation row.
# NOTE(review): how='outer' also keeps rows present in only one of the two
# frames, which would then carry NaN in the other frame's columns - confirm
# that this is intended.
result = pd.merge(meta_train, train, on='image_id', how='outer')
result.head()

In [None]:
# Per row: parse the text boxes and rescale them to the default 512x512 size,
# passing dim1 as the original x size and dim0 as the original y size.
result['bboxs_512'] = result.apply(lambda x: newBbox(getBboxs(x.boxes), x.dim1, x.dim0), axis=1)

In [None]:
# Check the new bboxs_512 column.
result.head()

In [None]:
# Preview the study-level labels before they are recoded.
train_study.head()

In [None]:
# Recode each study-level flag column: 1 becomes the class name, 0 becomes ''.
for _column, _class_name in [
    ('Negative for Pneumonia', 'negative'),
    ('Typical Appearance', 'typical'),
    ('Indeterminate Appearance', 'indeterminate'),
    ('Atypical Appearance', 'atypical'),
]:
    train_study[_column] = train_study[_column].replace(1, _class_name).replace(0, '')

# Concatenating the four recoded columns leaves only the names of the flagged
# classes in `class_name`.
train_study['class_name'] = (
    train_study['Negative for Pneumonia']
    + train_study['Typical Appearance']
    + train_study['Indeterminate Appearance']
    + train_study['Atypical Appearance']
)

In [None]:
# Numeric class id derived from the text label:
# negative -> 0, typical -> 1, indeterminate -> 2, atypical -> 3.
train_study['class_val'] = (
    train_study['class_name']
    .replace('negative', 0)
    .replace('typical', 1)
    .replace('indeterminate', 2)
    .replace('atypical', 3)
)
train_study.head()

In [None]:
 # Keep the part of `id` before the first underscore as the study join key.
 train_study['StudyInstanceUID'] = train_study.id.apply(lambda x: x.split('_')[0])

In [None]:
# Check the recoded study table with its new StudyInstanceUID column.
train_study.head()

In [None]:
# Keep only the class columns and the join key for the merge with the image table.
train_study_df = train_study[['class_name', 'class_val', 'StudyInstanceUID'] ]
train_study_df.head()

In [None]:
# Give every image row the class of the study it belongs to.
# NOTE(review): how='outer' also keeps unmatched rows from either side, which
# would have NaN image_id or NaN class_val - confirm there are none.
result_df = pd.merge(result, train_study_df, on='StudyInstanceUID', how='outer')
result_df.head()

In [None]:
# Build the yolov5 label text per image; the study-level class_val is written
# as the class of every box, and rows without boxes get an empty string.
result_df['yolo_label'] = result_df.apply(lambda x: convert(x.bboxs_512, x.class_val), axis=1)
result_df.head()

In [None]:
# Output folder for the label files; the trailing slash matters because later
# cells build paths by plain string concatenation.
labels_dir = f'labels/'
os.makedirs(labels_dir, exist_ok=True)

In [None]:
def setYoloLables(name, labels):
    '''
    Write the label text of one image to <labels_dir><name>.txt.

    name: file name without extension
    labels: text to write; an existing file is overwritten
    '''
    target_path = labels_dir + name + '.txt'
    # The with-block closes the file on exit, so no explicit close is needed.
    with open(target_path, 'w') as label_file:
        label_file.write(labels)

In [None]:
# Write one label file per row of result_df, named after the row's image_id.
# NOTE(review): as the last expression of the cell, the apply result (one None
# per row) is also displayed - presumably unintended.
result_df.apply(lambda x: setYoloLables(x.image_id, x.yolo_label), axis=1)

In [None]:

# NOTE(review): `bboxes` is assigned here but not used anywhere in the visible notebook.
bboxes = []
# Spot-check one generated label file by printing it line by line.
with open(labels_dir+'29b23a11d1e4.txt', 'r') as file:
     for line in file:
            print(line)

In [None]:
# Archive the generated label files into labels.zip, then clear the cell output.
!zip -r labels.zip ./labels
clear_output()

In [None]:
# Remove the unpacked folders; the cells above already zipped them into
# images.zip and labels.zip.
!rm -rf ./images
!rm -rf ./labels