In [None]:
!conda install -c conda-forge gdcm -y

In [None]:
import glob
import pydicom
import os
import numpy as np
import pandas as pd

In [None]:
from pydicom.pixel_data_handlers.util import apply_voi_lut, apply_modality_lut
from pydicom import dcmread
import matplotlib.pyplot as plt
%matplotlib inline

# Function to read a DICOM file given its path; optionally applies the modality LUT
# and corrects MONOCHROME1 (inverted) images.
def read_dicom(filepath, modality_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixel data as an 8-bit image.

    Parameters
    ----------
    filepath : str or path-like
        Location of the DICOM file, passed to ``pydicom.dcmread``.
    modality_lut : bool, default True
        When true, ``apply_modality_lut`` is applied to the raw pixel array.
    fix_monochrome : bool, default True
        When true and the dataset's ``PhotometricInterpretation`` is
        ``'MONOCHROME1'``, intensities are inverted (``max - img``).

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as the pixel data, min-max
        rescaled to the 0..255 range. A constant image (max == min) yields
        an all-zero array instead of dividing by zero.
    """
    dcm = pydicom.dcmread(filepath)
    img = dcm.pixel_array
    if modality_lut:
        img = apply_modality_lut(img, dcm)

    # Work in float64 so that `max - min` cannot overflow narrow integer
    # pixel types (e.g. int16 data spanning more than 32767 levels).
    img = np.asarray(img, dtype=np.float64)
    max_img = np.max(img)
    min_img = np.min(img)
    value_range = max_img - min_img

    # A flat image has no contrast to stretch; avoid 0/0 -> NaN -> garbage uint8.
    if value_range == 0:
        return np.zeros(img.shape, dtype=np.uint8)

    if fix_monochrome and getattr(dcm, 'PhotometricInterpretation', None) == 'MONOCHROME1':
        img = max_img - img

    # After an inversion the minimum is 0, so recompute it rather than reuse min_img.
    img = (img - np.min(img)) / value_range
    img = (img * 255).astype(np.uint8)

    return img

In [None]:
# List every top-level entry of the competition input directory.
input_pattern = os.path.join('/kaggle/input/siim-covid19-detection', '*')
dirs = glob.glob(input_pattern)
dirs

In [None]:
# Load the study-level labels and report how many studies fall in each category.
train_study = pd.read_csv('/kaggle/input/siim-covid19-detection/train_study_level.csv')
print(train_study.columns)

# (label column, message printed in front of the count), in reporting order.
category_messages = (
    ('Negative for Pneumonia', 'length of negative for pnemonia'),
    ('Typical Appearance', 'length of Typical Appearance'),
    ('Indeterminate Appearance', 'length of Indeterminate Appearance'),
    ('Atypical Appearance', 'length of Atypical Appearance'),
)
for category_column, message in category_messages:
    # df_sub is left holding the last category's rows once the loop ends.
    df_sub = train_study[train_study[category_column] == 1]
    print(message, len(df_sub))

# train_study['id']

## Enable the line for the category you want to inspect: "Atypical Appearance", "Indeterminate Appearance", "Typical Appearance", or "Negative for Pneumonia"

In [None]:
# Choose exactly one study-level category by leaving a single line uncommented.
df_sub = train_study[train_study['Atypical Appearance'] == 1]
# df_sub = train_study[train_study['Indeterminate Appearance']==1]
# df_sub = train_study[train_study['Typical Appearance']==1]
# df_sub = train_study[train_study['Negative for Pneumonia']==1]

# The 'id' values carry a suffix after '_'; keep only the part before it.
study_id = [raw_id.partition('_')[0] for raw_id in df_sub['id'].values]
print(df_sub.shape[0])
df_sub.head()


#### `train_study` is the study-level dataframe and `train_img` is the image-level dataframe. We first select the studies that belong to one of the 4 categories, then find all images that belong to those selected studies.

In [None]:
# Load the image-level annotations and keep only the rows whose study is in
# the selected category (study_id comes from the study-level selection cell).
train_img = pd.read_csv('/kaggle/input/siim-covid19-detection/train_image_level.csv')
# Vectorised membership test instead of a per-row `in` scan over a Python list.
filtr = train_img['StudyInstanceUID'].isin(study_id)
# .copy() gives an independent frame, so the later column assignment on
# train_img does not trigger pandas' SettingWithCopyWarning.
train_img = train_img[filtr].copy()

In [None]:
import ast
import json 
import matplotlib.patches as patches

def convert_to_list(value):
    """Parse a stringified Python literal (the raw 'boxes' cell) into an object.

    Parameters
    ----------
    value : str, list, or any other object
        Typically the text of a list of box dicts. A value that is already a
        list is returned unchanged, so re-running the parsing cell does not
        wipe previously parsed boxes.

    Returns
    -------
    The result of ``ast.literal_eval(value)``, ``value`` itself when it is
    already a list, or ``[]`` when the value cannot be parsed (for example a
    NaN float, ``None``, or malformed text).
    """
    if isinstance(value, list):
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError):
        # Only parsing failures fall back to "no boxes"; KeyboardInterrupt and
        # other unrelated exceptions are no longer swallowed.
        return []

# Replace the 'boxes' column in place: every cell is passed through convert_to_list.
# NOTE: re-running this cell feeds the already-converted values through
# convert_to_list a second time.
train_img['boxes'] = train_img['boxes'].apply(convert_to_list)


In [None]:
def process_bounding_boxes(boxes):
    """Flatten box mappings into ``[x, y, width, height]`` lists.

    Parameters
    ----------
    boxes : iterable of mappings
        Each mapping must provide the keys 'x', 'y', 'width' and 'height'.

    Returns
    -------
    list of list
        One ``[x, y, width, height]`` entry per input box, in input order.
    """
    fields = ('x', 'y', 'width', 'height')
    return [[entry[field] for field in fields] for entry in boxes]
        
def read_image_bb_index(train_img, index,
                        train_dir='/kaggle/input/siim-covid19-detection/train'):
    """Load the image and bounding boxes for one row of the image-level frame.

    Parameters
    ----------
    train_img : pandas.DataFrame
        Must provide the columns 'StudyInstanceUID', 'id' and 'boxes'.
    index : int
        Positional row index (used with ``.iloc``).
    train_dir : str, optional
        Directory holding one sub-directory per study; defaults to the Kaggle
        competition train folder.

    Returns
    -------
    tuple
        ``(img, boxes_list)``: the array returned by ``read_dicom`` and the
        ``[x, y, width, height]`` lists from ``process_bounding_boxes``.

    Raises
    ------
    FileNotFoundError
        When no ``.dcm`` file matching the row's image id exists under the
        study directory.
    """
    row = train_img.iloc[index]
    study_id = row['StudyInstanceUID']
    # The part of 'id' before the first '_' is used as the file-name prefix.
    image_id = row['id'].split('_')[0]

    # Layout searched: <train_dir>/<study>/<any sub-directory>/<image_id>*dcm
    pattern = os.path.join(train_dir, study_id, '*', image_id + '*dcm')
    # glob returns matches in arbitrary order; sort so the pick is deterministic.
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(
            'No DICOM file found for image %s (pattern: %s)' % (image_id, pattern)
        )

    img = read_dicom(matches[0])
    boxes_list = process_bounding_boxes(row['boxes'])
    return img, boxes_list
    
    
def plot_examples(train_img, sz=16, ncols=4):
    """Plot randomly chosen images with their bounding boxes drawn in red.

    Parameters
    ----------
    train_img : pandas.DataFrame
        Image-level frame accepted by ``read_image_bb_index``.
    sz : int, default 16
        Number of examples to draw. It is capped at ``len(train_img)`` so a
        small frame no longer makes the random selection fail.
    ncols : int, default 4
        Number of columns in the subplot grid; the number of rows is derived
        from ``sz`` (the defaults give the original 4x4, 16x16-inch figure).

    Returns
    -------
    None. The figure is shown with ``plt.show()``.
    """
    n_examples = min(sz, len(train_img))
    if n_examples <= 0:
        print('plot_examples: no images to plot')
        return

    # Ceiling division: enough rows to hold every example.
    nrows = (n_examples + ncols - 1) // ncols
    # squeeze=False keeps `ax` two-dimensional even for a single row or column.
    fig, ax = plt.subplots(nrows=nrows, ncols=ncols,
                           figsize=(4 * ncols, 4 * nrows), squeeze=False)

    random_selection = np.random.choice(len(train_img), size=n_examples, replace=False)
    for plt_idx, idx in enumerate(random_selection):
        img, boxes_list = read_image_bb_index(train_img, idx)
        row, col = divmod(plt_idx, ncols)
        axis = ax[row][col]
        axis.imshow(img, cmap='gray')

        for x, y, width, height in boxes_list:
            rect = patches.Rectangle((x, y), width, height, linewidth=1,
                                     edgecolor='r', facecolor='none')
            axis.add_patch(rect)

    # Hide grid cells that received no image.
    for unused in ax.ravel()[n_examples:]:
        unused.axis('off')

    plt.subplots_adjust(hspace=0.1, wspace=0.2)
    plt.show()
        
    

In [None]:
# Draw a random sample of 16 images from the selected category with their boxes.
plot_examples(train_img, sz=16)

In [None]:
# def find_empty(item_list):
#     return len(item_list)
# num_bb = [ find_empty(item) for item in train_img['boxes'] ]
# np.any([ item==0  for item in num_bb])
# filtr = [ item!=0  for item in num_bb]
# # train_img_ip0 = train_img[filtr]
# # train_img_ta0 = train_img[filtr]
# train_img_na0 = train_img[filtr]

# # print(len(num_bb))
# # num_bb