# Import libraries

In [None]:
import pandas as pd
import numpy as np
import torch 
import torch.nn as nn
import os
import glob
import pydicom
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pydicom import dcmread
import ast


# Data

**Files**
* train_study_level.csv - the train study-level metadata, with one row for each study, including correct labels.
* train_image_level.csv - the train image-level metadata, with one row for each image, including both correct labels and any bounding boxes in a dictionary format. Some images in both test and train have multiple bounding boxes.
* sample_submission.csv - a sample submission file containing all image- and study-level IDs.

**train_study_level.csv**
* id - unique study identifier
* Negative for Pneumonia - 1 if the study is negative for pneumonia, 0 otherwise
* Typical Appearance - 1 if the study has this appearance, 0 otherwise
* Indeterminate Appearance  - 1 if the study has this appearance, 0 otherwise
* Atypical Appearance  - 1 if the study has this appearance, 0 otherwise

**train_image_level.csv**
* id - unique image identifier
* boxes - bounding boxes in easily-readable dictionary format
* label - the correct prediction label for the provided bounding boxes

# Path

In [None]:
# Root folder of the dataset, plus the CSV files and image folder inside it.
DIR_PATH = "../input/siim-covid19-detection"
train_imglvl_path = DIR_PATH + "/train_image_level.csv"
train_stdylvl_path = DIR_PATH + "/train_study_level.csv"
train_path = DIR_PATH + "/train"

# Read the image-level metadata CSV into a DataFrame.
train_df = pd.read_csv(filepath_or_buffer=train_imglvl_path)
# train_df.sample(5)  # uncomment to peek at a few random rows

In [None]:
#train_df['id'].unique().shape
#Nan_df = train_df.groupby("id")["boxes"].agg(lambda s: (s == "NaN").sum()).reset_index().rename({"class_id" : "Nan_values"}, axis = 1)

**Drop rows with NaN values and convert `boxes` from a string to a list of dictionaries**

In [None]:
# Discard every row that contains a missing value and renumber the index from 0.
train_new = train_df.dropna().reset_index(drop=True)

# Each `boxes` cell is stored as text; parse it into the Python object it spells out.
train_new["boxes"] = train_new["boxes"].apply(ast.literal_eval)

**Get random sample from DataFrame**

In [None]:
# Pick 5 rows at random and give them a fresh 0-based index.
sample_df = train_new.sample(n=5).reset_index(drop=True)

In [None]:
# Bare expression: as the last statement of a notebook cell this renders the sampled rows.
sample_df

In [None]:
# Show every sampled image with its bounding boxes drawn on top.
for _, row in sample_df.iterrows():
    # The file on disk is named after the CSV id minus its last 6 characters.
    image_name = row["id"][:-6] + ".dcm"
    study_dir = os.path.join(train_path, row["StudyInstanceUID"])

    # A study folder may hold several series sub-folders and os.listdir()
    # returns them in arbitrary order, so search all of them for the image
    # instead of assuming it lives in the first entry.
    matches = glob.glob(os.path.join(glob.escape(study_dir), "*", image_name))
    if not matches:
        raise FileNotFoundError(
            f"No DICOM file named {image_name} found under {study_dir}"
        )

    dicom = pydicom.dcmread(matches[0])
    img = dicom.pixel_array

    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(10, 10)
    ax.imshow(img, cmap='gray')

    # Each box is a dict with 'x', 'y', 'width' and 'height' keys;
    # (x, y) is passed to Rectangle as its anchor point.
    for box in row['boxes']:
        x, y = int(box['x']), int(box['y'])
        width, height = int(box['width']), int(box['height'])
        rect = patches.Rectangle((x, y),
                                 width, height,
                                 linewidth=2,
                                 edgecolor='r',
                                 facecolor='none')
        ax.add_patch(rect)

    plt.show()



# ***Thanks for your patience***
