### Install the pydicom library to read patient DICOM (.dcm) data

In [0]:
# Install pydicom into the Colab runtime (shell escape; runs pip as a subprocess).
# NOTE(review): the version is not pinned, so the newest release is installed on
# each fresh runtime; consider `%pip install pydicom==<version>` for reproducibility.
!pip install pydicom



### Mount Google Drive

In [0]:
# Colab-only: make Google Drive available inside the runtime under /content/drive/.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


### Import necessary libraries

In [0]:
import pydicom
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
from glob import glob
from PIL import Image
import os

### Set the working directory

In [0]:
# Project root on the mounted Drive; the data, CSV and output paths are all joined onto it.
project_path =  '/content/drive/My Drive/great learning/capstone project/rsna-pneumonia-detection'
# Also make the project root the current working directory.
os.chdir(project_path)

### Set the path to DCM data directory

In [0]:
# Folder holding the per-patient "<patientId>.dcm" files read by the export loops.
data_dir = os.path.join(project_path,"stage_2_train_images")


### Load the CSV with bbox annotations

In [0]:
# Load the bounding-box annotations (one row per annotation, keyed by patientId).
# reset_index() keeps the original row number as an extra "index" column.
labels_csv_path = os.path.join(project_path, "stage_2_train_labels.csv")
bbox_dataframe = pd.read_csv(labels_csv_path).reset_index()

### Create list of pneumonia and non-pneumonia patients

In [0]:
# Split the patient IDs by label: Target == 1 (pneumonia) vs Target == 0.
# A patient can own several annotation rows, so duplicates are dropped while
# keeping the order of first appearance.
target_is_one = bbox_dataframe['Target'] == 1
target_is_zero = bbox_dataframe['Target'] == 0

pneumonia_patients = (
    bbox_dataframe.loc[target_is_one, 'patientId']
    .drop_duplicates()
    .to_list()
)
non_pneumonia_patients = (
    bbox_dataframe.loc[target_is_zero, 'patientId']
    .drop_duplicates()
    .to_list()
)

### Set the image size and path to the destination folders for X-Ray images and annotation masks

In [0]:
# Size (in pixels) of the annotation masks generated below.
image_height = 1024
image_width = 1024

# Destination folders for the exported X-ray PNGs and their annotation masks.
image_path = os.path.join(project_path,"data","images")
mask_path = os.path.join(project_path,"data","masks")

# Create the destination folders up front: saving a PNG into a missing folder
# raises FileNotFoundError. exist_ok=True makes re-running this cell harmless.
for output_dir in (image_path, mask_path):
  os.makedirs(output_dir, exist_ok=True)

### Iterate through the patient IDs and, for each patient, generate the X-ray image from the DICOM pixel array and the annotation mask from the bounding-box CSV

In [0]:
# Export every pneumonia patient as two PNGs named "<patientId>.png":
#   * the X-ray image taken from the DICOM pixel array, and
#   * a mask of the same name in which each annotated bounding box is filled with 255.

# Group the annotations once instead of re-filtering the whole frame per patient.
boxes_by_patient = bbox_dataframe.groupby('patientId')

for ids in tqdm(pneumonia_patients):
  dcm_file = os.path.join(data_dir,"%s.dcm" %ids)
  # dcmread is the supported reader; read_file was removed in pydicom 3.0.
  dcm_data = pydicom.dcmread(dcm_file)
  image = dcm_data.pixel_array
  # NOTE(review): assumes 8-bit pixel data; astype(np.uint8) would wrap larger values - confirm.
  image = image.astype(np.uint8)
  im = Image.fromarray(image)

  # Start from an all-background mask and paint each bounding box onto it.
  mask = np.zeros(shape=(image_height, image_width),
                     dtype=np.uint8)

  patient_boxes = boxes_by_patient.get_group(ids)[['x', 'y', 'width', 'height']]
  for box in patient_boxes.itertuples(index=False):
    # Coordinates are truncated to ints so they can be used as slice bounds.
    x, y, w, h = (int(value) for value in box)
    mask[y:y+h,x:x+w] = 255
  ms = Image.fromarray(mask)
  im.save(os.path.join(image_path,ids+".png"))
  ms.save(os.path.join(mask_path,ids+".png"))
    

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))




In [0]:
# Export every non-pneumonia patient as an X-ray PNG plus an all-zero mask PNG,
# both named "<patientId>.png".

# No boxes are drawn for these patients, so the mask is identical for all of
# them: build it once instead of once per iteration.
mask = np.zeros(shape=(image_height, image_width),
                   dtype=np.uint8)
ms = Image.fromarray(mask)

for ids in tqdm(non_pneumonia_patients):
  dcm_file = os.path.join(data_dir,"%s.dcm" %ids)
  # dcmread is the supported reader; read_file was removed in pydicom 3.0.
  dcm_data = pydicom.dcmread(dcm_file)
  image = dcm_data.pixel_array
  # NOTE(review): assumes 8-bit pixel data; astype(np.uint8) would wrap larger values - confirm.
  image = image.astype(np.uint8)
  im = Image.fromarray(image)

  im.save(os.path.join(image_path,ids+".png"))
  ms.save(os.path.join(mask_path,ids+".png"))
    