In [None]:
# Import initial dependencies
import tensorflow as tf
import pydicom as dicom
import pandas as pd
import os
import PIL
import csv
import cv2

In [None]:
# Convert .dcm -> .jpeg
# Directories searched for DICOM files (relative to the working directory).
dcm_dir_path_train = "Data/SIIM/dicom-images-train"
dcm_dir_path_test = "Data/SIIM/dicom-images-test"

# Output directory passed to extract_images for the training images.
jpg_dir_path_train = "Data/SIIM_JPEG/jpeg-images-train"

def extract_images(dcm_path, jpg_path):
    """Convert every DICOM file under ``dcm_path`` to a JPEG in ``jpg_path``.

    The tree is expected to look like ``dcm_path/<dir>/<dir>/<name>.dcm``.
    Each ``.dcm`` file is written to ``jpg_path`` as ``<name>.jpeg``; files
    whose JPEG already exists are skipped, so an interrupted run can simply
    be restarted.

    Args:
        dcm_path: Root directory of the DICOM tree.
        jpg_path: Output directory; created if it does not exist.

    Returns:
        None. Progress messages are printed to stdout.
    """
    os.makedirs(jpg_path, exist_ok=True)

    dcm_dirs = os.listdir(dcm_path)

    # Only sub-directories can hold images. Filtering on that drops stray
    # files such as macOS '.DS_Store' without assuming exactly one is present.
    image_dirs = [name for name in dcm_dirs
                  if os.path.isdir(os.path.join(dcm_path, name))]
    converted = [name for name in os.listdir(jpg_path) if name.endswith('.jpeg')]

    # Check if the files were already converted to JPEG.
    # Heuristic: one top-level directory is counted as one image.
    if len(image_dirs) == len(converted):
        print("Files have been converted already!")
        return

    print("Processing...")
    for dcm_dir in image_dirs:
        outer_path = os.path.join(dcm_path, dcm_dir)
        for inner_dcm_dir in os.listdir(outer_path):
            dcm_image_path = os.path.join(outer_path, inner_dcm_dir)
            if not os.path.isdir(dcm_image_path):
                continue
            for dcm_file in os.listdir(dcm_image_path):
                # Skip anything that is not a DICOM file (e.g. '.DS_Store'),
                # which dcmread cannot be expected to parse.
                if not dcm_file.endswith('.dcm'):
                    continue
                jpeg_name = dcm_file[:-len('.dcm')] + '.jpeg'
                jpeg_file_path = os.path.join(jpg_path, jpeg_name)
                # Check if the file already exists
                if not os.path.exists(jpeg_file_path):
                    dcm_image = dicom.dcmread(os.path.join(dcm_image_path, dcm_file))
                    # NOTE(review): pixel_array is written as-is; confirm its
                    # bit depth is one the JPEG writer handles as intended.
                    cv2.imwrite(jpeg_file_path, dcm_image.pixel_array)

    print("Finished!")
    print("DCM_DIRS LENGTH: " + str(len(dcm_dirs)))

In [None]:
import glob
import matplotlib.pyplot as plt

def extract_info(dcm_path, ds, print_general_info=True, attribs=[]):
    """Print a short, human-readable summary of a DICOM dataset.

    Args:
        dcm_path: Path the dataset was read from; only its final component
            is printed.
        ds: Dataset object exposing DICOM keywords as attributes and
            supporting ``'Keyword' in ds``.
        print_general_info: When true, print the patient/study fields and,
            if pixel data is present, the image size and pixel spacing.
            When false, print only ``type(ds)``.
        attribs: Unused; kept so existing callers keep working.

    Returns:
        None. Everything is written to stdout.
    """
    if not print_general_info:
        print(type(ds))
        return

    def field(keyword):
        # A missing tag is reported as "N/A" instead of raising, and str()
        # keeps the concatenation below safe for non-string values.
        value = getattr(ds, keyword, None)
        return "N/A" if value is None else str(value)

    patient_name = getattr(ds, "PatientName", None)
    display_name = getattr(patient_name, "family_name", patient_name)
    display_name = "N/A" if display_name is None else str(display_name)

    # basename follows the platform's separator rules rather than assuming '/'.
    print("Filename............: " + os.path.basename(dcm_path))
    print("Storage Type........: " + field("SOPClassUID"))
    print("Patient's Name......: " + display_name)
    print("Patient ID..........: " + field("PatientID"))
    print("Patient Age.........: " + field("PatientAge"))
    print("Patient's Sex.......: " + field("PatientSex"))
    print("Modality............: " + field("Modality"))
    print("Examined Body Part..: " + field("BodyPartExamined"))
    print("View Position.......: " + field("ViewPosition"))

    if 'PixelData' in ds:
        rows = int(ds.Rows)
        cols = int(ds.Columns)
        print("Image Size..........: {rows:d} x {cols:d}, {size:d} bytes".format(
            rows=rows,
            cols=cols,
            size=len(ds.PixelData)))

        if 'PixelSpacing' in ds:
            print("Pixel Spacing.......: " + str(ds.PixelSpacing))
            
        
def plot_pixel_array(ds, figsize=(10, 10)):
    """Show the dataset's pixel array as an image with the 'bone' colormap.

    Args:
        ds: Object exposing a ``pixel_array`` attribute that ``imshow`` accepts.
        figsize: Figure size forwarded to matplotlib.
    """
    _, axes = plt.subplots(figsize=figsize)
    axes.imshow(ds.pixel_array, cmap=plt.cm.bone)
    axes.grid(False)
    plt.show()
    

In [None]:
# Convert the training DICOM images to JPEG; existing output files are left alone.
extract_images(dcm_path=dcm_dir_path_train, jpg_path=jpg_dir_path_train)

In [None]:
# Inspect the metadata and pixels of a single training image
sample_paths = glob.glob(dcm_dir_path_train + '/*/*/*.dcm')
if sample_paths:
    dcm_path = sample_paths[0]
    ds = dicom.dcmread(dcm_path)
    extract_info(dcm_path, ds)
    plot_pixel_array(ds)

In [None]:
from mask_functions import *
import time

# X-Ray With Masking

# CSV with one row per annotation; read below into df_rle.
train_rle = 'Data/SIIM/train-rle.csv'
# Glob pattern matching every training DICOM file (two directory levels deep).
dcm_file_paths = dcm_dir_path_train + '/*/*/*.dcm'
# Glob prefix for the directories holding the files; '<ImageId>.dcm' is appended by the helpers below.
dcm_file_path_dir = dcm_dir_path_train + '/*/*/'
# Only referenced by the disabled plotting scaffold at the end of this cell.
num_images = 15

# Read the train RLE data into a DataFrame
df_rle = pd.read_csv(train_rle)

# Disabled plotting scaffold (kept for reference):
# fig, ax = plt.subplots(nrows=num_images // 5, ncols=5, sharey=True, figsize=(20, num_images // 5 * 4))
# axes = ax.ravel()


def get_labels_helper(i, labels, masks):
    """Append the label for row ``i`` of ``df_rle`` and store its mask, if any.

    Rows whose DICOM file cannot be found under ``dcm_file_path_dir`` are
    skipped without touching ``labels`` or ``masks``.

    Args:
        i: Row label used with ``df_rle.loc``.
        labels: List that receives 1 when the row carries an encoded mask and
            0 when its encoding is "-1".
        masks: Dict that receives ``ImageId -> rle2mask(...)`` for rows with
            an encoded mask.
    """
    image_id = df_rle.loc[i, 'ImageId']
    enc_pixel = df_rle.loc[i, ' EncodedPixels']

    # glob returns an empty list when nothing matches; indexing it directly
    # would raise IndexError before the existence check could run.
    matches = glob.glob(dcm_file_path_dir + image_id + '.dcm')
    if not matches or not os.path.exists(matches[0]):
        return

    if enc_pixel.strip() != "-1":
        masks[image_id] = rle2mask(enc_pixel, 1024, 1024)
        labels.append(1)
    else:
        labels.append(0)
            
def get_image_paths(i, paths_dict):
    """Record the DICOM file path for row ``i`` of ``df_rle`` in ``paths_dict``.

    The path used to be looked up and then discarded, leaving ``paths_dict``
    unused. It is now stored under the row's ImageId.
    NOTE(review): keying by ImageId mirrors how the mask dictionary is filled
    elsewhere in this notebook; confirm this is the intended mapping.

    Args:
        i: Row label used with ``df_rle.loc``.
        paths_dict: Dict that receives ``ImageId -> file path``. Rows without
            a matching file leave it unchanged.
    """
    image_id = df_rle.loc[i, 'ImageId']
    # An empty glob result means the file is missing; skip instead of
    # raising IndexError on [0].
    matches = glob.glob(dcm_file_path_dir + image_id + '.dcm')
    if matches:
        paths_dict[image_id] = matches[0]


def get_labels():
    """Build labels and masks for every row of ``df_rle`` whose DICOM file exists.

    Rows without a matching ``<ImageId>.dcm`` under ``dcm_file_path_dir`` are
    skipped, so ``labels`` can be shorter than ``df_rle``. If an ImageId
    appears on several rows, the mask from the last such row is the one kept.

    Returns:
        tuple: ``(labels, masks)``. ``labels`` is a list holding 1 for rows
        with an encoded mask and 0 for rows whose encoding is "-1";
        ``masks`` maps ImageId to the ``rle2mask`` result for the former.
    """
    labels = []
    masks = {}

    # List the DICOM files once. Globbing the whole tree for every CSV row is
    # quadratic, and indexing an empty glob result raised IndexError.
    available = {os.path.basename(path)
                 for path in glob.glob(dcm_file_path_dir + '*.dcm')
                 if os.path.exists(path)}

    for i in range(len(df_rle)):
        if i % 100 == 0:
            print("First " + str(i))

        image_id = df_rle.loc[i, 'ImageId']
        enc_pixel = df_rle.loc[i, ' EncodedPixels']
        if image_id + '.dcm' not in available:
            continue

        if enc_pixel.strip() != "-1":
            masks[image_id] = rle2mask(enc_pixel, 1024, 1024)
            labels.append(1)
        else:
            labels.append(0)

    return (labels, masks)

In [None]:
import multiprocessing as mp
from multiprocessing.pool import Pool, ThreadPool
from joblib import Parallel, delayed
import numpy as np
import pandas as pd

# Build the label list and the ImageId -> mask dictionary from df_rle.
labels, masks = get_labels()

In [None]:
# Preview only: displaying the whole list would flood the cell output.
labels[:10]

In [None]:


# Gather feature info
# Dataset attribute names read from each DICOM file by get_features; together
# with 'ImageId' they also name the columns of the DataFrame built later.
dicom_features = ['PatientAge',
                 'PatientSex',
                 'ViewPosition',
                 'PixelSpacing',
                 'Rows',
                 'Columns']

def get_features(dcm_file, attribs):
    """Read one DICOM file and return its file name plus the requested attributes.

    Args:
        dcm_file: Path to the DICOM file.
        attribs: Iterable of attribute names to read from the dataset.

    Returns:
        numpy.ndarray: 1-D object array ``[file name, value, value, ...]``
        with the values in the order of ``attribs``.

    Raises:
        AttributeError: If the dataset lacks one of the requested attributes.
    """
    ds = dicom.dcmread(dcm_file)
    values = [os.path.basename(dcm_file)]
    values.extend(getattr(ds, name) for name in attribs)

    # Fill an object array element by element. Letting NumPy infer the shape
    # fails (or warns, on older releases) when a value is itself a sequence,
    # as a multi-valued attribute such as PixelSpacing is.
    row = np.empty(len(values), dtype=object)
    for index, value in enumerate(values):
        row[index] = value
    return row

def fetch_data_info(dcm_file_paths, attributes):
    """Run ``get_features`` in parallel over every file matching a glob pattern.

    Args:
        dcm_file_paths: Glob pattern selecting the DICOM files.
        attributes: Attribute names forwarded to ``get_features``.

    Returns:
        The collected ``get_features`` results, one per matched file.
    """
    tasks = (delayed(get_features)(path, attributes)
             for path in glob.glob(dcm_file_paths))
    # One worker per CPU reported by multiprocessing.
    runner = Parallel(n_jobs=mp.cpu_count(), verbose=1)
    return runner(tasks)

# Collect the selected attributes for every training DICOM file. At this point
# the name holds the raw per-file rows; the next cell turns them into a DataFrame.
dicom_df = fetch_data_info(dcm_file_paths=dcm_file_paths, attributes=dicom_features)

In [None]:
# Stack the per-file rows into a table: file name first, then one column per attribute.
feature_columns = ['ImageId', *dicom_features]
dicom_df = pd.DataFrame(np.array(dicom_df), columns=feature_columns)
dicom_df.head()

In [None]:
# NOTE(review): `image` is not assigned at module level in any visible cell
# (it only exists as a local inside extract_images), so this line raises
# NameError on a fresh kernel. Confirm which RLE string was meant here.
image_mask = rle2mask(image, 1024, 1024)