In [1]:
import numpy as np
import pandas as pd
import os
import pydicom as pdcm
import matplotlib.pylab as plt
from pydicom.pixel_data_handlers.util import apply_voi_lut
import cv2
import pickle
from tqdm import tqdm

In [2]:
# Folder containing the training DICOM files that the extraction cell reads.
train_dir = r'train'
# os.listdir() returns entries in arbitrary order; sorting makes the iteration
# order (and therefore the numbering of the generated crop files) reproducible.
train_images = sorted(os.listdir(train_dir))

# Annotation table; the first CSV column becomes the index used to look rows up
# by image name.
train_labels = pd.read_csv(r'train.csv', index_col=0)

In [3]:
def CLAHE(image, clip_limit=2., tile_grid_size=(10, 10)):
    """Equalize an image with OpenCV's CLAHE and add a trailing channel axis.

    Parameters
    ----------
    image : numpy.ndarray
        Image handed to ``cv2.CLAHE.apply``. The caller in this notebook
        passes a 2-D ``uint8`` array.
        NOTE(review): OpenCV's CLAHE presumably only accepts single-channel
        8/16-bit input -- confirm before passing anything else.
    clip_limit : float, optional
        Forwarded to ``cv2.createCLAHE`` as ``clipLimit``. Defaults to the
        previously hard-coded value ``2.``.
    tile_grid_size : tuple of int, optional
        Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``. Defaults to the
        previously hard-coded value ``(10, 10)``.

    Returns
    -------
    numpy.ndarray
        The equalized image with a new axis inserted at position 2, i.e.
        shape ``(H, W, 1)`` for a 2-D ``(H, W)`` input.
    """
    equalizer = cv2.createCLAHE(
        clipLimit=clip_limit,
        tileGridSize=tile_grid_size,
    )

    equalized = equalizer.apply(image)
    # Insert the extra axis at position 2 so a 2-D result becomes (H, W, 1).
    return np.expand_dims(equalized, axis=2)

In [4]:
# class_id value whose annotation rows are skipped when cropping.
# NOTE(review): presumably the "no finding" label, whose box coordinates are
# empty -- confirm against train.csv.
SKIPPED_CLASS_ID = 14
# (width, height) every cropped box is resized to.
CROP_SIZE = (64, 64)

rows = []
count = 0

cnn_data_path = r'cnnData'
# exist_ok=True lets this cell be re-run without failing on the existing folder.
os.makedirs(cnn_data_path, exist_ok=True)


for patient in tqdm(train_images):
    # Image id = file name without its extension (no assumption about the
    # extension's length).
    name = os.path.splitext(patient)[0]
    path = os.path.join(train_dir, patient)
    dicom_file = pdcm.dcmread(path)

    # Apply the VOI LUT / windowing described by the DICOM file.
    img = apply_voi_lut(dicom_file.pixel_array, dicom_file)
    # Flip intensities for MONOCHROME1 images.
    if dicom_file.PhotometricInterpretation == "MONOCHROME1":
        img = np.amax(img) - img

    # Rescale pixel values to the full 0..255 range. The divisor must be the
    # value span (max - min), not the raw max; otherwise images whose minimum
    # is not 0 never reach 255. A constant image (span == 0) maps to all zeros
    # instead of dividing by zero.
    img = img.astype(np.float64)
    lowest = img.min()
    span = img.max() - lowest
    if span > 0:
        img = (img - lowest) / span * 255.0
    else:
        img = np.zeros_like(img)
    img = CLAHE(img.astype(np.uint8))

    # A list key makes .loc always return a DataFrame (one row per annotation),
    # even when the image id occurs only once in the table.
    annotations = train_labels.loc[[name]]
    for box in annotations.itertuples(index=False):
        if box.class_id == SKIPPED_CLASS_ID:
            continue

        x_min, y_min = int(box.x_min), int(box.y_min)
        x_max, y_max = int(box.x_max), int(box.y_max)
        if x_max <= x_min or y_max <= y_min:
            # Zero-area box: the crop would be empty and cannot be resized.
            continue

        newImg = img[y_min:y_max, x_min:x_max]
        newImg = cv2.resize(newImg, dsize=CROP_SIZE, interpolation=cv2.INTER_CUBIC)

        pickle_name = f'cnn{count}.data'
        # The context manager closes the file even if pickling raises.
        with open(os.path.join(cnn_data_path, pickle_name), 'wb') as imgFile:
            pickle.dump(newImg, imgFile)

        rows.append([pickle_name, box.class_name, box.class_id])
        count += 1

# Index of the generated crops: file name plus its class label.
columns = ['image_id', 'class_name', 'class_id']
cnn_data = pd.DataFrame(rows, columns=columns)
cnn_data.to_csv('cnn_data.csv', index=False)

100%|██████████████████████████████████████████████████████████████████████████| 15000/15000 [1:19:41<00:00,  3.14it/s]
