In [1]:
%pylab inline
import os
import pydicom as dicom
import pandas as pd
import cv2

Populating the interactive namespace from numpy and matplotlib


In [2]:
def get_indices(idx_file_path):
    txt = ''
    with open(idx_file_path, "r") as f:
        txt = f.read()

    indices = np.array([[int(i) for i in t.split()]\
                        for t in txt.split('\n') if len(t)>0])
    indices = np.unique(indices[:,2])
    return indices

def separate_slices(dcm_path, indices):
    dcms = [dicom.dcmread(os.path.join(dcm_path,d)) for d in os.listdir(dcm_path)]
    dcms.sort(key = lambda x: int(x.InstanceNumber))
    image_stack = np.stack([d.pixel_array for d in dcms])
    positive = image_stack[indices]
    negative = [slice_img for i,slice_img in enumerate(image_stack) if i not in indices]
    
    return (positive, negative)


def save_images(images, patient_name, save_path):
    for idx, img in enumerate(images):
        fname = f'{patient_name}_{idx}.png'
        print(os.path.join(save_path,fname))
        print(img.shape)
        plt.imsave(os.path.join(save_path,fname), img)

        

def gen_data(idx_files,save_path):
    for f in idx_files:
        try:
            #load the indices file from Annotations.
            indices = get_indices(f)
            #Generate the filename and patient name (for labeling and lookup)
            fname = f.split('/')[-1]
            name = '_'.join(fname.split('_')[:3])
            print(name)
            #Get load path of dcm images.
            dcm_path = name_map[name]

            #Split grab positive images and negative images from the dcms
            positive, negative = separate_slices(dcm_path, indices)
            
            #Save the images in train or valid depending on argument passed above.
            print('saving positive')
            print(len(positive))
            save_images(positive, name, os.path.join(save_path,'1/'))
            print('saving negavite')
            save_images(negative, name, os.path.join(save_path,'0/'))            

        except:
            print('error')
            pass

In [3]:
series = pd.read_csv('./data/ct_lymph_nodes_series_data.csv')
uids = set(series.SeriesInstanceUID)

root = './data/CT Lymph Nodes/'
annot_root = './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS'

paths = []
annot_names = []
name_map = {}
reverse_map = {}
for d in os.listdir(root):
    if d in uids:
        path = os.path.join(root,d)
        paths.append(path)
        load_path = os.path.join(path,os.listdir(path)[0])
        try:
            annot_name = str(dicom.dcmread(load_path).PatientName)
            annot_names.append(annot_name)
            name_map[annot_name] = path
            reverse_map[path] = annot_name
        except:
            #bad file
            pass

In [4]:
idx_files = []
for path, dirs, files in os.walk(annot_root):
    for f in files:
        if '_indices.txt' in f:
            idx_files.append(os.path.join(path,f))


split_idx = int(len(idx_files)*.85)
train = idx_files[:split_idx]
train_path = 'data/ct_data/train'
valid = idx_files[split_idx:]
valid_path = 'data/ct_data/valid'

In [6]:
valid

['./data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/MED_LYMPH_090/MED_LYMPH_090_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/MED_LYMPH_039/MED_LYMPH_039_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/ABD_LYMPH_005/ABD_LYMPH_005_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/MED_LYMPH_028/MED_LYMPH_028_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/MED_LYMPH_023/MED_LYMPH_023_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/ABD_LYMPH_002/ABD_LYMPH_002_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/ABD_LYMPH_027/ABD_LYMPH_027_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/MED_LYMPH_019/MED_LYMPH_019_lymphnodes_indices.txt',
 './data/MED_ABD_LYMPH_ANNOTATIONS/MED_ABD_LYMPH_ANNOTATIONS/MED_LYMPH_015/MED_LYMPH_015

In [7]:
gen_items = [(train, train_path), (valid, valid_path)]

In [None]:
for files, path in gen_items[1:]:
    gen_data(files,path)

MED_LYMPH_090
saving positive
1
data/ct_data/valid/1/MED_LYMPH_090_0.png
(512, 512)
saving negavite
data/ct_data/valid/0/MED_LYMPH_090_0.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_1.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_2.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_3.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_4.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_5.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_6.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_7.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_8.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_9.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_10.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_11.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_12.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_13.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_14.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_15.png
(512, 512)
data/ct_data/valid/0/MED_LYMPH_090_16.png
(512, 512)
data/ct_d