In [1]:
%matplotlib inline
import cv2
import numpy as np
import pydicom as dicom
from glob import glob
import json
import os
import shutil
import sys
import random
from matplotlib import image
import matplotlib.pyplot as plt
import re

In [2]:
class Dataset(object):
    dataset_count = 0

    def __init__(self, directory, subdir):
        # deal with any intervening directories
        while True:
            subdirs = next(os.walk(directory))[1]
            if len(subdirs) == 1:
                directory = os.path.join(directory, subdirs[0])
            else:
                break

        slices = []
        for s in subdirs:
            m = re.match("sax_(\d+)", s)
            if m is not None:
                slices.append(int(m.group(1)))

        slices_map = {}
        first = True
        times = []
        for s in slices:
            files = next(os.walk(os.path.join(directory, "sax_%d" % s)))[2]
            offset = None

            for f in files:
                m = re.match("(\D{3})_(\d{2})_(\D{1})(\d{3})_(\D{1})(\d{1})\.dcm", f)
                if m is not None:
                    if first:
                        times.append(int(m.group(2)))
                    if offset is None:
                        offset = int(m.group(1))

            first = False
            slices_map[s] = offset

        self.directory = directory
        self.time = sorted(times)
        self.slices = sorted(slices)
        self.slices_map = slices_map
        self.name = subdir

    def _filename(self, s, t):
        return os.path.join(self.directory,"sax_%d" % s, "(\D{3})_(\d{2})_(\D{1})(\d{3})_(\D{1})(\d{1})\.dcm" % (self.slices_map[s], t))

    def _read_dicom_image(self, filename):
        d = dicom.read_file(filename)
        img = d.pixel_array
        return np.array(img)

    def _read_all_dicom_images(self):
        f1 = self._filename(self.slices[0], self.time[0])
        d1 = dicom.read_file(f1)
        (x, y) = d1.PixelSpacing
        (x, y) = (float(x), float(y))
        f2 = self._filename(self.slices[1], self.time[0])
        d2 = dicom.read_file(f2)

        # try a couple of things to measure distance between slices
        try:
            dist = np.abs(d2.SliceLocation - d1.SliceLocation)
        except AttributeError:
            try:
                dist = d1.SliceThickness
            except AttributeError:
                dist = 8  # better than nothing...

        self.images = np.array([[self._read_dicom_image(self._filename(d, i))
                                 for i in self.time]
                                for d in self.slices])
        self.dist = dist
        self.area_multiplier = x * y

    def load(self):
        self._read_all_dicom_images()
        


In [3]:
# number of bins to use in histogram for gaussian regression
NUM_BINS = 100
# number of standard deviatons past which we will consider a pixel an outlier
STD_MULTIPLIER = 2
# number of points of our interpolated dataset to consider when searching for
# a threshold value; the function by default is interpolated over 1000 points,
# so 250 will look at the half of the points that is centered around the known
# myocardium pixel
THRESHOLD_AREA = 250
# number of pixels on the line within which to search for a connected component
# in a thresholded image, increase this to look for components further away
COMPONENT_INDEX_TOLERANCE = 20
# number of angles to search when looking for the correct orientation
ANGLE_SLICES = 36

#ALL_DATA_DIR =  os.path.join('..', 'input', 'train', 'train')
#ALL_DATA_DIR =  os.path.join('home','amos', '3D_UNet', 'data', 'raw') 
ALL_DATA_DIR =  "/home/amos/3D_UNet/data/raw"

print(ALL_DATA_DIR)

X_DIM, Y_DIM = 64, 64
X_DIM, Y_DIM = 128, 128
T_DIM = 30

home/amos/3D_UNet/data/raw


In [4]:
base_path = os.path.join(ALL_DATA_DIR,'140')
tData = Dataset(base_path, '140')
tData.load()

# base_path = os.path.join(ALL_DATA_DIR)
# tData = Dataset(base_path)
# tData.load()




StopIteration: 