In [1]:
import os
import numpy as np
import dicom
from scipy.misc import imresize
import random

In [2]:
# Notebook-wide configuration read by the loader functions below.
# Base directory: data folders and label CSVs are resolved relative to the
# current working directory at the time this cell runs.
directory = os.getcwd()
# When True, load_images passes every decoded frame through image_resize.
resize = True
# Target size handed to imresize inside image_resize.
img_shape = (128,128)

In [3]:
'''Loading train labels'''
def load_labels(filename):
    '''Read a label CSV and map each patient id to its two volume values.

    The file is looked up relative to the module-level ``directory``. The
    first row is skipped as a header; every remaining row must be numeric and
    comma-separated with at least three columns: the id followed by the two
    volumes (systole and diastole, per the original comment in this cell).

    Returns:
        dict mapping ``int`` patient id -> ``[float, float]`` (columns 1 and 2).
    '''
    # ndmin=2 keeps a file with a single data row as shape (1, n_cols).
    # Without it loadtxt returns a 1-D array, iteration yields scalars and
    # row[0] fails.
    labels_tr = np.loadtxt(
        os.path.join(directory, filename), delimiter=",", skiprows=1, ndmin=2
    )
    return {int(row[0]): [float(row[1]), float(row[2])] for row in labels_tr}

In [4]:
def image_resize(img):
    '''Centre-crop ``img`` to a square and resize it to ``img_shape``.

    Steps:
      1. If the image has fewer rows than columns it is transposed, so the
         first axis is always the longer (or equal) one.
      2. A square whose side is the shorter of the first two dimensions is cut
         from the centre.
      3. The square is passed to ``imresize`` with the module-level
         ``img_shape``.

    Returns whatever ``imresize`` returns for the cropped square.

    NOTE(review): ``scipy.misc.imresize`` was deprecated in SciPy 1.0 and
    removed in SciPy 1.3, so this notebook needs an older SciPy (with PIL)
    to run.
    '''
    if img.shape[0] < img.shape[1]:
        img = img.T
    short_side = min(img.shape[:2])
    # Offset of the centred square along each axis: half of the surplus.
    # The previous formula, int(dim - short_side/2), started the window past
    # the centre, so the slice was truncated to a corner patch.
    y = (img.shape[0] - short_side) // 2
    x = (img.shape[1] - short_side) // 2
    cropped_img = img[y:y + short_side, x:x + short_side]
    return imresize(cropped_img, img_shape)

In [5]:
def load_images(folder, frames_per_slice=30):
    '''Load every short-axis ("sax") DICOM slice for each patient in ``folder``.

    Expected layout under ``os.path.join(directory, folder)``:
      * one sub-folder per patient, named with an integer id;
      * inside it either a single wrapper folder that holds the slice
        folders, or the slice folders directly;
      * only slice folders whose name contains "sax" are read, and only
        files ending in ".dcm".

    Each frame is converted to float and divided by its maximum. An all-zero
    frame is left as zeros rather than becoming NaN. If the module-level
    ``resize`` flag is set, the frame is passed through ``image_resize``.
    Frames whose decoding raises ``ValueError`` are reported and skipped.

    Every slice is then brought to exactly ``frames_per_slice`` frames: shorter
    slices are padded by cycling through their own frames from the start,
    longer ones are truncated. A slice is finalised as soon as its own files
    have been read, so an unreadable file can no longer leak frames into the
    next slice. Slices with no readable frame are dropped.

    Patients, slice folders and files are visited in sorted order so the
    result does not depend on filesystem listing order.

    Args:
        folder: data folder name, relative to the module-level ``directory``.
        frames_per_slice: number of frames kept per slice (default 30).

    Returns:
        (study_ids, study_images): a list of patient ids as strings, and a dict
        mapping each id to ``np.array`` of that patient's slices.

    Raises:
        ValueError: if a patient folder name is not an integer.
        StopIteration: if a directory that should be walked does not exist.
    '''
    path, patient_folders, _ = next(os.walk(os.path.join(directory, folder)))
    # key=int validates the names (ValueError on non-numeric folders, as
    # before) and gives numeric rather than lexicographic order.
    patient_folders = sorted(patient_folders, key=int)

    study_ids = []
    study_images = {}
    for subdir in patient_folders:
        img_path, subdirs, _ = next(os.walk(os.path.join(path, subdir)))
        if len(subdirs) == 1:
            # A single child is treated as a wrapper; slices live one level down.
            img_path, slices, _ = next(os.walk(os.path.join(path, subdir, subdirs[0])))
        else:
            slices = subdirs

        print("loading images from folder %s of %s" %(subdir, folder))

        slice_images = []
        for s in sorted(slices):
            if "sax" not in s:
                continue

            files = next(os.walk(os.path.join(img_path, s)))[2]
            frames = []
            for fname in sorted(files):
                if not fname.endswith(".dcm"):
                    continue
                image = dicom.read_file(os.path.join(img_path, s, fname))
                try:
                    image = image.pixel_array.astype(float)
                    peak = np.max(image)
                    if peak != 0:
                        image /= peak  # scaling
                    if resize:
                        image = image_resize(image)
                    frames.append(image)
                except ValueError:
                    print("error in making array from %s file of %s/%s"%(fname,subdir,s))

            if not frames:
                continue

            # i % len(frames) cycles through the frames when there are too
            # few and simply takes the first frames_per_slice when there are
            # too many.
            n_frames = len(frames)
            frames = [frames[i % n_frames] for i in range(frames_per_slice)]
            slice_images.append(np.array(frames))

        print("%d sax foldes" %len(slice_images))
        study_images[subdir] = np.array(slice_images)
        study_ids.append(subdir)

    return study_ids, study_images
                    

In [None]:
'''Load train and validation images. Resize and check if every slice has got 30 images'''

def train_npy(folder):
    '''Build per-slice sample and target arrays for a labelled data folder.

    Images come from ``load_images(folder)`` and labels from
    ``load_labels(folder + ".csv")``. Every slice of a patient becomes one
    sample, and each sample is paired with that patient's label entry.
    Nothing is written to disk here; the arrays are returned to the caller.

    Returns:
        (X, y): ``X`` is the stacked slices cast to ``uint8``; ``y`` holds one
        label row per slice, in the same order.
    '''
    print("Writing npy file for train images...")
    patient_ids, images_by_patient = load_images(folder)
    label_map = load_labels(folder + ".csv")

    samples, targets = [], []
    for patient in patient_ids:
        stack = images_by_patient[patient]
        volumes = label_map[int(patient)]

        n_slices = stack.shape[0]
        # One sample per slice, each paired with the same patient-level label.
        samples.extend(stack[k, :, :, :] for k in range(n_slices))
        targets.extend([volumes] * n_slices)

    # Materialise the collected lists as arrays for the caller to save.
    X = np.array(samples, dtype=np.uint8)
    y = np.array(targets)

    return X, y
     

In [None]:
# Build sample/target arrays for both labelled folders.
X1, y1 = train_npy("train")
X2, y2 = train_npy("validate")

# Stack along the first (sample) axis: train rows first, then validate rows.
X = np.concatenate([X1, X2], axis=0)
y = np.concatenate([y1, y2], axis=0)

# Persist the combined set in the current working directory.
np.save("X_trval.npy", X)
np.save("y_trval.npy", y)
print("Done saving processed train and validation images and labels")


In [None]:
'''Load validation images. Resize and check if every slice has got 30 images'''

def validation_npy():
    '''Export the "validate" folder as per-slice arrays without labels.

    Every slice returned by ``load_images("validate")`` becomes one sample,
    and the owning patient id is recorded once per sample. Two files are
    written to the current working directory:

      * ``X_validation.npy`` - the slices, cast to ``uint8``;
      * ``ids_validate.npy`` - the patient id of each slice, in the same order.
    '''
    print("Writing npy file for validation images...")
    patient_ids, images_by_patient = load_images("validate")

    X = []
    study_ids = []
    for patient in patient_ids:
        stack = images_by_patient[patient]
        n_slices = stack.shape[0]
        # One sample per slice; repeat the id so both lists stay aligned.
        X.extend(stack[k, :, :, :] for k in range(n_slices))
        study_ids.extend([patient] * n_slices)

    # Convert to an array and write both outputs.
    X = np.array(X, dtype=np.uint8)

    np.save('X_validation.npy', X)
    np.save('ids_validate.npy', study_ids)

    print("Done saving processed validation images")

In [6]:
'''Load test images. Resize and check if every slice has got 30 images'''

def test_npy():
    print("Writing npy file for validation images...")
    ids, images = load_images("test")
    #labels = load_labels()
    X = []
    study_ids = []
    #y = []
    for pid in ids:
        study_images = images[pid]

        for i in range(study_images.shape[0]):
            X.append(study_images[i,:,:,:])
            study_ids.append(pid)

    #Create data and labels arrays to save as .npy file

    X = np.array(X, dtype=np.uint8)

    np.save('X_test.npy', X)
    np.save('ids_test.npy', study_ids)

    print("Done saving processed images of test folder")

In [None]:
#Creates target values for validation images.
ids=np.load("ids_validate.npy")
labels = np.loadtxt("validate.csv", delimiter=",", skiprows=1)
map_val = {}
y_val = []
for l in labels:
    map_val[int(l[0])] = [float(l[1]), float(l[2])]

for i in ids:
    out = map_val[int(i)]
    y_val.append(out)


In [None]:
#Combine train and validate images
a = np.load("X_train.npy")
b = np.load("X_validation.npy")
c= np.concatenate((a,b))

#Combine train and validate labels
d=np.load("y_train.npy")
y_val = np.array(y_val)
y_trval = np.concatenate((d,y_val))

#Save them as numpy array
np.save("X_trval.npy", c)
np.save("y_trval.npy", y_trval)

In [7]:
test_npy()

Writing npy file for validation images...
loading images from folder 1000 of test
9 sax foldes
loading images from folder 1001 of test
10 sax foldes
loading images from folder 1002 of test
10 sax foldes
loading images from folder 1003 of test
11 sax foldes
loading images from folder 1004 of test
11 sax foldes
loading images from folder 1005 of test
9 sax foldes
loading images from folder 1006 of test
10 sax foldes
loading images from folder 1007 of test
11 sax foldes
loading images from folder 1008 of test
9 sax foldes
loading images from folder 1009 of test
9 sax foldes
loading images from folder 1010 of test
11 sax foldes
loading images from folder 1011 of test
11 sax foldes
loading images from folder 1012 of test
12 sax foldes
loading images from folder 1013 of test
12 sax foldes
loading images from folder 1014 of test
9 sax foldes
loading images from folder 1015 of test
10 sax foldes
loading images from folder 1016 of test
10 sax foldes
loading images from folder 1017 of test
11 sa