In [1]:
import pydicom as dicom # Read the dicom files
import os
import pandas as pd

dir = '../DSB3/'
data_dir = os.path.join(dir, 'stage1')
patients = os.listdir(data_dir)
labels_df = pd.read_csv(os.path.join(dir, 'stage1_labels.csv'), index_col=0)

labels_df.head()

Unnamed: 0_level_0,cancer
id,Unnamed: 1_level_1
0015ceb851d7251b8f399e39779d1e7d,1
0030a160d58723ff36d73f41b170ec21,0
003f41c78e6acfa92430a057ac0b306e,0
006b96310a37b36cccb2ab48d10b49a3,1
008464bb8521d09a42985dd8add3d0d2,1


In [2]:
for patient in patients[:5]:
    label = labels_df._get_value(patient, 'cancer')
    path = os.path.join(data_dir, patient)
    slices = [dicom.read_file(os.path.join(path, s)) for s in os.listdir(path)]
    slices.sort(key=lambda x: int(x.ImagePositionPatient[2]))

    print(len(slices), slices[0].pixel_array.shape)
    # print(slices[0])

195 (512, 512)
265 (512, 512)
233 (512, 512)
173 (512, 512)
146 (512, 512)


In [3]:
len(patients)

1595

In [4]:
import matplotlib.pyplot as plt
import cv2
import numpy as np
import math
from tqdm import tqdm

IMG_PX_SIZE = 50
HM_SLICES = 20

def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

def mean(l):
    return sum(l) / len(l)

def process_data(patient, labels_df, img_px_size=50, hm_slices=20, visualize=False):
    label = labels_df._get_value(patient, 'cancer')
    path = os.path.join(data_dir, patient)
    slices = [dicom.read_file(os.path.join(path, s)) for s in os.listdir(path)]
    slices.sort(key=lambda x: int(x.ImagePositionPatient[2]))
    
    new_slices = []
    slices = [cv2.resize(np.array(each_slice.pixel_array), (img_px_size, img_px_size)) for each_slice in slices]
    chunk_sizes = math.ceil(len(slices) / hm_slices)

    for slice_chunk in chunks(slices, chunk_sizes):
        slice_chunk = list(map(mean, zip(*slice_chunk)))
        new_slices.append(slice_chunk)
    
    while len(new_slices) < hm_slices:
        new_slices.append(new_slices[-1])
    
    while len(new_slices) > hm_slices:
        new_val = list(map(mean, zip(*[new_slices[-2], new_slices[-1]])))
        del new_slices[-1]
        new_slices[-1] = new_val
    
    if len(new_slices) != 20: print(len(new_slices)) 
    
    if visualize:
        fig = plt.figure()

        for num, each_slice in enumerate(new_slices):
            y = fig.add_subplot(4, 5, num + 1)
            y.imshow(each_slice)
        plt.show()
    
    if label == 1: label = np.array([0, 1])
    elif label == 0: label = np.array([1, 0])

    return np.array(new_slices), label

# much_data = []

# for num, patient in enumerate(tqdm(patients[:20])):
#     try:
#         img_data, label = process_data(patient, labels_df, img_px_size=IMG_PX_SIZE, hm_slices=HM_SLICES)
#         much_data.append([img_data, label])
#     except KeyError as e:
#         print('This is unlabeled data.')
# try:
#     os.makedirs('ml71-data')
# except:
#     pass



# np.save(os.path.join('ml71-data', 'muchdata-{}-{}-{}.npy'.format(IMG_PX_SIZE, IMG_PX_SIZE, HM_SLICES)), np.array(much_data, dtype=object))

In [5]:
IMG_SIZE_PX = IMG_PX_SIZE
SLICE_COUNT = HM_SLICES

much_data = np.load(os.path.join('ml71-data', 'muchdata-{}-{}-{}.npy'.format(IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT)), allow_pickle=True)
train_data = much_data[:-100]
validation_data = much_data[-100:]

In [6]:
import tensorflow._api.v2.compat.v1 as v1
import tensorflow as tf
import numpy as np
v1.disable_eager_execution()

hm_epochs = 15
n_classes = 2

x = v1.placeholder('float')
y = v1.placeholder('float')

keep_rate = 0.8
keep_prob = v1.placeholder(tf.float32)

def conv3d(x, W):
    return tf.nn.conv3d(x, W, strides=[1, 1, 1, 1, 1], padding='SAME')

def maxpool3d(x):
    #                        size of window      movement of window
    return tf.nn.max_pool3d(x, ksize=[1, 2, 2, 2, 1], strides=[1, 2, 2, 2, 1], padding='SAME')

def convolutional_neural_network(x):
    weights = {'W_conv1': tf.Variable(v1.random_normal([3, 3, 3, 1, 32])),
               'W_conv2': tf.Variable(v1.random_normal([3, 3, 3, 32, 64])),
               'W_fc': tf.Variable(v1.random_normal([13*13*5*64, 1024])),
               'out': tf.Variable(v1.random_normal([1024, n_classes]))}

    biases = {'b_conv1': tf.Variable(v1.random_normal([32])),
              'b_conv2': tf.Variable(v1.random_normal([64])),
              'b_fc': tf.Variable(v1.random_normal([1024])),
              'out': tf.Variable(v1.random_normal([n_classes]))}
    
    x = tf.reshape(x, shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, SLICE_COUNT, 1])
    
    conv1 = tf.nn.relu(conv3d(x, weights['W_conv1']) + biases['b_conv1'])
    conv1 = maxpool3d(conv1)

    conv2 = tf.nn.relu(conv3d(conv1, weights['W_conv2']) + biases['b_conv2'])
    conv2 = maxpool3d(conv2)

    fc = tf.reshape(conv2, [-1, 13*13*5*64])
    fc = tf.nn.relu(tf.matmul(fc, weights['W_fc']) + biases['b_fc'])

    fc = tf.nn.dropout(fc, rate=keep_rate)

    output = tf.matmul(fc, weights['out']) + biases['out']

    return output


def train_neural_network(x):
    prediction = convolutional_neural_network(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = v1.train.AdamOptimizer().minimize(cost)
    
    with v1.Session() as sess:
        sess.run(v1.global_variables_initializer())
        
        for epoch in range(hm_epochs):
            epoch_loss = 0
            
            for data in train_data:
                X = data[0]
                Y = data[1]

                _, c = sess.run([optimizer, cost], feed_dict={x: X, y: Y})
                epoch_loss += c
            
            print('Epoch', epoch + 1, 'completed out of', hm_epochs, 'loss:', epoch_loss)

        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))

        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy:', accuracy.eval({x: [i[0] for i in validation_data], y: [i[1] for i in validation_data]}))

train_neural_network(x)

Epoch 1 completed out of 15 loss: 534245014298.0
Epoch 2 completed out of 15 loss: 39017792953.0
Epoch 3 completed out of 15 loss: 14129113383.0
Epoch 4 completed out of 15 loss: 6287104469.90625
