In [15]:
import numpy as np
import pandas as pd
import dicom
import os
import matplotlib.pyplot as plt
import cv2
import math
dataDirectory = 'Lung_Cancer/stage1/stage1/'
lungPatients = os.listdir(dataDirectory)
labels = pd.read_csv('Lung_Cancer/stage1_labels/stage1_labels.csv', index_col=0)
size = 50
NoSlices = 20

def chunks(l, n):
    count=0
    for i in range(0, len(l), n):
        if(count < NoSlices):
            yield l[i:i + n]
            count=count + 1
def mean(l):
    return sum(l) / len(l)

def dataProcessing(patient,labels_df,size=50, noslices=20, visualize=False):
    
    label = labels_df.get_value(patient, 'cancer')
    path = dataDirectory + patient
    slices = [dicom.read_file(path + '/' + s) for s in os.listdir(path)]
    slices.sort(key = lambda x: int(x.ImagePositionPatient[2]))

    new_slices = []
    slices = [cv2.resize(np.array(each_slice.pixel_array),(size,size)) for each_slice in slices]
    
    chunk_sizes = math.floor(len(slices) / noslices)
    for slice_chunk in chunks(slices, chunk_sizes):
        slice_chunk = list(map(mean, zip(*slice_chunk)))
        new_slices.append(slice_chunk)

    if label == 1: label=np.array([0,1])
    elif label == 0: label=np.array([1,0])    
    return np.array(new_slices),label

imageData = []
for num,patient in enumerate(lungPatients):
    if num % 100 == 0:
        print('Saved -',num)
    try:
        img_data,label = dataProcessing(patient,labels,size=size, noslices=NoSlices)
        imageData.append([img_data,label])
    except KeyError as e:
        print('Data is unlabeled')

np.save('imageData-{}-{}-{}.npy'.format(size,size,NoSlices), imageData)

Saved - 0
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Saved - 100
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Saved - 200
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Saved - 300
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Saved - 400
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Saved - 500
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unlabeled
Data is unla

In [None]:
fig = plt.figure()
for num,each_slice in enumerate(new_slices):
    y = fig.add_subplot(4,5,num+1)
    y.imshow(each_slice, cmap='gray')
plt.show()

In [1]:
import tensorflow as tf
import numpy as np
imageData = np.load('imageData-50-50-20.npy')
trainingData = imageData[:-100]
validationData = imageData[-100:]
x = tf.placeholder('float')
y = tf.placeholder('float')
size = 50
keep_rate = 0.8
NoSlices = 20

def conv3d(x, W):
    return tf.nn.conv3d(x, W, strides=[1,1,1,1,1], padding='SAME')
def maxpool3d(x):
    return tf.nn.max_pool3d(x, ksize=[1,2,2,2,1], strides=[1,2,2,2,1], padding='SAME')
def cnn(x):

    x = tf.reshape(x, shape=[-1, size, size, NoSlices, 1])
    convolution1 = tf.nn.relu(conv3d(x, tf.Variable(tf.random_normal([3,3,3,1,32]))) + tf.Variable(tf.random_normal([32])))
    convolution1 = maxpool3d(convolution1)
    convolution2 = tf.nn.relu(conv3d(convolution1, tf.Variable(tf.random_normal([3,3,3,32,64]))) + tf.Variable(tf.random_normal([64])))
    convolution2 = maxpool3d(convolution2)
    convolution3 = tf.nn.relu(conv3d(convolution2, tf.Variable(tf.random_normal([3,3,3,64,128]))) + tf.Variable(tf.random_normal([128])))
    convolution3 = maxpool3d(convolution3)
    convolution4 = tf.nn.relu(conv3d(convolution3, tf.Variable(tf.random_normal([3,3,3,128,256]))) + tf.Variable(tf.random_normal([256])))
    convolution4 = maxpool3d(convolution4)
    convolution5 = tf.nn.relu(conv3d(convolution4, tf.Variable(tf.random_normal([3,3,3,256,512]))) + tf.Variable(tf.random_normal([512])))
    convolution5 = maxpool3d(convolution4)
    fc = tf.reshape(convolution5,[-1, 1024])
    fc = tf.nn.relu(tf.matmul(fc, tf.Variable(tf.random_normal([1024,1024])))+tf.Variable(tf.random_normal([1024])))
    fc = tf.nn.dropout(fc, keep_rate)
    output = tf.matmul(fc, tf.Variable(tf.random_normal([1024, 2])))+tf.Variable(tf.random_normal([2]))
    return output

def network(x):
    prediction = cnn(x)
    cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(logits=prediction,labels=y) )
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)
    epochs = 3
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())     
        for epoch in range(epochs):
            epoch_loss = 0
            for data in trainingData:
                try:
                    X = data[0]
                    Y = data[1]
                    _, c = session.run([optimizer, cost], feed_dict={x: X, y: Y})
                    epoch_loss += c
                except Exception as e:
                    pass

            correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
            print('Epoch', epoch+1, 'completed out of',epochs,'loss:',epoch_loss)
            print('Accuracy:',accuracy.eval({x:[i[0] for i in validationData], y:[i[1] for i in validationData]}))
        print('Final Accuracy:',accuracy.eval({x:[i[0] for i in validationData], y:[i[1] for i in validationData]}))
        
network(x)

Epoch 1 completed out of 3 loss: 4.91657477263e+13
Accuracy: 0.53
Epoch 2 completed out of 3 loss: 1.04683080274e+13
Accuracy: 0.66
Epoch 3 completed out of 3 loss: 4.037828848e+12
Accuracy: 0.63
Final Accuracy: 0.64
