In [1]:
import re
import sys
import datetime
import scipy
import numpy as np
import pandas as pd
import cv2
import dicom
import os
import glob
import math
import time
from datetime import timedelta
import matplotlib
# Force matplotlib to not use any Xwindows backend, so that you can output graphs
matplotlib.use('Agg')
from sklearn import model_selection
from matplotlib import pyplot as plt
import tensorflow as tf
from tqdm import tqdm

# Fixes "SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame"
pd.options.mode.chained_assignment = None

In [2]:
def variable_summaries(var):
    # Attach a lot of summaries to a Tensor (for TensorBoard visualization).
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

def img_to_rgb(im):
        n, x, y, z = im.shape
        ret = np.empty((n, x, y, z, 1), dtype=np.float32)
        ret[:, :, :, :, 0] = im
        return ret
        
def get_ids():
    ids = set()
    for path in glob.glob(DATA_PATH + '[0-9\.]*_X.npy'):
        patient_id = re.match(r'([0-9\.]*)_X.npy', os.path.basename(path)).group(1)
        ids.add(patient_id)
    return ids

def get_data(patient_ids):
    num_chunks = 0
    
    for patient_id in patient_ids:
        x = np.load(DATA_PATH + patient_id + '_X.npy')
        num_chunks = num_chunks + x.shape[0]
       
    X = np.ndarray([num_chunks, 64, 64, 64, 1], dtype=np.float32)
    Y = np.ndarray([num_chunks, 7], dtype=np.float32)
    
    count = 0
    for patient_id in patient_ids:
        x = np.load(DATA_PATH + patient_id + '_X.npy').astype(np.float32, copy=False)
        y = np.load(DATA_PATH + patient_id + '_Y.npy').astype(np.float32, copy=False)
        
        X[count : count + x.shape[0], :, :, :, :] = img_to_rgb(x)
        Y[count : count + y.shape[0], :] = y
        
        count = count + x.shape[0]
    
    return X, Y

def new_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))

def new_biases(length):
    return tf.Variable(tf.constant(0.05, shape=[length]))

def conv3d(inputs,             # The previous layer.
           filter_size,        # Width and height of each filter.
           num_filters,        # Number of filters.
           num_channels,       # 1
           strides,            # [1,1,1,1,1]
           name):
    filters = tf.Variable(tf.truncated_normal([filter_size, filter_size, filter_size, num_channels, num_filters],
                                              dtype=tf.float32, stddev=1e-1), name= name + '_weights')
    conv = tf.nn.conv3d(inputs, filters, strides, padding='SAME', name=name)
    biases = tf.Variable(tf.constant(0.0, shape=[num_filters], dtype=tf.float32), name= name + '_biases')
    out = tf.nn.bias_add(conv, biases)

    out = tf.nn.relu(out)
    return out, filters

def max_pool_3d(inputs,
                filter_size,  # [1, 2, 2, 2, 1]
                strides,      # [1, 2, 2, 2, 1]
                name):
    return tf.nn.max_pool3d(inputs,
                               ksize=filter_size,
                               strides=strides,
                               padding='SAME',
                               name= name)

def dropout_3d(inputs,
               keep_prob,
               name):
    return tf.nn.dropout(inputs, keep_prob, name=name)

def flatten_3d(layer):
    layer_shape = layer.get_shape()
    num_features = layer_shape[1:5].num_elements()   
    layer_flat = tf.reshape(layer, [-1, num_features])
    return layer_flat, num_features

def dense_3d(inputs,
             num_inputs,
             num_outputs,
             name):
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], dtype=tf.float32, stddev=1e-1), name= name + '_weights')
    biases = tf.Variable(tf.constant(0.0, shape=[num_outputs], dtype=tf.float32), name= name + '_biases')
    layer = tf.matmul(inputs, weights) + biases
    layer = tf.nn.relu(layer)
    return layer

def get_batch(x, y, batch_size):
        num_images = len(x)
        idx = np.random.choice(num_images,
                               size=batch_size,
                               replace=False)
        x_batch = x[idx]
        y_batch = y[idx]

        return x_batch, y_batch

In [26]:
def train_3d_nn(train_x, validation_x, train_y, validation_y):
    # Graph construction
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, shape=[None, 64, 64, 64, 1], name = 'x')
        y = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes], name = 'y')
        y_labels = tf.placeholder(tf.float32, shape=[None, FLAGS.num_classes], name ='y_labels')
        
        layer1_conv3d_out, layer1_conv3d_weights = conv3d(inputs = x, filter_size = 3, num_filters = 16,
                                                          num_channels = 1, strides = [1, 3, 3, 3, 1],
                                                          name ='layer1_conv3d')
        print(layer1_conv3d_out)
        layer1_maxpool3d_out = max_pool_3d(inputs = layer1_conv3d_out, filter_size = [1, 2, 2, 2, 1],
                                           strides = [1, 2, 2, 2, 1], name ='layer1_maxpool3d')
        

        print(layer1_maxpool3d_out)
        layer2_conv3d_out, layer2_conv3d_weights = conv3d(inputs = layer1_maxpool3d_out, filter_size = 3,
                                                          num_filters = 32, num_channels = 16, strides = [1, 3, 3, 3, 1],
                                                          name ='layer2_conv3d')
        
        print(layer2_conv3d_out)
        layer2_maxpool3d_out = max_pool_3d(inputs = layer2_conv3d_out, filter_size = [1, 2, 2, 2, 1],
                                           strides = [1, 2, 2, 2, 1], name ='layer2_maxpool3d')
        
        print(layer2_maxpool3d_out)
        layer3_conv3d_out, layer3_conv3d_weights = conv3d(inputs = layer2_maxpool3d_out, filter_size = 3,
                                                          num_filters = 64, num_channels = 32, strides = [1, 3, 3, 3, 1],
                                                          name = 'layer3_conv3d')
        print(layer3_conv3d_out)
        
        layer3_maxpool3d_out = max_pool_3d(inputs = layer3_conv3d_out, filter_size = [1, 2, 2, 2, 1],
                                           strides = [1, 2, 2, 2, 1], name = 'layer3_maxpool3d')
        print(layer3_maxpool3d_out)
        
        layer3_dropout3d_out = dropout_3d(layer3_maxpool3d_out, 0.25, 'layer3_dropout3d')
        print(layer3_dropout3d_out)
        
        layer3_flatten3d_out, layer3_flatten3d_features = flatten_3d(layer3_dropout3d_out)
        print(layer3_flatten3d_out)
        
        # shape=(?, 64)
        layer4_dense3d_out = dense_3d(inputs=layer3_flatten3d_out, num_inputs=int(layer3_flatten3d_out.shape[1]),
                                     num_outputs=512, name ='layer4_dense3d')
        print(layer4_dense3d_out)
        
        layer4_dropout3d_out = dropout_3d(layer4_dense3d_out, 0.5, 'layer4_dropout3d')
        print(layer4_dropout3d_out)
        
        layer5_dense3d_out = dense_3d(inputs=layer4_dropout3d_out, num_inputs=int(layer4_dropout3d_out.shape[1]),
                                     num_outputs=128, name ='layer5_dense3d')
        print(layer5_dense3d_out)
        
        layer5_dropout3d_out = dropout_3d(layer5_dense3d_out, 0.5, 'layer5_dropout3d')
        print(layer5_dropout3d_out)
        
        layer6_dense3d_out = dense_3d(inputs=layer5_dropout3d_out, num_inputs=int(layer5_dropout3d_out.shape[1]),
                                     num_outputs=7, name ='layer6_dense3d')
        print(layer6_dense3d_out)
        
        y = tf.nn.softmax(layer6_dense3d_out)
        
        print(y)
        
        log_loss = tf.losses.log_loss(y_labels, y, epsilon=10e-15)
        
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-4).minimize(log_loss)
    
    # Setting up config
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = FLAGS.allow_growth
    config.log_device_placement=FLAGS.log_device_placement
    config.allow_soft_placement=FLAGS.allow_soft_placement
    
    
    with tf.Session(graph=graph, config=config) as sess:
        sess.run(tf.global_variables_initializer())
        
        for i in tqdm(range(FLAGS.max_iterations)):
            x_batch, y_batch = get_batch(train_x, train_y, FLAGS.batch_size)
            _, loss_val = sess.run([optimizer, log_loss], feed_dict={x: x_batch, y_labels: y_batch})
            
            print('Batch {} log loss: {}'.format(i, loss_val))
            
        sess.close()

In [None]:
train_3d_nn(train_x, validation_x, train_y, validation_y)

Tensor("Relu:0", shape=(?, 22, 22, 22, 16), dtype=float32)
Tensor("layer1_maxpool3d:0", shape=(?, 11, 11, 11, 16), dtype=float32)
Tensor("Relu_1:0", shape=(?, 4, 4, 4, 32), dtype=float32)
Tensor("layer2_maxpool3d:0", shape=(?, 2, 2, 2, 32), dtype=float32)
Tensor("Relu_2:0", shape=(?, 1, 1, 1, 64), dtype=float32)
Tensor("layer3_maxpool3d:0", shape=(?, 1, 1, 1, 64), dtype=float32)
Tensor("layer3_dropout3d/mul:0", shape=(?, 1, 1, 1, 64), dtype=float32)
Tensor("Reshape:0", shape=(?, 64), dtype=float32)
Tensor("Relu_3:0", shape=(?, 512), dtype=float32)
Tensor("layer4_dropout3d/mul:0", shape=(?, 512), dtype=float32)
Tensor("Relu_4:0", shape=(?, 128), dtype=float32)
Tensor("layer5_dropout3d/mul:0", shape=(?, 128), dtype=float32)
Tensor("Relu_5:0", shape=(?, 7), dtype=float32)
Tensor("Softmax:0", shape=(?, 7), dtype=float32)


  0%|          | 0/100000 [00:00<?, ?it/s]

In [4]:
if __name__ == '__main__':
    start_time = time.time()
    DATA_PATH = '/kaggle_2/luna/luna16/data/pre_processed_chunks/'
    TENSORBOARD_SUMMARIES = '/kaggle/dev/data-science-bowl-2017-data/tensorboard_summaries'
    
    #globals initializing
    FLAGS = tf.app.flags.FLAGS

    ## Prediction problem specific
    tf.app.flags.DEFINE_integer('num_classes', 7,
                                """Number of classes to predict.""")
    tf.app.flags.DEFINE_integer('batch_size', 32,
                                """Number of items in a batch.""")
    tf.app.flags.DEFINE_integer('max_iterations', 100000,
                                """Number of batches to run.""")
    tf.app.flags.DEFINE_float('require_improvement', 0.20,
                                """Percent of max_iterations after which optimization will be halted if no improvement found""")
    tf.app.flags.DEFINE_float('iteration_analysis', 0.10,
                                """Percent of max_iterations after which analysis will be done""")

    ## Tensorflow specific
    tf.app.flags.DEFINE_integer('num_gpus', 2,
                                """How many GPUs to use.""")
    tf.app.flags.DEFINE_boolean('log_device_placement', False,
                                """Whether to log device placement.""")
    tf.app.flags.DEFINE_boolean('allow_soft_placement', True,
                                """Whether to allow soft placement of calculations by tf.""")
    tf.app.flags.DEFINE_boolean('allow_growth', True,
                                """Whether to allow GPU growth by tf.""")

    patient_ids = get_ids()
    X, Y = get_data(patient_ids)
    
    print(X.shape)
    print(Y.shape)
    ##################################
    # TODO: Normalize, zero-center X #
    ##################################
    print('Splitting into train, validation sets')
    train_x, validation_x, train_y, validation_y = model_selection.train_test_split(X, Y, random_state=42, stratify=Y,
                                                                    test_size=0.20)
    
    print('train_x: {}'.format(train_x.shape))
    print('validation_x: {}'.format(validation_x.shape))
    print('train_y: {}'.format(train_y.shape))
    print('validation_y: {}'.format(validation_y.shape))
    
    train_3d_nn()
    end_time = time.time()
    print("Total Time usage: " + str(timedelta(seconds=int(round(end_time - start_time)))))

(46378, 64, 64, 64, 1)
(46378, 7)
Splitting into train, validation sets
train_x: (37102, 64, 64, 64, 1)
validation_x: (9276, 64, 64, 64, 1)
train_y: (37102, 7)
validation_y: (9276, 7)


TypeError: train_3d_nn() missing 4 required positional arguments: 'train_x', 'validation_x', 'train_y', and 'validation_y'