### 3D CNN with TensorFlow

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import keras
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # for 3d plotting

import h5py

%matplotlib inline

import os
# Directory that holds the 3D-MNIST files, located next to the notebook.
# Built with os.path.join so the separator is right on every platform
# (a hard-coded "\\" only yields a valid path on Windows).
data_path = os.path.join(os.getcwd(), "3d-mnist")
# Input data files are expected in the directory printed below.
print(data_path)
from subprocess import check_output
# print(check_output(["dir", data_path]).decode("utf8"))

# Any results you write to the current directory are saved as output.

Using TensorFlow backend.


C:\Users\212606295\Desktop\3D-CNN\3d-mnist


#### Load data

In [2]:
# Read the train/test voxel vectors and their labels from the HDF5 archive.
# The trailing [:] copies each dataset into memory so the arrays stay usable
# after the file is closed.
with h5py.File(data_path + '/full_dataset_vectors.h5', 'r') as hf:
    x_train_raw, y_train_raw = hf["X_train"][:], hf["y_train"][:]
    x_test_raw, y_test_raw = hf["X_test"][:], hf["y_test"][:]


# Sanity check: every sample must have exactly one label.
assert len(x_train_raw) == len(y_train_raw)
assert len(x_test_raw) == len(y_test_raw)

In [3]:
def array_to_color(array, cmap="Oranges"):
    """Convert scalar values to RGB colours (helper taken from ../input/plot3d.py).

    Args:
        array: values to colour; the slicing below expects ``to_rgba`` to
            return a 2D result, i.e. a 1D input vector.
        cmap: name of the matplotlib colormap to use.

    Returns:
        The RGBA array produced by ``ScalarMappable.to_rgba`` with its last
        (alpha) column removed.
    """
    mapper = plt.cm.ScalarMappable(cmap=cmap)
    rgba = mapper.to_rgba(array)
    # Keep every column except the final one (alpha).
    return rgba[:, :-1]

def rgb_data_transform(data):
    """Turn each flat sample into a 16x16x16 voxel grid with 3 colour channels.

    Args:
        data: array whose first axis indexes samples; each sample is passed
            to ``array_to_color`` and must reshape to (16, 16, 16, 3).

    Returns:
        float32 numpy array stacking the coloured grids of all samples.
    """
    colored_grids = [
        array_to_color(sample).reshape(16, 16, 16, 3)
        for sample in data
    ]
    return np.asarray(colored_grids, dtype=np.float32)

In [4]:
# Number of target classes: the digits 0 through 9.
n_classes = 10

# Colour-map the raw vectors into (16, 16, 16, 3) voxel grids.
x_train, x_test = rgb_data_transform(x_train_raw), rgb_data_transform(x_test_raw)

# One-hot encode the integer labels.
y_train, y_test = (
    keras.utils.to_categorical(labels, n_classes)
    for labels in (y_train_raw, y_test_raw)
)

In [5]:
# Inspect the transformed training tensor: (samples, 16, 16, 16, 3) after rgb_data_transform.
x_train.shape

(10000, 16, 16, 16, 3)

#### 3D Conv & Pooling layer 
Strides are given per input dimension, in TensorFlow's default `NDHWC` order: `[batch, depth, height, width, channel]`.

Note: TensorFlow requires the batch and channel strides (the first and last entries) to be 1.

In [9]:
def conv3d(x, W, device="/gpu:1"):
    """Apply a stride-1, 'SAME'-padded 3D convolution of ``x`` with filter ``W``.

    Args:
        x: input tensor passed to ``tf.nn.conv3d``.
        W: filter tensor passed to ``tf.nn.conv3d``.
        device: device string handed to ``tf.device``. Defaults to
            "/gpu:1" (the previously hard-coded value) so existing calls
            behave the same; pass e.g. "/gpu:0" or "/cpu:0" on machines
            without a second GPU.

    Returns:
        The convolution output tensor.
    """
    with tf.device(device):
        return tf.nn.conv3d(x, W, strides=[1,1,1,1,1], padding='SAME')

def maxpool3d(x, device="/gpu:1"):
    """Apply 2x2x2 max pooling with stride 2 and 'SAME' padding to ``x``.

    Args:
        x: input tensor passed to ``tf.nn.max_pool3d``.
        device: device string handed to ``tf.device``. Defaults to
            "/gpu:1" (the previously hard-coded value) so existing calls
            behave the same; pass e.g. "/gpu:0" or "/cpu:0" on machines
            without a second GPU.

    Returns:
        The pooled tensor.
    """
    with tf.device(device):
        return tf.nn.max_pool3d(x, ksize=[1,2,2,2,1], strides=[1,2,2,2,1], padding='SAME')

Input placeholder init

In [10]:
with tf.name_scope('inputs'):
    # Batch of RGB voxel grids; the leading None leaves the batch size open.
    x_input = tf.placeholder(dtype=tf.float32, shape=[None, 16, 16, 16, 3])
    # Matching one-hot labels, one row of n_classes entries per sample.
    y_input = tf.placeholder(dtype=tf.float32, shape=[None, n_classes])

Construct the CNN model: four 3D conv layers (two conv–conv–max-pool stages), batch normalization, and a dense layer followed by dropout before the output layer.

In [20]:
def cnn_model(x_train_data, keep_rate=0.7, seed=None, training=True):
    """Build the 3D CNN graph and return the unscaled class logits.

    Architecture: two conv-conv-maxpool stages (16/32 and 64/128 filters,
    3x3x3 kernels, 'same' padding, ReLU), batch normalization, a 1024-unit
    ReLU dense layer with dropout, and a linear output layer with
    ``n_classes`` units.

    Args:
        x_train_data: input tensor of voxel grids. The flatten step assumes
            the two stride-2 poolings leave a 4x4x4 volume, i.e. a
            16x16x16 spatial input.
        keep_rate: probability of keeping a unit in the dropout layer
            (dropout rate is ``1 - keep_rate``). Previously this argument
            was ignored and the rate was fixed at 0.4.
        seed: not used by the ``tf.layers`` based model; kept so existing
            callers that pass it keep working.
        training: Python bool or boolean scalar tensor forwarded to batch
            normalization and dropout. Defaults to True, which matches the
            earlier hard-coded behaviour. If False is ever used for
            inference, the ops in ``tf.GraphKeys.UPDATE_OPS`` must have been
            run during training for the batch-norm moving averages to be
            meaningful.

    Returns:
        Logits tensor produced by the final dense layer.
    """
    # NOTE: the hand-made `weights` / `biases` variable dicts were removed.
    # No layer used them, yet they were still created and initialised
    # (W_fc1 alone was 16384 x 4096 floats), wasting device memory.

    with tf.name_scope("layer_a"):
        conv1 = tf.layers.conv3d(inputs=x_train_data, filters=16, kernel_size=[3,3,3], padding='same', activation=tf.nn.relu)
        conv2 = tf.layers.conv3d(inputs=conv1, filters=32, kernel_size=[3,3,3], padding='same', activation=tf.nn.relu)
        pool3 = tf.layers.max_pooling3d(inputs=conv2, pool_size=[2, 2, 2], strides=2)

    with tf.name_scope("layer_c"):
        conv4 = tf.layers.conv3d(inputs=pool3, filters=64, kernel_size=[3,3,3], padding='same', activation=tf.nn.relu)
        conv5 = tf.layers.conv3d(inputs=conv4, filters=128, kernel_size=[3,3,3], padding='same', activation=tf.nn.relu)
        pool6 = tf.layers.max_pooling3d(inputs=conv5, pool_size=[2, 2, 2], strides=2)

    with tf.name_scope("batch_norm"):
        cnn3d_bn = tf.layers.batch_normalization(inputs=pool6, training=training)

    with tf.name_scope("fully_con"):
        # 4*4*4 spatial positions left after two 2x poolings, 128 channels each.
        flattening = tf.reshape(cnn3d_bn, [-1, 4*4*4*128])
        dense = tf.layers.dense(inputs=flattening, units=1024, activation=tf.nn.relu)
        # tf.layers.dropout takes the DROP probability, hence 1 - keep_rate.
        dropout = tf.layers.dropout(inputs=dense, rate=1.0 - keep_rate, training=training)

    with tf.name_scope("y_conv"):
        # One logit per class; n_classes replaces the previously hard-coded 10.
        y_conv = tf.layers.dense(inputs=dropout, units=n_classes)

    return y_conv

In [21]:
def train_neural_network(x_train_data, y_train_data, x_test_data, y_test_data, learning_rate=0.1, keep_rate=0.7, epochs=10, batch_size=128, using_gpu=False):
    """Train the 3D CNN with Adam and print the test accuracy after each epoch.

    The model is built on the module-level ``x_input`` / ``y_input``
    placeholders via ``cnn_model``. Every call adds a fresh copy of the
    model to the default graph.

    Args:
        x_train_data: training inputs, sliceable along the first axis.
        y_train_data: one-hot training labels aligned with ``x_train_data``.
        x_test_data: test inputs.
        y_test_data: one-hot test labels aligned with ``x_test_data``.
        learning_rate: learning rate given to ``tf.train.AdamOptimizer``.
        keep_rate: forwarded to ``cnn_model``.
        epochs: number of passes over the training data.
        batch_size: number of samples per training step.
        using_gpu: build the graph under '/gpu:1' when True, else '/cpu:0'.

    Returns:
        None. Progress and the elapsed time are printed.
    """
    import datetime

    device_name = '/gpu:1' if using_gpu else '/cpu:0'

    with tf.device(device_name):
        with tf.name_scope("cross_entropy"):
            prediction = cnn_model(x_input, keep_rate, seed=1)
            cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y_input))

        with tf.name_scope("training"):
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(cost)

        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_input, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    # Ceiling division. The former `int(n / batch_size) + 1` produced an extra,
    # EMPTY batch whenever n was a multiple of batch_size, which yields a NaN
    # loss for that training step.
    iterations = (len(x_train_data) + batch_size - 1) // batch_size
    n_test = len(x_test_data)

    # Soft placement lets TensorFlow fall back to an available device when
    # the requested one (e.g. a second GPU) does not exist.
    config = tf.ConfigProto(allow_soft_placement=True)
    # GPU using BFC
    config.gpu_options.allocator_type = 'BFC'
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())

        start_time = datetime.datetime.now()

        for epoch in range(epochs):
            print('Epoch', epoch, 'started', end='')
            # Mini-batch training pass.
            for itr in range(iterations):
                batch = slice(itr * batch_size, (itr + 1) * batch_size)
                sess.run(train_op, feed_dict={x_input: x_train_data[batch], y_input: y_train_data[batch]})

            # Evaluate one sample at a time. cnn_model runs batch norm and
            # dropout in training mode by default, so grouping test samples
            # differently would change the reported numbers.
            acc = 0
            for idx in range(n_test):
                acc += accuracy.eval({x_input: x_test_data[idx:idx + 1], y_input: y_test_data[idx:idx + 1]})

            # Avoid ZeroDivisionError when no test data is supplied.
            print('Testing Set Accuracy:', acc / n_test if n_test else float('nan'))

        end_time = datetime.datetime.now()
        print('Time elapse: ', str(end_time - start_time))

In [None]:
# Full training run on the whole dataset: small Adam step size, 32-sample
# batches, 50 epochs, graph placed on the GPU.
train_neural_network(
    x_train,
    y_train,
    x_test,
    y_test,
    learning_rate=0.0001,
    batch_size=32,
    epochs=50,
    using_gpu=True,
)

Epoch 0 startedTesting Set Accuracy: 0.545
Epoch 1 startedTesting Set Accuracy: 0.595
Epoch 2 startedTesting Set Accuracy: 0.619
Epoch 3 startedTesting Set Accuracy: 0.656
Epoch 4 startedTesting Set Accuracy: 0.66
Epoch 5 startedTesting Set Accuracy: 0.6845
Epoch 6 startedTesting Set Accuracy: 0.6785
Epoch 7 startedTesting Set Accuracy: 0.7015
Epoch 8 started

In [None]:
# Smoke test: build a fresh (untrained) copy of the model and check that a
# forward pass plus the accuracy computation run on two training samples.
prediction = cnn_model(x_input, keep_rate=0.5)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y_input, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sample_feed = {x_input: x_train[:2], y_input: y_train[:2]}
    print('Accuracy:', accuracy.eval(sample_feed))

In [None]:
# Draw the 16 slices along the first spatial axis of training sample 1 as
# RGB images on a 4x4 grid, then print that sample's one-hot label.
fig = plt.figure(figsize=(16,9))
for slice_idx in range(16):
    ax = fig.add_subplot(4, 4, slice_idx + 1)
    ax.imshow(x_train[1, slice_idx])

print(y_train[1])