# Convolutional Neural Networks

In [1]:
import re
import sys
import math
import time
import random
from itertools import chain
from os import getcwd, listdir
from os.path import join, dirname, join, splitext

import numpy as np
import pandas as pd
import tensorflow as tf

sys.path.append(join(dirname(getcwd()), "src"))
from utils import (read_data, DataSet, inference, loss, training_adam,
                   training_gradient_descent, evaluation, fill_feed_dict,
                   do_eval_cnn, read_text_files_and_labels)

In [2]:
base_data_path = join(dirname(getcwd()), "data")
training_data_path = join(base_data_path, "test_data_revised",
                          "PRAXIS_rapid_eval_MODEL_TRAINING_2015/*/*.txt")
test_data_path = join(base_data_path, "test_data_revised",
                      "PRAXIS_rapid_eval_TESTING_2015/*/*.txt")

In [3]:
train_labels_path = join(base_data_path, "test_data_revised",
                         "training_macro.csv")
test_labels_path = join(base_data_path, "test_data_revised",
                        "testing_macro.csv")
df = pd.read_csv(train_labels_path)
df = pd.concat([df, pd.read_csv(test_labels_path)])
df = df[["appointment_id", "H1"]]
df.rename(columns={"appointment_id": "id", "H1": "label"}, inplace=True)
ids_to_labels_dict = {}
if len(df.id) != len(set(df.id)):
    raise ValueError("Duplicate IDs!")
for id_ in df.id:
    ids_to_labels_dict[id_] = df[df.id == id_].iloc[0].label

In [4]:
(train_data, test_data, dev_data) = \
    read_text_files_and_labels(ids_to_labels_dict,
                               training_data_path,
                               test_data_path,
                               get_id_from_text_file_func=lambda x: int(x[:16]))

In [5]:
# show_data = False
show_data = True

In [7]:
if show_data:
    print("Shape of data:\n\tTraining: {}\n\tTest: {}"
          .format(train_data._features.shape,
                  test_data._features.shape))

Shape of data:
	Training: (4000, 1219)
	Test: (2750, 1219)


In [12]:
# Define some parameters
log_dir_path = join(getcwd(), "logs")
max_steps = 20000
optimizer_type = "adam"
#optimizer_type = "gradient descent"
learning_rate = 0.01
hidden1 = 512
hidden2 = 128
hidden3 = 16
NUM_FEATURES = 1219
batch_size = 10
NUM_CLASSES = 6
dropout = 0.5

#FILTER_SIZES = [2, 3, 4, 5]
#NUM_FILTERS = 5

In [13]:
def fully_connected_network(input_fc, vector_sizes, keep_prob, num_classes):

    fc_w1 = tf.Variable(tf.random_normal([vector_sizes[0], vector_sizes[1]]))
    fc_b1 = tf.Variable(tf.random_normal([vector_sizes[1]]))

    hidden1 = tf.add(tf.matmul(input_fc, fc_w1), fc_b1)
    hidden1 = tf.nn.relu(hidden1)
    hidden1 = tf.nn.dropout(hidden1, keep_prob)

    fc_w2 = tf.Variable(tf.random_normal([vector_sizes[1], vector_sizes[2]]))
    fc_b2 = tf.Variable(tf.random_normal([vector_sizes[2]]))

    hidden2 = tf.add(tf.matmul(hidden1, fc_w2), fc_b2)
    hidden2 = tf.nn.relu(hidden2)
    hidden2 = tf.nn.dropout(hidden2, keep_prob)

    weights = tf.Variable(tf.random_normal([vector_sizes[2], num_classes]))
    biases = tf.Variable(tf.random_normal([num_classes]))
    logits = tf.matmul(hidden2, weights) + biases
    
    return logits

In [14]:
def conv_layer(input, filter_size, num_filter, max_pool_filter_size, max_pool_stride_size):
    
    weight = tf.Variable(tf.random_normal([filter_size, 1, 1, num_filter]))
    bias = tf.Variable(tf.random_normal([num_filter]))
    
    conv = tf.nn.conv2d(input, weight, strides=[1, 1, 1, 1], padding='SAME')
    conv = tf.nn.bias_add(conv, bias)
    conv = tf.nn.relu(conv, name="relu")
    
    conv = tf.nn.max_pool(conv, ksize=[1, 1, max_pool_filter_size, 1], strides=[1, 1, max_pool_stride_size, 1],
                          padding='VALID')
    return conv

In [15]:
# Generate placeholders for the input feature data and labels.
inputs_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                       NUM_FEATURES))
labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))

keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

x = tf.reshape(inputs_placeholder, shape=[-1, 1, NUM_FEATURES, 1])

conv1 = conv_layer(x, 2, NUM_FEATURES, 2, 2)
conv2 = conv_layer(x, 3, NUM_FEATURES, 2, 2)
conv3 = conv_layer(x, 4, NUM_FEATURES, 2, 2)

conv_concat = tf.concat(2, [conv1, conv2, conv3])

In [16]:
sess = tf.InteractiveSession()

init_op = tf.global_variables_initializer()
sess.run(init_op)

In [17]:
id, train_batch, label_batch = train_data.next_batch(batch_size)

In [18]:
outs = conv_concat.eval(feed_dict={inputs_placeholder: train_batch})

In [19]:
outs.shape

(10, 1, 1827, 1219)

In [20]:
conv_concat_out = conv_concat.eval(feed_dict={inputs_placeholder: train_batch})

In [21]:
conv_concat_out.shape

(10, 1, 1827, 1219)

In [28]:
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():

    # Generate placeholders for the input feature data and labels.
    inputs_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                           NUM_FEATURES))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))
    
    keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
    
    x = tf.reshape(inputs_placeholder, shape=[-1, 1, NUM_FEATURES, 1])

    conv1 = conv_layer(x, 2, NUM_FEATURES, 2, 2)
    conv2 = conv_layer(x, 3, NUM_FEATURES, 2, 2)
    conv3 = conv_layer(x, 4, NUM_FEATURES, 2, 2)    
    
    conv = tf.concat(2, [conv1, conv2, conv3])
    conv.get_shape()
    
    input_fc = tf.reshape(conv, [-1, 22271130])
    
    logits = fully_connected_network(input_fc, [22271130, 6, 6], keep_prob, NUM_CLASSES)
    
    # Add to the Graph the Ops for loss calculation.
    loss_ = loss(logits, labels_placeholder)
    
    # Add to the Graph the Ops that calculate and apply gradients.
    if optimizer_type == "adam":
        train_op = training_adam(loss_, learning_rate)
    elif optimizer_type == "gradient descent":
        train_op = training_gradient_descent(loss_, learning_rate)
    else:
        raise ValueError("Choose either \"adam\" or \"gradient descent\" for "
                         "`optimizer_type`.")

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = evaluation(logits, labels_placeholder)

    # Build the summary Tensor based on the TF collection of Summaries.
    summary = tf.summary.merge_all()

    # Add the variable initializer Op.
    init = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(log_dir_path, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in range(max_steps):
        start_time = time.time()

        # Fill a feed dictionary with the actual set of images and labels
        # for this particular training step.
        feed_dict = fill_feed_dict(train_data,
                                   inputs_placeholder,
                                   labels_placeholder,
                                   batch_size)

        feed_dict[keep_prob] = dropout
        # Run one step of the model.  The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op.  To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss_],
                                 feed_dict=feed_dict)

        duration = time.time() - start_time

        # Write the summaries and print an overview fairly often.
        if step % 100 == 0:

            # Print status to stdout.
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
        
        # Save a checkpoint and evaluate the model periodically.
        if (step + 1) % 1000 == 0 or (step + 1) == max_steps:
            checkpoint_file = join(log_dir_path, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)

            # Evaluate against the training set.
            print('Train Data Eval:')
            do_eval_cnn(sess,
                        eval_correct,
                        inputs_placeholder,
                        labels_placeholder,
                        train_data,
                        logits,
                        batch_size,
                        keep_prob, dropout)

            # Evaluate against the development set.
            if dev_labels is not None:
                print('Development Data Eval:')
                do_eval_cnn(sess,
                            eval_correct,
                            inputs_placeholder,
                            labels_placeholder,
                            dev_data,
                            logits,
                            batch_size,
                            keep_prob, dropout)

            # Evaluate against the test set.
            print('Test Data Eval:')
            do_eval_cnn(sess,
                        eval_correct,
                        inputs_placeholder,
                        labels_placeholder,
                        test_data,
                        logits,
                        batch_size,
                        keep_prob, dropout)

ValueError: Dimensions must be equal, but are 1 and 10 for 'xentropy/xentropy' (op: 'SparseSoftmaxCrossEntropyWithLogits') with input shapes: [1,6], [10].