# Convolutional Neural Networks

In [1]:
import sys
import math
import time
import random
from os import getcwd
from os.path import join, dirname

import tensorflow as tf
import numpy as np

sys.path.append(join(dirname(getcwd()), "src"))
from utils import (read_data, DataSet, inference, loss, training_adam,
                   training_gradient_descent, evaluation, fill_feed_dict,
                   do_eval_cnn)

In [2]:
data_path = join(dirname(getcwd()), "data", "test_data_revised")

In [3]:
# Read in data

# NOTE: Download test_data_revised.zip (in email since it can't be shared)
# and save it somewhere, preferably in the "tutorial_notebooks/data"
# directory. If it is somewhere else, just make sure to pass in the path when
# this function is used.

# Choose "micro" or "macro". This will change the types of features we're
# using. There are 220 "micro" features in total while thre are 9 macro
# features.
# dataset_type = "macro"
dataset_type = "micro"

(train_ids, train_features, train_labels,
 test_ids, test_features, test_labels,
 dev_ids, dev_features, dev_labels) = read_data(data_path,
                                                macro_or_micro=dataset_type,
                                                dev_set=False)
#random_sampler = False
random_sampler = True
train_data = DataSet(train_ids, train_features, train_labels, random_=random_sampler)
test_data = DataSet(test_ids, test_features, test_labels)
if dev_labels is not None:
    dev_data = DataSet(dev_ids, dev_features, dev_labels)

In [4]:
# show_data = False
show_data = True

In [5]:
if show_data:
    print("Shape of data:\n\tTraining: {}\n\t{}Test: {}"
          .format(train_features.shape,
                  "" if dev_features is None
                     else "Development: {}\n\t".format(dev_features.shape),
                  test_features.shape))
    print("Shape of labels data:\n\tTraining: {}\n\t{}Test: {}"
          .format(train_labels.shape,
                  "" if dev_labels is None
                     else "Development: {}\n\t".format(dev_labels.shape),
                  test_labels.shape))

Shape of data:
	Training: (4000, 220)
	Test: (2750, 220)
Shape of labels data:
	Training: (4000,)
	Test: (2750,)


In [6]:
# Define some parameters
log_dir_path = join(getcwd(), "logs")
max_steps = 20000
optimizer_type = "adam"
#optimizer_type = "gradient descent"
if dataset_type == "macro":
    learning_rate = 0.01
    hidden1 = 8
    hidden2 = 8
    hidden3 = None
    NUM_FEATURES = 9
    batch_size = 10
else:
    learning_rate = 0.01
    hidden1 = 512
    hidden2 = 128
    hidden3 = 16
    NUM_FEATURES = 220
    batch_size = 10
NUM_CLASSES = 6
dropout = 0.5

FILTER_SIZES = [2, 3, 4, 5]
NUM_FILTERS = 5

In [7]:
def fully_connected_network(input_fc, vector_sizes, keep_prob, num_classes):

    fc_w1 = tf.Variable(tf.random_normal([vector_sizes[0], vector_sizes[1]]))
    fc_b1 = tf.Variable(tf.random_normal([vector_sizes[1]]))

    hidden1 = tf.add(tf.matmul(input_fc, fc_w1), fc_b1)
    hidden1 = tf.nn.relu(hidden1)
    hidden1 = tf.nn.dropout(hidden1, keep_prob)

    fc_w2 = tf.Variable(tf.random_normal([vector_sizes[1], vector_sizes[2]]))
    fc_b2 = tf.Variable(tf.random_normal([vector_sizes[2]]))

    hidden2 = tf.add(tf.matmul(hidden1, fc_w2), fc_b2)
    hidden2 = tf.nn.relu(hidden2)
    hidden2 = tf.nn.dropout(hidden2, keep_prob)

    weights = tf.Variable(tf.random_normal([vector_sizes[2], num_classes]))
    biases = tf.Variable(tf.random_normal([num_classes]))
    logits = tf.matmul(hidden2, weights) + biases
    
    return logits

In [8]:
def conv_layer(input, filter_size, num_filter, max_pool_filter_size, max_pool_stride_size):
    
    weight = tf.Variable(tf.random_normal([filter_size, 1, 1, num_filter]))
    bias = tf.Variable(tf.random_normal([num_filter]))
    
    conv = tf.nn.conv2d(input, weight, strides=[1, 1, 1, 1], padding='SAME')
    conv = tf.nn.bias_add(conv, bias)
    conv = tf.nn.relu(conv, name="relu")
    
    conv = tf.nn.max_pool(conv, ksize=[1, 1, max_pool_filter_size, 1], strides=[1, 1, max_pool_stride_size, 1],
                          padding='VALID')
    return conv

In [9]:
# Generate placeholders for the input feature data and labels.
inputs_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                       NUM_FEATURES))
labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))

keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)

x = tf.reshape(inputs_placeholder, shape=[-1, 1, NUM_FEATURES, 1])

conv1 = conv_layer(x, 2, 9, 2, 2)
conv2 = conv_layer(x, 3, 9, 2, 2)
conv3 = conv_layer(x, 4, 9, 2, 2)

conv_concat = tf.concat(2, [conv1, conv2, conv3])

In [10]:
sess = tf.InteractiveSession()

init_op = tf.global_variables_initializer()
sess.run(init_op)

In [11]:
id, train_batch, label_batch = train_data.next_batch(batch_size)

In [12]:
outs = conv_concat.eval(feed_dict={inputs_placeholder: train_batch})

In [13]:
outs.shape

(10, 1, 330, 9)

In [14]:
conv_concat_out = conv_concat.eval(feed_dict={inputs_placeholder: train_batch})

In [15]:
conv_concat_out.shape

(10, 1, 330, 9)

In [16]:
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():

    # Generate placeholders for the input feature data and labels.
    inputs_placeholder = tf.placeholder(tf.float32, shape=(batch_size,
                                                           NUM_FEATURES))
    labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size))
    
    keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
    
    x = tf.reshape(inputs_placeholder, shape=[-1, 1, NUM_FEATURES, 1])

    conv1 = conv_layer(x, 2, 9, 2, 2)
    conv2 = conv_layer(x, 3, 9, 2, 2)
    conv3 = conv_layer(x, 4, 9, 2, 2)    
    
    conv = tf.concat(2, [conv1, conv2, conv3])
    
    input_fc = tf.reshape(conv, [-1, 2970])
    
    logits = fully_connected_network(input_fc, [2970, 14, 14], keep_prob, NUM_CLASSES)
    
    # Add to the Graph the Ops for loss calculation.
    loss_ = loss(logits, labels_placeholder)
    
    # Add to the Graph the Ops that calculate and apply gradients.
    if optimizer_type == "adam":
        train_op = training_adam(loss_, learning_rate)
    elif optimizer_type == "gradient descent":
        train_op = training_gradient_descent(loss_, learning_rate)
    else:
        raise ValueError("Choose either \"adam\" or \"gradient descent\" for "
                         "`optimizer_type`.")

    # Add the Op to compare the logits to the labels during evaluation.
    eval_correct = evaluation(logits, labels_placeholder)

    # Build the summary Tensor based on the TF collection of Summaries.
    summary = tf.summary.merge_all()

    # Add the variable initializer Op.
    init = tf.global_variables_initializer()

    # Create a saver for writing training checkpoints.
    saver = tf.train.Saver()

    # Create a session for running Ops on the Graph.
    sess = tf.Session()

    # Instantiate a SummaryWriter to output summaries and the Graph.
    summary_writer = tf.summary.FileWriter(log_dir_path, sess.graph)

    # And then after everything is built:

    # Run the Op to initialize the variables.
    sess.run(init)

    # Start the training loop.
    for step in range(max_steps):
        start_time = time.time()

        # Fill a feed dictionary with the actual set of images and labels
        # for this particular training step.
        feed_dict = fill_feed_dict(train_data,
                                   inputs_placeholder,
                                   labels_placeholder,
                                   batch_size)

        feed_dict[keep_prob] = dropout
        # Run one step of the model.  The return values are the activations
        # from the `train_op` (which is discarded) and the `loss` Op.  To
        # inspect the values of your Ops or variables, you may include them
        # in the list passed to sess.run() and the value tensors will be
        # returned in the tuple from the call.
        _, loss_value = sess.run([train_op, loss_],
                                 feed_dict=feed_dict)

        duration = time.time() - start_time

        # Write the summaries and print an overview fairly often.
        if step % 100 == 0:

            # Print status to stdout.
            print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
            # Update the events file.
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer.add_summary(summary_str, step)
            summary_writer.flush()
        
        # Save a checkpoint and evaluate the model periodically.
        if (step + 1) % 1000 == 0 or (step + 1) == max_steps:
            checkpoint_file = join(log_dir_path, 'model.ckpt')
            saver.save(sess, checkpoint_file, global_step=step)

            # Evaluate against the training set.
            print('Train Data Eval:')
            do_eval_cnn(sess,
                        eval_correct,
                        inputs_placeholder,
                        labels_placeholder,
                        train_data,
                        logits,
                        batch_size,
                        keep_prob, dropout)

            # Evaluate against the development set.
            if dev_labels is not None:
                print('Development Data Eval:')
                do_eval_cnn(sess,
                            eval_correct,
                            inputs_placeholder,
                            labels_placeholder,
                            dev_data,
                            logits,
                            batch_size,
                            keep_prob, dropout)

            # Evaluate against the test set.
            print('Test Data Eval:')
            do_eval_cnn(sess,
                        eval_correct,
                        inputs_placeholder,
                        labels_placeholder,
                        test_data,
                        logits,
                        batch_size,
                        keep_prob, dropout)

Step 0: loss = 9960648.00 (0.168 sec)
Step 100: loss = 1.76 (0.004 sec)
Step 200: loss = 1.34 (0.003 sec)
Step 300: loss = 0.95 (0.004 sec)
Step 400: loss = 0.90 (0.002 sec)
Step 500: loss = 1.20 (0.004 sec)
Step 600: loss = 1.06 (0.004 sec)
Step 700: loss = 0.88 (0.004 sec)
Step 800: loss = 0.88 (0.004 sec)
Step 900: loss = 1.19 (0.004 sec)
Train Data Eval:
  Num examples: 4000  Num correct: 2075  Accuracy @ 1: 0.5188
Test Data Eval:
  Num examples: 2750  Num correct: 1393  Accuracy @ 1: 0.5065
Step 1000: loss = 1.14 (0.004 sec)
Step 1100: loss = 0.85 (0.005 sec)
Step 1200: loss = 0.85 (0.006 sec)
Step 1300: loss = 1.18 (0.003 sec)
Step 1400: loss = 1.04 (0.004 sec)
Step 1500: loss = 0.85 (0.009 sec)
Step 1600: loss = 0.86 (0.005 sec)
Step 1700: loss = 1.18 (0.004 sec)
Step 1800: loss = 1.19 (0.004 sec)
Step 1900: loss = 0.85 (0.004 sec)
Train Data Eval:
  Num examples: 4000  Num correct: 2087  Accuracy @ 1: 0.5218
Test Data Eval:
  Num examples: 2750  Num correct: 1395  Accuracy @ 1: