In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import time
import datetime
import data_helpers
from text_cnn import TextCNN
from tensorflow.contrib import learn

In [2]:
train_data = pd.read_csv('data/train_data.csv')
test_data = pd.read_csv('data/test_data.csv')

In [3]:
# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in train_data.text])
vocab_processor = learn.preprocessing.VocabularyProcessor(800)
x_train = np.array(list(vocab_processor.fit_transform(train_data.text)))

In [4]:
x_test = np.array(list(vocab_processor.transform(test_data.text)))

In [5]:
y_train, y_test = train_data.target, test_data.target
y_train = np.array(y_train).reshape(len(y_train), 1)
y_test = np.array(y_test).reshape(len(y_test), 1)

In [6]:
#Encode the label as one-hot code
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
y_train = ohe.fit_transform(y_train)
y_test = ohe.transform(y_test)

In [7]:
y_test = np.array([item.toarray().reshape(-1) for item in y_test])

In [8]:
graph = tf.Graph()
with graph.as_default():
    cnn = TextCNN(
            sequence_length=x_train.shape[1],
            num_classes=y_train.shape[1],
            vocab_size=len(vocab_processor.vocabulary_),
            embedding_size=64,
            filter_sizes= [3, 4, 5],
            num_filters=32,
            l2_reg_lambda=0.01)

In [9]:
with graph.as_default():
    # Define Training procedure
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-4)
    grads_and_vars = optimizer.compute_gradients(cnn.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
    
    # Keep track of gradient values and sparsity (optional)
    grad_summaries = []
    for g, v in grads_and_vars:
        if g is not None:
            grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
            sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
            grad_summaries.append(grad_hist_summary)
            grad_summaries.append(sparsity_summary)
    grad_summaries_merged = tf.summary.merge(grad_summaries)
    # Output directory for models and summaries
    timestamp = str(int(time.time()))
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
    print("Writing to {}\n".format(out_dir))
    # Summaries for loss and accuracy
    loss_summary = tf.summary.scalar("loss", cnn.loss)
    acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)
    
    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir, graph)

    # Dev summaries
    dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
    dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
    dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, graph)

    # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)

INFO:tensorflow:Summary name embedding/W:0/grad/hist is illegal; using embedding/W_0/grad/hist instead.
INFO:tensorflow:Summary name embedding/W:0/grad/sparsity is illegal; using embedding/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-3/W:0/grad/hist is illegal; using conv-maxpool-3/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-3/W:0/grad/sparsity is illegal; using conv-maxpool-3/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-3/b:0/grad/hist is illegal; using conv-maxpool-3/b_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-3/b:0/grad/sparsity is illegal; using conv-maxpool-3/b_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-4/W:0/grad/hist is illegal; using conv-maxpool-4/W_0/grad/hist instead.
INFO:tensorflow:Summary name conv-maxpool-4/W:0/grad/sparsity is illegal; using conv-maxpool-4/W_0/grad/sparsity instead.
INFO:tensorflow:Summary name conv-maxpool-4/b:0/grad/hist is illegal; using 

In [10]:
with tf.Session(graph=graph) as sess:
    init = tf.global_variables_initializer()
    sess.run(tf.global_variables_initializer())
    def train_step(x_batch, y_batch):
            """
            A single training step
            """
            feed_dict = {
              cnn.input_x: x_batch,
              cnn.input_y: y_batch,
              cnn.dropout_keep_prob: 0.5
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

    def dev_step(x_batch, y_batch, writer=None):
        """
        Evaluates model on a dev set
        """
        print('Evaluation....')
        loops = int(len(x_batch)/32)
        remains = len(x_batch) - 32*loops
        count = 0
        for i in range(loops):
            start = i * 32
            end = (i+1) * 32
            x = x_batch[start: end]
            y = y_batch[start: end]
            feed_dict = {
                cnn.input_x: x,
                cnn.input_y: y,
                cnn.dropout_keep_prob: 1.0
            }
            step, summaries, loss, correct_num = sess.run([global_step, dev_summary_op, cnn.loss, cnn.correct_num],
            feed_dict)
            count += correct_num
        for i in range(remains):
            start = 32 * loops + i
            end = 32 * loops + i + 1
            x = x_batch[start: end]
            y = y_batch[start: end]
            feed_dict = {
                cnn.input_x: x,
                cnn.input_y: y,
                cnn.dropout_keep_prob: 1.0
            }
            step, summaries, loss, correct_num = sess.run([global_step, dev_summary_op, cnn.loss, cnn.correct_num],
            feed_dict)
            count += correct_num
        time_str = datetime.datetime.now().isoformat()
        print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, float(correct_num)/len(y_batch)))
        if writer:
            writer.add_summary(summaries, step)

    # Generate batches
    batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), 32, 1)
    # Training loop. For each batch...
    for batch in batches:
        x_batch, y_batch = zip(*batch)
        x_batch = np.array(x_batch)
        y_batch = np.array([item.toarray().reshape(-1) for item in y_batch])
        
        train_step(x_batch, y_batch)
        current_step = tf.train.global_step(sess, global_step)
        if current_step % 200 == 0:
            path = saver.save(sess, checkpoint_prefix, global_step=current_step)
            print("Saved model checkpoint to {}\n".format(path))
            
    #Testing 
    dev_step(x_test, y_test, writer=dev_summary_writer)

2017-10-29T22:30:16.726902: step 1, loss 9.68835, acc 0.03125
2017-10-29T22:30:16.978845: step 2, loss 9.62094, acc 0
2017-10-29T22:30:17.212626: step 3, loss 8.44433, acc 0.03125
2017-10-29T22:30:17.444415: step 4, loss 7.79644, acc 0.03125
2017-10-29T22:30:17.682706: step 5, loss 8.5534, acc 0
2017-10-29T22:30:17.914489: step 6, loss 8.03319, acc 0.0625
2017-10-29T22:30:18.146274: step 7, loss 7.54005, acc 0.0625
2017-10-29T22:30:18.384579: step 8, loss 8.47827, acc 0
2017-10-29T22:30:18.616358: step 9, loss 8.35973, acc 0.03125
2017-10-29T22:30:18.863631: step 10, loss 8.6254, acc 0.09375
2017-10-29T22:30:19.101915: step 11, loss 8.06973, acc 0.03125
2017-10-29T22:30:19.333705: step 12, loss 9.49952, acc 0
2017-10-29T22:30:19.565485: step 13, loss 8.04633, acc 0.03125
2017-10-29T22:30:19.803545: step 14, loss 7.31617, acc 0.03125
2017-10-29T22:30:20.035325: step 15, loss 8.90434, acc 0.09375
2017-10-29T22:30:20.286243: step 16, loss 8.21378, acc 0.09375
2017-10-29T22:30:20.505390: s