In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import time
import datetime
import data_helpers
from tensorflow.contrib import learn

# Load the dataset

In [2]:
train_data = pd.read_csv('data/train_data.csv')
test_data = pd.read_csv('data/test_data.csv')

In [3]:
# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in train_data.text])
max_document_length = max_document_length if max_document_length < 800 else 800
#Cut long articles to 800 words. Pad short ones
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x_train = np.array(list(vocab_processor.fit_transform(train_data.text)))
x_test = np.array(list(vocab_processor.transform(test_data.text)))

In [4]:
y_train, y_test = train_data.target, test_data.target
#y_train = np.array(y_train).reshape(len(y_train), 1)
#y_test = np.array(y_test).reshape(len(y_test), 1)

In [5]:
#from sklearn.preprocessing import OneHotEncoder
#ohe = OneHotEncoder()
#y_train = ohe.fit_transform(y_train)
#y_test = ohe.transform(y_test)

In [6]:
#Restore the values from sparse matrix
#y_train = np.array([item.toarray().reshape(-1) for item in y_train])
#y_test = np.array([item.toarray().reshape(-1) for item in y_test])

# Set Configuration

In [63]:
class config:
    vocab_size = 2000
    class_num = 20
    embedding_size = 32
    filter_size = 8
    num_layers = 5
    block_size = 5
    filter_h = 5 #conv_kernel height_size
    doc_len = 800#context_size
    context_size = 800#context_size after padding
    batch_size = 32
    epochs = 50
    num_sampled = 1
    learning_rate = 0.01
    momentum = 0.99
    num_batches = 0
    ckpt_path = "cpkt"
    summary_path = "logs"
    data_dir = "data"
    grad_clip = 6
    
class testConfig:
    vocab_size = 2000
    class_num = 20
    embedding_size = 32
    filter_size = 8
    num_layers = 5
    block_size = 5
    filter_h = 5 #conv_kernel height_size
    doc_len = 800#context_size
    context_size = 800#context_size after padding
    batch_size = 32
    epochs = 50
    num_sampled = 1
    learning_rate = 0.1
    momentum = 0.99
    num_batches = 0
    ckpt_path = "cpkt"
    summary_path = "logs"
    data_dir = "data"
    grad_clip = 6

In [64]:
def prepare_conf(conf):
    conf.filter_w = conf.embedding_size
    #Pad the first k-1 item
    conf.context_size = conf.doc_len + int((conf.filter_h - 1)/2)
    
    # Check if data exists
    if not os.path.exists(conf.data_dir):
        exit("Please download the data as mentioned in Requirements")

    # Create paths for checkpointing
    #ckpt_model_path = 'vocab%d_embed%d_filters%d_batch%d_layers%d_block%d_fdim%d'%(conf.vocab_size, conf.embedding_size, 
            #conf.filter_size, conf.batch_size, conf.num_layers, conf.block_size, conf.filter_h)
    #conf.ckpt_path = os.path.join(conf.ckpt_path, ckpt_model_path)
    conf.ckpt_path = 'cpkt'

    if not os.path.exists(conf.ckpt_path):
        os.makedirs(conf.ckpt_path)
    conf.ckpt_file = os.path.join(conf.ckpt_path, "model.ckpt")

    # Create Logs Folder
    if tf.gfile.Exists(conf.summary_path):
        tf.gfile.DeleteRecursively(conf.summary_path)
    tf.gfile.MakeDirs(conf.summary_path)
    return conf


In [65]:
#Below code was referred to http://danijar.com/structuring-your-tensorflow-models/
import functools
def doublewrap(function):
    """
    A decorator decorator, allowing to use the decorator to be used without
    parentheses if not arguments are provided. All arguments must be optional.
    """
    @functools.wraps(function)
    def decorator(*args, **kwargs):
        if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
            return function(args[0])
        else:
            return lambda wrapee: function(wrapee, *args, **kwargs)
    return decorator


@doublewrap
def define_scope(function, scope=None, *args, **kwargs):
    """
    A decorator for functions that define TensorFlow operations. The wrapped
    function will only be executed once. Subsequent calls to it will directly
    return the result so that operations are added to the graph only once.

    The operations added by the function live within a tf.variable_scope(). If
    this decorator is used with arguments, they will be forwarded to the
    variable scope. The scope name defaults to the name of the wrapped
    function.
    """
    attribute = '_cache_' + function.__name__#Get function name
    name = scope or function.__name__
    @property
    @functools.wraps(function)#Keep the original function
    def decorator(self):
        if not hasattr(self, attribute):#If the attribute not exist
            with tf.variable_scope(name, *args, **kwargs):#Add scope name
                setattr(self, attribute, function(self))
        return getattr(self, attribute)#otherwise return the attribute
    return decorator

In [66]:
#Created a Gated CNN model
class GatedCNN(object):

    def __init__(self, conf):
        ##tf.reset_default_graph()
        #Input is a series of words
        #Paddle the first beginning k-1 values as zeros
        #doc_len = conf.doc_len + filter_h - 1
        self.X = tf.placeholder(shape=[conf.batch_size, conf.context_size], dtype=tf.int32, name="X")
        self.y = tf.placeholder(shape=[conf.batch_size], dtype=tf.int32, name="y")
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            embed = self.create_embeddings(self.X, conf)
            h, res_input = embed, embed
        
        with tf.name_scope("GatedConvLayers"):
            for i in range(conf.num_layers):
                #Channels of current input
                fanin_depth = h.get_shape()[-1]
                #Set current filter size
                filter_size = conf.filter_size if i < conf.num_layers-1 else 1
                shape = (conf.filter_h, conf.filter_w, fanin_depth, filter_size)
                with tf.variable_scope("layer_%d"%i):
                    conv_w = self.conv_op(h, shape, "linear")
                    conv_v = self.conv_op(h, shape, "gated")
                    h = conv_w * tf.sigmoid(conv_v)
                    if i % conf.block_size == 0:
                        h += res_input
                        res_input = h
            
        #h = tf.reshape(h, (-1, conf.embedding_size))
        h = tf.squeeze(h)
        #Get the last one as the hidden state
        h = h[:, -1, :]
        #print(h)
        #Flatten
        #h = tf.reshape(h, [conf.batch_size, -1])
        h_final_size = h.get_shape()[-1]
        y_shape = self.y.get_shape().as_list()
        
        #Fully connected layer
        #with tf.variable_scope('Fully_Connected_Layer'):
            #f_w = tf.get_variable("fully_w", [h_final_size, conf.embedding_size], tf.float32, 
                                    #tf.random_normal_initializer(0.0, 0.1))
            #f_b = tf.get_variable("fully_b", [conf.embedding_size], tf.float32, 
                                    #tf.constant_initializer(1.0))
            #h_fully = tf.matmul(h, f_w) + f_b

        #self.y = tf.reshape(self.y, (y_shape[0] * y_shape[1], 1))
        #Transform y into one-hot
        #y = tf.one_hot(self.y, conf.class_num)
        with tf.variable_scope('Output'):
            softmax_w = tf.get_variable("softmax_w", [conf.embedding_size, conf.class_num], tf.float32, 
                                    tf.random_normal_initializer(0.0, 0.1))
            softmax_b = tf.get_variable("softmax_b", [conf.class_num], tf.float32, 
                                    tf.constant_initializer(1.0))
        
            logits = tf.matmul(h, softmax_w) + softmax_b
        pred = tf.nn.softmax(logits)
        prediction = tf.argmax(pred, 1)
        prediction = tf.cast(prediction, tf.int32)
        correct_prediction = tf.equal(prediction, self.y)
        self.correct_num = tf.reduce_sum(tf.cast(correct_prediction, "float"))


        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=logits)
        self.loss = tf.reduce_mean(loss)

        trainer = tf.train.MomentumOptimizer(conf.learning_rate, conf.momentum)
        gradients = trainer.compute_gradients(self.loss)
        clipped_gradients = [(tf.clip_by_value(_[0], -conf.grad_clip, conf.grad_clip), _[1]) for _ in gradients]
        self.optimizer = trainer.apply_gradients(clipped_gradients)
        #self.perplexity = tf.exp(self.loss)

        self.create_summaries()

    def create_embeddings(self, X, conf):

        embeddings = tf.get_variable("embeds",(conf.vocab_size, conf.embedding_size), tf.float32, 
                                     tf.random_uniform_initializer(-1.0,1.0))
        
        embed = tf.nn.embedding_lookup(embeddings, X)
        batch_size = self.X.get_shape()[0]
        mask_layer = np.ones((conf.batch_size, conf.context_size, conf.embedding_size))
        #Zero Pad the first beginning k-1 values of the input
        #In order to 
        #Batch Length Embedding
        k = int(conf.filter_h/2)
        mask_layer[:, :k, :] = 0
        embed *= mask_layer
        
        embed_shape = embed.get_shape().as_list()
        embed = tf.reshape(embed, (embed_shape[0], embed_shape[1], embed_shape[2], 1))
        #expand_dim
        return embed


    def conv_op(self, fan_in, shape, name):
        W = tf.get_variable("%s_W"%name, shape, tf.float32, tf.random_normal_initializer(0.0, 0.1))
        b = tf.get_variable("%s_b"%name, shape[-1], tf.float32, tf.constant_initializer(1.0))
        #Note the padding method is 'SAME', it will automatically pad the first k/2 items
        return tf.add(tf.nn.conv2d(fan_in, W, strides=[1,1,1,1], padding='SAME'), b)
    
    def create_summaries(self):
        tf.summary.scalar("loss", self.loss)
        #tf.summary.scalar("perplexity", self.perplexity)
        self.merged_summary_op = tf.summary.merge_all()

In [67]:
conf = config  

In [68]:
conf = prepare_conf(conf)
graph = tf.Graph()
with graph.as_default():
    gated_cnn = GatedCNN(conf)
    saver = tf.train.Saver(tf.trainable_variables())

In [69]:
with tf.Session(graph=graph) as sess:
    init = tf.global_variables_initializer()
    sess.run(tf.global_variables_initializer())
    # Generate batches
    batches = data_helpers.batch_iter(
            list(zip(x_train, y_train)), conf.batch_size, 1)
    # Training loop. For each batch...
    for i, batch in enumerate(batches):
        x_batch, y_batch = zip(*batch)
        x_batch = np.array(x_batch)
        x_batch_pad = np.zeros([conf.batch_size, conf.context_size])
        x_batch_pad[:, :conf.doc_len]
        y_batch = np.array(y_batch)
        #Training starts
        feed_dict = {gated_cnn.X: x_batch_pad, gated_cnn.y: y_batch}
        _, l = sess.run([gated_cnn.optimizer, gated_cnn.loss], feed_dict=feed_dict)
        if i % 100 == 0:
            print('step ', i, ' Loss:{:.3f}'.format(l))
            #saver.save(sess, save_path=conf.ckpt_path)
            
    loops = len(y_test)/32
    count = 0
    for i in range(loops):
        start = i * 32
        end = (i+1) * 32
        x = x_test[start: end]
        y = y_test[start: end]
        feed_dict = {
            gated_cnn.X: x,
            gated_cnn.y: y
            }
        correct_num = sess.run(gated_cnn.correct_num, feed_dict)
        count += correct_num
        
    count/float(32*loops)

step  0  Loss:20.924
step  100  Loss:2.995
step  200  Loss:3.012
step  300  Loss:2.973


ValueError: Cannot feed value of shape (18,) for Tensor 'y:0', which has shape '(32,)'

In [42]:
help(saver.save)

Help on method save in module tensorflow.python.training.saver:

save(sess, save_path, global_step=None, latest_filename=None, meta_graph_suffix='meta', write_meta_graph=True, write_state=True) method of tensorflow.python.training.saver.Saver instance
    Saves variables.
    
    This method runs the ops added by the constructor for saving variables.
    It requires a session in which the graph was launched.  The variables to
    save must also have been initialized.
    
    The method returns the path of the newly created checkpoint file.  This
    path can be passed directly to a call to `restore()`.
    
    Args:
      sess: A Session to use to save the variables.
      save_path: String.  Path to the checkpoint filename.  If the saver is
        `sharded`, this is the prefix of the sharded checkpoint filename.
      global_step: If provided the global step number is appended to
        `save_path` to create the checkpoint filename. The optional argument
        can be a `Tensor`