In [16]:
import tensorflow as tf
import numpy as np
import pandas as pd
import os
import time
import datetime
from tensorflow.contrib import learn

# Load the dataset

In [31]:
class config:
    vocab_size = 2000
    class_num = 20
    embedding_size = 128
    filter_size = 64
    num_layers = 10
    block_size = 5
    filter_h = 6 #卷积核
    doc_len = 800
    batch_size = 64
    epochs = 50
    num_sampled = 1
    learning_rate = 1
    momentum = 0.99
    num_batches = 0
    ckpt_path = "cpkt"
    summary_path = "logs"
    data_dir = "data"

In [3]:
train_data = pd.read_csv('data/train_data.csv')
test_data = pd.read_csv('data/test_data.csv')

In [4]:
# Build vocabulary
max_document_length = max([len(x.split(" ")) for x in train_data.text])
max_document_length = max_document_length if max_document_length < config.doc_len else config.doc_len
#Cut long articles to 800 words. Pad short ones
vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
x_train = np.array(list(vocab_processor.fit_transform(train_data.text)))
x_test = np.array(list(vocab_processor.transform(test_data.text)))

In [5]:
y_train, y_test = train_data.target, test_data.target
y_train = np.array(y_train).reshape(len(y_train), 1)
y_test = np.array(y_test).reshape(len(y_test), 1)

In [6]:
from sklearn.preprocessing import OneHotEncoder
ohe = OneHotEncoder()
y_train = ohe.fit_transform(y_train)
y_test = ohe.transform(y_test)

In [7]:
#Restore the values from sparse matrix
y_train = np.array([item.toarray().reshape(-1) for item in y_train])
y_test = np.array([item.toarray().reshape(-1) for item in y_test])

In [8]:
config.vocab_size = len(vocab_processor.vocabulary_)

# Set Configuration

In [35]:
def prepare_conf(conf):
    conf.filter_w = conf.embedding_size
    conf.doc_len += int(conf.filter_h/2)
    
    # Check if data exists
    if not os.path.exists(conf.data_dir):
        exit("Please download the data as mentioned in Requirements")

    # Create paths for checkpointing
    ckpt_model_path = 'vocab%d_embed%d_filters%d_batch%d_layers%d_block%d_fdim%d'%(conf.vocab_size, conf.embedding_size, 
            conf.filter_size, conf.batch_size, conf.num_layers, conf.block_size, conf.filter_h)
    #conf.ckpt_path = os.path.join(conf.ckpt_path, ckpt_model_path)
    conf.ckpt_path = 'cpkt'

    if not os.path.exists(conf.ckpt_path):
        os.makedirs(conf.ckpt_path)
    conf.ckpt_file = os.path.join(conf.ckpt_path, "model.ckpt")

    # Create Logs Folder
    if tf.gfile.Exists(conf.summary_path):
        tf.gfile.DeleteRecursively(conf.summary_path)
    tf.gfile.MakeDirs(conf.summary_path)
    return conf


In [50]:
#Created a Gated CNN model
class GatedCNN(object):

    def __init__(self, conf):
        ##tf.reset_default_graph()
        #Input is a series of words
        self.X = tf.placeholder(shape=[conf.batch_size, conf.doc_len-1], dtype=tf.int32, name="X")
        self.y = tf.placeholder(shape=[conf.batch_size, 1], dtype=tf.int32, name="y")
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            embed = self.create_embeddings(self.X, conf)
            h, res_input = embed, embed
        
        with tf.name_scope("GatedConvLayers"):
            for i in range(conf.num_layers):
                fanin_depth = h.get_shape()[-1]
                filter_size = conf.filter_size if i < conf.num_layers-1 else 1
                shape = (conf.filter_h, conf.filter_w, fanin_depth, filter_size)
                with tf.variable_scope("layer_%d"%i):
                    conv_w = self.conv_op(h, shape, "linear")
                    conv_v = self.conv_op(h, shape, "gated")
                    h = conv_w * tf.sigmoid(conv_v)
                    if i % conf.block_size == 0:
                        h += res_input
                        res_input = h
            
        #h = tf.reshape(h, (-1, conf.embedding_size))
        h = tf.squeeze(h)
        h = tf.reshape(h, [conf.batch_size])
        print(h)
        y_shape = self.y.get_shape().as_list()

        #self.y = tf.reshape(self.y, (y_shape[0] * y_shape[1], 1))
        #Transform y into one-hot
        #y = tf.one_hot(self.y, conf.class_num)

        softmax_w = tf.get_variable("softmax_w", [conf.embedding_size, conf.class_num], tf.float32, 
                                    tf.random_normal_initializer(0.0, 0.1))
        softmax_b = tf.get_variable("softmax_b", [conf.class_num], tf.float32, 
                                    tf.constant_initializer(1.0))
        
        pred = tf.matmul(h, softmax_w) + softmax_b
        print(pred)
        #pred = tf.nn.softmax(pred)

        #Preferance: NCE Loss, heirarchial softmax, adaptive softmax
        #self.loss = tf.reduce_mean(tf.nn.nce_loss(softmax_w, softmax_b, h, self.y, 
                                                  #conf.num_sampled, conf.vocab_size))
        self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=pred)

        trainer = tf.train.MomentumOptimizer(conf.learning_rate, conf.momentum)
        gradients = trainer.compute_gradients(self.loss)
        clipped_gradients = [(tf.clip_by_value(_[0], -conf.grad_clip, conf.grad_clip), _[1]) for _ in gradients]
        self.optimizer = trainer.apply_gradients(clipped_gradients)
        #self.perplexity = tf.exp(self.loss)

        self.create_summaries()

    def create_embeddings(self, X, conf):

        embeddings = tf.get_variable("embeds",(conf.vocab_size, conf.embedding_size), tf.float32, tf.random_uniform_initializer(-1.0,1.0))
        embed = tf.nn.embedding_lookup(embeddings, X)
        mask_layer = np.ones((conf.batch_size, conf.doc_len-1, conf.embedding_size))
        #Zero Pad the first beginning k-1 values
        k = int(conf.filter_h/2)
        mask_layer[:,0:k,:] = 0
        embed *= mask_layer
        
        embed_shape = embed.get_shape().as_list()
        embed = tf.reshape(embed, (embed_shape[0], embed_shape[1], embed_shape[2], 1))
        #expand_dim
        return embed


    def conv_op(self, fan_in, shape, name):
        W = tf.get_variable("%s_W"%name, shape, tf.float32, tf.random_normal_initializer(0.0, 0.1))
        b = tf.get_variable("%s_b"%name, shape[-1], tf.float32, tf.constant_initializer(1.0))
        return tf.add(tf.nn.conv2d(fan_in, W, strides=[1,1,1,1], padding='SAME'), b)
    
    def create_summaries(self):
        tf.summary.scalar("loss", self.loss)
        #tf.summary.scalar("perplexity", self.perplexity)
        self.merged_summary_op = tf.summary.merge_all()

In [51]:
conf = config
conf = prepare_conf(conf)
graph = tf.Graph()
with graph.as_default():
    gated_cnn = GatedCNN(conf)

Tensor("Squeeze:0", shape=(64, 826, 128), dtype=float32)


ValueError: Shape must be rank 2 but is rank 3 for 'MatMul' (op: 'MatMul') with input shapes: [64,826,128], [128,20].