In [1]:
import os

import numpy as np
import tensorflow as tf

In [2]:
import tensorflow as tf

class TextCNN(object):
    """A convolutional neural network for text classification.

    The constructor builds the complete TensorFlow graph: embedding lookup,
    one convolution + max-pooling branch per filter size, dropout, a linear
    output layer, and the loss / accuracy ops.

    Args:
        sequence_length: Number of token ids per example (second dimension
            of `input_x`).
        vocab_size: Number of rows in the word-embedding matrix.
        word_embed_size: Width of each word-embedding vector.
        filter_num: Number of convolution filters per filter size.
        filter_sizes: Non-empty iterable of filter heights, in tokens.
        num_classes: Number of output classes.

    Raises:
        ValueError: If `filter_sizes` is empty.

    Attributes:
        input_x: int32 placeholder of shape [None, sequence_length].
        input_y: int32 placeholder of shape [None] holding class indices.
        keep_proba: float32 placeholder used as dropout keep probability.
        W: Word-embedding variable of shape [vocab_size, word_embed_size].
        embeds: Embedding lookup of `input_x` in `W`.
        embeds_expanded: `embeds` with a trailing channel dimension added.
        h_pool: Pooled features of all branches concatenated on axis 3.
        h_pool_flat: `h_pool` reshaped to [-1, filter_num * len(filter_sizes)].
        h_drop: `h_pool_flat` after dropout.
        logits: Unnormalised class scores.
        y: Softmax of `logits`.
        loss: Mean sparse softmax cross-entropy between `logits` and `input_y`.
        pred: Argmax of `y` along axis 1.
        accuracy: Mean of the element-wise equality of `pred` and `input_y`.
    """
    def __init__(self,
                 sequence_length,
                 vocab_size,
                 word_embed_size,
                 filter_num,
                 filter_sizes,
                 num_classes):
        # Materialise once so any iterable works and len() is available.
        filter_sizes = list(filter_sizes)
        if not filter_sizes:
            raise ValueError('filter_sizes must contain at least one filter size')

        # Placeholders for input, output, dropout
        self.input_x = tf.placeholder(
            tf.int32, shape=[None, sequence_length], name='input_x')
        self.input_y = tf.placeholder(
            tf.int32, shape=[None, ], name='input_y')
        self.keep_proba = tf.placeholder(
            tf.float32, shape=None, name='keep_proba')

        # Embedding layer
        with tf.name_scope('embedding'):
            self.W = tf.get_variable('word_embedding',
                                     [vocab_size, word_embed_size],
                                     tf.float32,
                                     tf.random_normal_initializer())
            self.embeds = tf.nn.embedding_lookup(self.W, self.input_x)
            # conv2d needs a channel axis, so append one of size 1.
            self.embeds_expanded = tf.expand_dims(self.embeds, -1)

        # Convolution + maxpool layer: one branch per filter size
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope(f'conv-maxpool-{filter_size}'):
                # The filter spans the full embedding width, so it only
                # slides along the sequence axis.
                filter_shape = [filter_size, word_embed_size, 1, filter_num]
                W = tf.get_variable(f"W-{filter_size}",
                                    filter_shape,
                                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                b = tf.get_variable(f"b-{filter_size}", [filter_num],
                                    initializer=tf.constant_initializer(0.0))
                conv = tf.nn.conv2d(self.embeds_expanded,
                                    W,
                                    strides=[1, 1, 1, 1],
                                    padding='VALID',
                                    name=f'conv-{filter_size}')
                conv_hidden = tf.nn.tanh(tf.add(conv, b), name=f'tanh-{filter_size}')
                # With VALID padding the conv output has
                # sequence_length - filter_size + 1 positions; pooling over
                # all of them keeps one value per filter.
                pool = tf.nn.max_pool(conv_hidden,
                                      ksize=[1, sequence_length - filter_size + 1, 1, 1],
                                      strides=[1, 1, 1, 1],
                                      padding='VALID',
                                      name=f'pool-{filter_size}')
                pooled_outputs.append(pool)

        # Combine the branches once, after the loop, so no throw-away
        # concat/reshape ops are added to the graph per filter size.
        num_filters_total = filter_num * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Drop out layer
        with tf.name_scope('dropout'):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.keep_proba)

        # Final scores and predictions
        with tf.name_scope('output'):
            softmax_w = tf.get_variable('softmax_w', [num_filters_total, num_classes],
                                        tf.float32, tf.random_normal_initializer())
            softmax_b = tf.get_variable('softmax_b', [num_classes], tf.float32,
                                        tf.constant_initializer(0.0))
            self.logits = tf.matmul(self.h_drop, softmax_w) + softmax_b
            self.y = tf.nn.softmax(self.logits, name='y')

        # Mean cross-entropy loss
        with tf.name_scope('loss'):
            losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.input_y, name='losses')
            self.loss = tf.reduce_mean(losses, name='loss')

        # Accuracy
        with tf.name_scope('accuracy'):
            # Predicted class = index of the highest probability
            self.pred = tf.argmax(self.y, 1, name='pred')
            # Element-wise comparison of predictions with the labels
            correct_prediction = tf.equal(tf.cast(self.pred, tf.int32), self.input_y)
            # Cast the booleans to floats, then average them
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32),
                                           name='accuracy')

In [4]:
# --- Model shape (passed to the TextCNN constructor) ---
vocab_size = 80_000
word_embed_size = 128
filter_sizes = [3, 4, 5]
filter_num = 64
num_classes = 2

# --- Optimisation and logging ---
learning_rate = 0.01
batch_size = 50
# 5001 rather than 5000 so that step 5000 itself is run and logged.
training_steps = 5001
print_loss_every = 500

In [5]:
# Load the integer matrices. ndmin=2 keeps a single-row file two-dimensional
# so the column slicing below still works.
train = np.loadtxt('data/train_data.txt', dtype=int, ndmin=2)
test = np.loadtxt('data/test_data.txt', dtype=int, ndmin=2)

# Shuffle the rows with a dedicated, seeded generator so that a
# "Restart & Run All" reproduces the same row order.
shuffle_seed = 42
shuffle_rng = np.random.RandomState(shuffle_seed)
train_shuffle_idx = shuffle_rng.permutation(train.shape[0])
test_shuffle_idx = shuffle_rng.permutation(test.shape[0])
train = train[train_shuffle_idx]
test = test[test_shuffle_idx]

# The last column is used as the label; all preceding columns are the inputs.
x_train = train[:, :-1]
y_train = train[:, -1]
x_test = test[:, :-1]
y_test = test[:, -1]

sequence_length = x_train.shape[1]
dataset_size = train.shape[0]

In [6]:
tf.reset_default_graph()
with tf.Graph().as_default():
    cnn = TextCNN(sequence_length,
                  vocab_size,
                  word_embed_size,
                  filter_num,
                  filter_sizes,
                  num_classes)

    # Evaluation feeds: full train / test sets with dropout disabled
    # (keep probability 1.0).
    input_x, input_y, keep_proba = (
        cnn.input_x, cnn.input_y, cnn.keep_proba)
    train_feed_dict = {input_x: x_train,
                       input_y: y_train,
                       keep_proba: 1.0}
    test_feed_dict = {input_x: x_test,
                      input_y: y_test,
                      keep_proba: 1.0}

    # Create the checkpoint directory up front, so a missing folder cannot
    # make the final save fail after every training step has already run.
    checkpoint_path = 'model/model.ckpt'
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    # Train
    # NOTE(review): the Saver is constructed before minimize(); presumably
    # this leaves the Adam optimizer state out of the checkpoint — confirm
    # if training is ever meant to be resumed from it.
    saver = tf.train.Saver()
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cnn.loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(training_steps):
            # Walk through the training set in order, wrapping around; the
            # last slice of a pass may hold fewer than batch_size rows.
            start = (i * batch_size) % dataset_size
            end = min(start + batch_size, dataset_size)
            feed_dict = {input_x: x_train[start:end],
                         input_y: y_train[start:end],
                         keep_proba: 0.5}
            sess.run(train_step, feed_dict=feed_dict)
            if i % print_loss_every == 0:
                # Loss of the current mini-batch only, evaluated with the
                # training feed (dropout still active).
                avg_cost = cnn.loss.eval(feed_dict=feed_dict)
                train_acc = cnn.accuracy.eval(feed_dict=train_feed_dict)
                # One pass over the test set yields both accuracy and
                # predictions, instead of two separate full evaluations.
                test_acc, test_pred = sess.run(
                    [cnn.accuracy, cnn.pred], feed_dict=test_feed_dict)
                # The label says "Epoch", but `i` counts mini-batch steps.
                print(f"Epoch: {i:04d} | AvgCost: {avg_cost:7.4f}", end="")
                print(f" | Train/Test ACC: {train_acc:.3f}/{test_acc:.3f}")

        # After training, save the session's variables
        save_path = saver.save(sess, checkpoint_path)

Epoch: 0000 | AvgCost: 13.0630 | Train/Test ACC: 0.509/0.510
Epoch: 0500 | AvgCost:  0.5697 | Train/Test ACC: 0.718/0.688
Epoch: 1000 | AvgCost:  0.3931 | Train/Test ACC: 0.830/0.782
Epoch: 1500 | AvgCost:  0.5147 | Train/Test ACC: 0.891/0.833
Epoch: 2000 | AvgCost:  0.3279 | Train/Test ACC: 0.908/0.839
Epoch: 2500 | AvgCost:  0.4320 | Train/Test ACC: 0.902/0.842
Epoch: 3000 | AvgCost:  0.2598 | Train/Test ACC: 0.888/0.817
Epoch: 3500 | AvgCost:  0.2320 | Train/Test ACC: 0.904/0.837
Epoch: 4000 | AvgCost:  0.1873 | Train/Test ACC: 0.929/0.851
Epoch: 4500 | AvgCost:  0.3524 | Train/Test ACC: 0.951/0.876
Epoch: 5000 | AvgCost:  0.1530 | Train/Test ACC: 0.940/0.858


### Change Log:

v0: 添加 dropout，美化打印效果