# Tensorflow

### author qhduan@memect.co

In [1]:
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

In [2]:
from data import X_train, X_test, y_train, y_test

训练集样本量：12126，测试集样本量：3032


In [3]:
tf.set_random_seed(0)

In [4]:
embedding_size = 128
PAD = ' ' # 句子不到max_len长度时的占位符
max_len = max(len(x) for x in X_train)
print('单个训练样本最大长度：{}'.format(max_len))

单个训练样本最大长度：14


In [5]:
# 载入我的自定义库 @qhduan
import sys
import importlib
sys.path.insert(0, '../../')
import tfkit
importlib.reload(tfkit)

<module 'tfkit' from '../../tfkit/__init__.py'>

In [6]:
wl = tfkit.WordLabel()
X_train_vec = wl.fit_transform(X_train, max_len=max_len)
X_test_vec = wl.transform(X_test, max_len=max_len)

In [7]:
print(wl.max_features, X_train_vec.shape, X_test_vec.shape)

4235 (12126, 14) (3032, 14)


In [8]:
oh = OneHotEncoder(sparse=False)
y_train = oh.fit_transform(y_train.reshape([-1, 1]))
y_test = oh.transform(y_test.reshape([-1, 1]))

In [9]:
learning_rate = 0.003
n_epoch = 10
batch_size = 128
time_steps = max_len
input_size = embedding_size
target_size = 2
print('time_steps', time_steps)
print('input_size', input_size)
print('target_size', target_size)

time_steps 14
input_size 128
target_size 2


In [10]:
X = tf.placeholder(tf.float32, [batch_size, max_len], name='X')
y = tf.placeholder(tf.float32, [batch_size, target_size], name='X')

In [11]:
model = X
model = tfkit.embedding(model, wl.max_features, embedding_size, max_len, name='embedding')
model = tf.reshape(model, [batch_size, max_len, embedding_size, 1])
cnn = tfkit.conv(model, 512, (14, 1), name='conv_1', activation='linear')
gate = tfkit.conv(model, 512, (14, 1), name='conv_1_gate', activation='sigmoid')
model = cnn * gate
model = tfkit.flatten(model, 'flatten')
model = tfkit.full_connect(model, target_size, name='fc_2', activation='linear')

===> (128, 14, 128) 	 embedding
===> (128, 1, 128, 512) 	 conv_1
===> (128, 1, 128, 512) 	 conv_1_gate
===> (128, 65536) 	 flatten
===> (128, 2) 	 fc_2


In [12]:
train_step, cost = tfkit.train_softmax(
    model, y,
    opt=tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
)

In [13]:
measures = [
    cost,
    tfkit.accuracy(model, y, softmax=True),
]

In [14]:
# 初始化所有变量
init = tf.global_variables_initializer()

In [15]:
# 本来是要关，不过CNN不用GPU真的好慢……
# disable GPU，关闭GPU支持
config = tf.ConfigProto(
#     device_count = {'GPU': 0}
)

In [16]:
with tf.Session(config=config) as sess:
    sess.run(init)
    total = int((len(X_train) - 1) / batch_size + 1)
    for epoch in range(n_epoch):
        print('epoch: {}'.format(epoch))
        record = []
        for X_sample, y_sample in tqdm(tfkit.batch_flow(X_train_vec, y_train, batch_size), total=total, file=sys.stdout):
            feeds = {X: X_sample, y: y_sample}
            sess.run(train_step, feeds)
            record.append(sess.run(measures, feeds))
        print('train: loss: {:.4f}, acc: {:.4f}'.format(
            np.mean([x[0] for x in record]),
            np.mean([x[1] for x in record])
        ))
        record = []
        for X_sample, y_sample in tfkit.batch_flow(X_test_vec, y_test, batch_size):
            feeds = {X: X_sample, y: y_sample}
            record.append(sess.run(measures, feeds))
        print('test: loss: {:.4f}, acc: {:.4f}'.format(
            np.mean([x[0] for x in record]),
            np.mean([x[1] for x in record])
        ))

epoch: 0
100%|██████████| 95/95 [00:02<00:00, 38.87it/s]
train: loss: 0.6796, acc: 0.5724
test: loss: 0.6723, acc: 0.5911
epoch: 1
100%|██████████| 95/95 [00:02<00:00, 39.33it/s]
train: loss: 0.6692, acc: 0.5861
test: loss: 0.6679, acc: 0.5999
epoch: 2
100%|██████████| 95/95 [00:02<00:00, 38.27it/s]
train: loss: 0.6619, acc: 0.5939
test: loss: 0.6638, acc: 0.6074
epoch: 3
100%|██████████| 95/95 [00:02<00:00, 39.06it/s]
train: loss: 0.6551, acc: 0.6040
test: loss: 0.6602, acc: 0.6120
epoch: 4
100%|██████████| 95/95 [00:02<00:00, 38.88it/s]
train: loss: 0.6489, acc: 0.6143
test: loss: 0.6568, acc: 0.6234
epoch: 5
100%|██████████| 95/95 [00:02<00:00, 38.96it/s]
train: loss: 0.6430, acc: 0.6254
test: loss: 0.6538, acc: 0.6283
epoch: 6
100%|██████████| 95/95 [00:02<00:00, 39.40it/s]
train: loss: 0.6376, acc: 0.6377
test: loss: 0.6511, acc: 0.6377
epoch: 7
100%|██████████| 95/95 [00:02<00:00, 39.44it/s]
train: loss: 0.6325, acc: 0.6481
test: loss: 0.6486, acc: 0.6423
epoch: 8
100%|██████████