In [1]:
"""
We use the following lines because we are running on Google Colab.
If you are running this notebook on a local computer, you don't need this cell.
"""
import os
from google.colab import drive

# Mount Google Drive into the Colab VM (prompts for an authorization code).
drive.mount('/content/gdrive')

# Work from the project folder so the relative data paths used later resolve.
os.chdir('/content/gdrive/My Drive/finch/tensorflow2/dialog_clean/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [2]:
# Install the pinned TensorFlow 2.0 release candidate that the cells below report running on.
!pip install tensorflow-gpu==2.0.0-rc1

Collecting tensorflow-gpu==2.0.0-rc1
[?25l  Downloading https://files.pythonhosted.org/packages/73/cf/2fc69ba3e59edc8333e2676fa71b40197718dea7dc1282c79955cf6b2acb/tensorflow_gpu-2.0.0rc1-cp36-cp36m-manylinux2010_x86_64.whl (380.5MB)
[K     |████████████████████████████████| 380.5MB 79kB/s 
Collecting tb-nightly<1.15.0a20190807,>=1.15.0a20190806 (from tensorflow-gpu==2.0.0-rc1)
[?25l  Downloading https://files.pythonhosted.org/packages/bc/88/24b5fb7280e74c7cf65bde47c171547fd02afb3840cff41bcbe9270650f5/tb_nightly-1.15.0a20190806-py3-none-any.whl (4.3MB)
[K     |████████████████████████████████| 4.3MB 32.2MB/s 
Collecting tf-estimator-nightly<1.14.0.dev2019080602,>=1.14.0.dev2019080601 (from tensorflow-gpu==2.0.0-rc1)
[?25l  Downloading https://files.pythonhosted.org/packages/21/28/f2a27a62943d5f041e4a6fd404b2d21cb7c59b2242a4e73b03d9ba166552/tf_estimator_nightly-1.14.0.dev2019080601-py2.py3-none-any.whl (501kB)
[K     |████████████████████████████████| 501kB 41.1MB/s 
Installing col

In [3]:
import logging
import math
import os
import pprint
import random
import time
from pathlib import Path

import numpy as np
import tensorflow as tf

# Report the runtime so the recorded outputs can be matched to an environment.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 2.0.0-rc1
GPU Enabled: True


In [0]:
# stream data from text files
def data_generator(params, is_training):
  """Stream character-id encoded (source, target) pairs from params['train_path'].

  Each usable line of the file has the form ``source|target``; lines that do
  not split into exactly two fields on '|' are skipped.

  Args:
    params: dict providing 'train_path' (text file to read), 'char2idx'
      (character -> id mapping) and 'max_len' (sequences are truncated to
      this many ids).
    is_training: when true, every positive example is followed by a negative
      example that pairs the same target with the previous line's source.

  Yields:
    ((source_ids, target_ids), label) with label 1 for a real pair and 0 for
    a mismatched (negative) pair.
  """
  char2idx = params['char2idx']
  # Characters missing from the vocabulary all map to the id just past its end.
  unk_id = len(char2idx)
  max_len = params['max_len']

  def encode(text):
    # Slicing is a no-op for texts that are already short enough.
    return [char2idx.get(c, unk_id) for c in text][:max_len]

  # Source used for the negative example of the very first line.
  last = '我好想你'
  # The corpus is Chinese text: decode explicitly as UTF-8 rather than relying
  # on whatever the platform's default locale encoding happens to be.
  with open(params['train_path'], encoding='utf-8') as f:
    print('Reading', params['train_path'])
    for line in f:
      sp = line.rstrip().split('|')
      if len(sp) != 2:
        continue
      source, target = sp
      _source = encode(source)
      _target = encode(target)

      yield ((_source, _target), 1)
      if is_training:
        # Negative sample: previous source paired with the current target.
        yield ((encode(last), _target), 0)
        last = source

      
def dataset(params, is_training):
  """Wrap `data_generator` in a padded, batched `tf.data.Dataset`.

  Args:
    params: configuration dict; 'batch_size' is read here and the whole dict
      is forwarded to `data_generator`.
    is_training: forwarded to `data_generator`; also enables shuffling.

  Returns:
    A dataset of ((text1, text2), label) batches in which both id sequences
    are padded with 0 up to the longest sequence in the batch.
  """
  element_shapes = (([None], [None]), ())
  element_types = ((tf.int32, tf.int32), tf.int32)
  pad_values = ((0, 0), -1)

  def _examples():
    return data_generator(params=params, is_training=is_training)

  pipeline = tf.data.Dataset.from_generator(
    _examples,
    output_types=element_types,
    output_shapes=element_shapes)

  if is_training:
    # NOTE(review): the buffer size is fixed here; a 'buffer_size' entry in
    # params is not consulted -- confirm which value is intended.
    pipeline = pipeline.shuffle(200000)

  return pipeline.padded_batch(
    params['batch_size'],
    padded_shapes=element_shapes,
    padding_values=pad_values)

In [0]:
class RE2(tf.keras.Model):
  """Text-pair matching model built from stacked encode / align / fuse blocks.

  The model takes a pair of padded id sequences and returns one logit per
  pair. Id 0 is treated as padding throughout (masks are `tf.sign(ids)`).

  Args:
    params: dict providing 'vocab_size', 'hidden_size', 'dropout_rate',
      'num_blocks' and 'activation'.
  """
  def __init__(self, params: dict):
    super().__init__()

    hidden = params['hidden_size']
    act = params['activation']
    rate = params['dropout_rate']
    # Kept on the instance so `call` does not depend on a global `params`.
    self.num_blocks = params['num_blocks']

    def dense_per_block(prefix):
      # One independently-weighted Dense layer per block, named '<prefix>_<k>'.
      return [tf.keras.layers.Dense(hidden, act, name='%s_%d' % (prefix, i+1))
              for i in range(self.num_blocks)]

    self.embedding = tf.keras.layers.Embedding(params['vocab_size'], hidden)
    self.embed_dropout = tf.keras.layers.Dropout(rate)

    self.birnn = [tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
        hidden, return_sequences=True), name='birnn_%d'%(i+1)) for i in range(self.num_blocks)]
    self.enc_dropout = tf.keras.layers.Dropout(rate)

    # Learnable scalar that scales the attention scores; starts at sqrt(1/hidden_size).
    self.align_t = self.add_weight(name='temperature',
                                   shape=(),
                                   trainable=True,
                                   initializer=tf.initializers.constant(math.sqrt(1/hidden)))
    self.align_dropout = tf.keras.layers.Dropout(rate)
    self.align_fc1 = dense_per_block('align_fc1')
    self.align_fc2 = dense_per_block('align_fc2')

    self.fusion_fc1 = dense_per_block('fusion_fc1')
    self.fusion_fc2 = dense_per_block('fusion_fc2')
    self.fusion_fc3 = dense_per_block('fusion_fc3')
    self.fusion_fc4 = dense_per_block('fusion_fc4')
    self.fusion_dropout = tf.keras.layers.Dropout(rate)

    self.out_drop1 = tf.keras.layers.Dropout(rate)
    self.out_fc = tf.keras.layers.Dense(hidden, act, name='out_fc')
    self.out_drop2 = tf.keras.layers.Dropout(rate)
    self.out_linear = tf.keras.layers.Dense(1, name='out_linear')


  def call(self, inputs, training=False):
    """Score a batch of sequence pairs.

    Args:
      inputs: tuple (x1, x2) of integer id tensors; presumably shaped
        (batch, length) with 0 as padding -- confirm against the input pipeline.
      training: enables dropout when true.

    Returns:
      A tensor with one unnormalized logit per pair.
    """
    x1, x2 = inputs

    # Non-zero ids are real tokens, id 0 is padding.
    mask1 = tf.sign(x1)
    mask2 = tf.sign(x2)

    x1 = self.embedding(x1)
    x2 = self.embedding(x2)
    x1 = self.embed_dropout(x1, training=training)
    x2 = self.embed_dropout(x2, training=training)

    res_x1, res_x2 = x1, x2

    for i in range(self.num_blocks):

      if i > 0:
        # From the second block on, combine the previous block's output with
        # the previous block's input before encoding again.
        x1 = self.connection(x1, res_x1, i)
        x2 = self.connection(x2, res_x2, i)
        res_x1, res_x2 = x1, x2

      x1_enc = self.encoding(x1, mask1, i, training=training)
      x2_enc = self.encoding(x2, mask2, i, training=training)

      x1 = tf.concat((x1, x1_enc), -1)
      x2 = tf.concat((x2, x2_enc), -1)

      align_1, align_2 = self.alignment(x1, x2, mask1, mask2, i, training=training)

      x1 = self.fusion(x1, align_1, i, training=training)
      x2 = self.fusion(x2, align_2, i, training=training)

    x1 = self.pooling(x1, mask1)
    x2 = self.pooling(x2, mask2)

    x = self.prediction(x1, x2, training=training)

    return x


  def connection(self, x, res, i):
    """Build the input of block `i` from the previous block's output and input.

    For i == 1 the previous input `res` is simply concatenated in front of
    `x`. For i > 1 the trailing `x.shape[-1]` features of `res` are averaged
    with `x` (sum scaled by sqrt(0.5)) and the leading features of `res` are
    concatenated in front of the result.
    """
    if i == 1:
      x = tf.concat((res, x), -1)
    elif i > 1:
      hidden_size = x.shape[-1]
      x = (res[:, :, -hidden_size:] + x) * tf.math.sqrt(0.5)
      x = tf.concat((res[:, :, :-hidden_size], x), -1)
    return x


  def encoding(self, x, mask, i, training):
    """Zero out padded positions, run block `i`'s bidirectional LSTM, apply dropout."""
    mask = tf.cast(tf.expand_dims(mask, -1), tf.float32)
    x *= mask
    x = self.birnn[i](x)
    # Pass `training` explicitly, as every other dropout call in this model does.
    x = self.enc_dropout(x, training=training)
    return x


  def alignment(self, x1, x2, mask1, mask2, i, training):
    """Cross-attend the two sequences and return their aligned representations.

    Returns:
      (x1_aligned, x2_aligned): for every position of x1 an attention-weighted
      sum over x2, and for every position of x2 an attention-weighted sum
      over x1.
    """
    mask1 = tf.cast(tf.expand_dims(mask1, -1), tf.float32)
    mask2 = tf.cast(tf.expand_dims(mask2, -1), tf.float32)

    x1_ = self.align_fc1[i](self.align_dropout(x1, training=training))
    x2_ = self.align_fc2[i](self.align_dropout(x2, training=training))
    align = tf.matmul(x1_, x2_, transpose_b=True) * self.align_t
    # Pairwise mask: 1 only where both positions are real tokens.
    mask = tf.matmul(mask1, mask2, transpose_b=True)
    # Push scores that involve padding to the most negative float before softmax.
    align = mask * align + (1 - mask) * tf.float32.min
    align_1 = tf.nn.softmax(align, 1)
    align_2 = tf.nn.softmax(align, 2)

    # Both products must read the *original* x1 / x2. Results go to fresh names
    # so the second matmul does not consume the already-aligned x2.
    x2_aligned = tf.matmul(align_1, x1, transpose_a=True)
    x1_aligned = tf.matmul(align_2, x2)
    return x1_aligned, x2_aligned


  def fusion(self, x, align, i, training):
    """Merge `x` with its aligned counterpart.

    Three Dense views -- of (x, align), (x, x - align) and (x, x * align) --
    are concatenated, passed through dropout, and projected by a fourth
    Dense layer.
    """
    x = tf.concat([self.fusion_fc1[i](tf.concat((x, align), -1)),
                   self.fusion_fc2[i](tf.concat((x, x - align), -1)),
                   self.fusion_fc3[i](tf.concat((x, x * align), -1))], -1)
    x = self.fusion_dropout(x, training=training)
    x = self.fusion_fc4[i](x)
    return x


  def pooling(self, x, mask):
    """Max over axis 1 after multiplying padded positions by zero.

    Padded positions therefore contribute 0 to the max rather than being
    excluded from it.
    """
    mask = tf.cast(tf.expand_dims(mask, -1), tf.float32)
    return tf.reduce_max(x * mask, 1)


  def prediction(self, x1, x2, training):
    """Map the two pooled vectors to a single logit per pair."""
    x = tf.concat((x1, x2, x1 * x2, x1 - x2), -1)
    x = self.out_drop1(x, training=training)
    x = self.out_fc(x)
    x = self.out_drop2(x, training=training)
    x = self.out_linear(x)
    # Drop the trailing size-1 axis left by the final Dense(1).
    x = tf.squeeze(x, -1)
    return x

In [0]:
def get_vocab(f_path):
  """Load a vocabulary file with one token per line.

  Args:
    f_path: path to a UTF-8 text file.

  Returns:
    dict mapping each line (without its trailing newline) to its 0-based line
    index. If a token appears on several lines, the last index wins.
  """
  k2v = {}
  # Decode explicitly as UTF-8 so the result does not depend on the platform's
  # default locale encoding.
  with open(f_path, encoding='utf-8') as f:
    for i, line in enumerate(f):
      # Strip only the newline: a token may itself be a whitespace character.
      k2v[line.rstrip('\n')] = i
  return k2v

In [0]:
def is_descending(history: list, num_patience: int = None):
  """Tell whether the most recent values of `history` are strictly decreasing.

  Args:
    history: sequence of metric values, oldest first.
    num_patience: number of consecutive drops to look for; the last
      `num_patience + 1` values are inspected. When omitted, the value is
      read from the global `params['num_patience']`.

  Returns:
    False as soon as some inspected value is less than or equal to the value
    that follows it, True otherwise (including when fewer than two values are
    available).
  """
  if num_patience is None:
    num_patience = params['num_patience']
  window = history[-(num_patience+1):]
  return not any(prev <= cur for prev, cur in zip(window, window[1:]))

In [0]:
params = dict(
  # --- data ---
  train_path='./train_cleaned.txt',   # 'source|target' lines, one pair per line
  vocab_path='./char.txt',            # one character per line
  batch_size=512,
  max_len=50,                         # sequences are truncated to this many ids
  buffer_size=400000,
  # --- model ---
  num_blocks=2,
  dropout_rate=0.2,
  hidden_size=150,
  activation=tf.nn.relu,
  # --- optimisation ---
  lr=4e-4,                            # initial learning rate
  clip_norm=5.,                       # global-norm gradient clipping threshold
  eval_steps=1000,                    # evaluate every this many training steps
  num_patience=5,                     # consecutive accuracy drops before early stop
)

In [0]:
# Load the vocabulary from the configured path instead of a second hard-coded copy of it.
params['char2idx'] = get_vocab(params['vocab_path'])
# One extra embedding row past the vocabulary for characters that are not in it.
params['vocab_size'] = len(params['char2idx']) + 1

In [10]:
model = RE2(params)
model.build([[None, None], [None, None]])
pprint.pprint([(v.name, v.shape) for v in model.trainable_variables])
#model.load_weights('model')

# The schedule is evaluated by hand each step and written into the optimizer's
# learning-rate variable, which is also what gets logged below.
decay_lr = tf.optimizers.schedules.ExponentialDecay(params['lr'], 1000, 0.99)
optim = tf.optimizers.Adam(params['lr'])
global_step = 0

history_acc = []
best_acc = .0
# Set when early stopping triggers. A bare `break` only leaves the inner batch
# loop, after which the outer loop would start another pass over the data.
stop_training = False

t0 = time.time()
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)

while not stop_training:
  # TRAINING
  for ((text1, text2), label) in dataset(params=params, is_training=True):
    with tf.GradientTape() as tape:
      logits = model((text1, text2), training=True)
      loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.cast(label, tf.float32), logits=logits)
      loss = tf.reduce_mean(loss)

    optim.lr.assign(decay_lr(global_step))
    grads = tape.gradient(loss, model.trainable_variables)
    grads, _ = tf.clip_by_global_norm(grads, params['clip_norm'])
    optim.apply_gradients(zip(grads, model.trainable_variables))

    if global_step % 50 == 0:
      logger.info("Step {} | Loss: {:.4f} | Spent: {:.1f} secs | LR: {:.6f}".format(
          global_step, loss.numpy().item(), time.time()-t0, optim.lr.numpy().item()))
      t0 = time.time()
    global_step += 1

    if global_step % params['eval_steps'] == 0:
      # EVALUATION
      # NOTE(review): this pass reads params['train_path'] again (without
      # negative samples), so the reported accuracy is measured on the
      # training pairs -- confirm that this is intended.
      m = tf.keras.metrics.Accuracy()

      scores = []
      for ((text1, text2), label) in dataset(params=params, is_training=False):
        logits = model((text1, text2), training=False)
        _scores = tf.sigmoid(logits)
        scores.append(_scores.numpy())
        y_pred = tf.cast(tf.math.round(_scores), tf.int32)
        m.update_state(y_true=label, y_pred=y_pred)

      scores = np.concatenate(scores)

      # Re-read the raw pairs in file order so each score can be written next
      # to its text; the filtering mirrors the one applied by the generator.
      pairs = []
      with open(params['train_path'], encoding='utf-8') as f:
        print('Reading', params['train_path'])
        for line in f:
          line = line.rstrip()
          sp = line.split('|')
          if len(sp) == 2:
            source, target = sp
          else:
            continue
          pairs.append((source, target))
      pairs = pairs[:len(scores)]
      pair_scores = [(source, target, score) for (source, target), score in zip(pairs, scores)]
      pair_scores.sort(key=lambda x: x[2])
      # Highest-scoring pairs first.
      with open('score.txt', 'w', encoding='utf-8') as f:
        for pair_score in reversed(pair_scores):
          source, target, score = pair_score
          f.write(source+'|'+target+'|'+str(score)+'\n')

      acc = m.result().numpy()
      logger.info("Evaluation: Testing Accuracy: {:.3f}".format(acc))
      history_acc.append(acc)

      if acc > best_acc:
        best_acc = acc
        model.save_weights('model')
      logger.info("Best Accuracy: {:.3f}".format(best_acc))

      if len(history_acc) > params['num_patience'] and is_descending(history_acc):
        logger.info("Testing Accuracy not improved over {} epochs, Early Stop".format(params['num_patience']))
        stop_training = True
        break

[('embedding/embeddings:0', TensorShape([5751, 150])),
 ('birnn_1/forward_lstm/kernel:0', TensorShape([150, 600])),
 ('birnn_1/forward_lstm/recurrent_kernel:0', TensorShape([150, 600])),
 ('birnn_1/forward_lstm/bias:0', TensorShape([600])),
 ('birnn_1/backward_lstm/kernel:0', TensorShape([150, 600])),
 ('birnn_1/backward_lstm/recurrent_kernel:0', TensorShape([150, 600])),
 ('birnn_1/backward_lstm/bias:0', TensorShape([600])),
 ('birnn_2/forward_lstm_1/kernel:0', TensorShape([300, 600])),
 ('birnn_2/forward_lstm_1/recurrent_kernel:0', TensorShape([150, 600])),
 ('birnn_2/forward_lstm_1/bias:0', TensorShape([600])),
 ('birnn_2/backward_lstm_1/kernel:0', TensorShape([300, 600])),
 ('birnn_2/backward_lstm_1/recurrent_kernel:0', TensorShape([150, 600])),
 ('birnn_2/backward_lstm_1/bias:0', TensorShape([600])),
 ('align_fc1_1/kernel:0', TensorShape([450, 150])),
 ('align_fc1_1/bias:0', TensorShape([150])),
 ('align_fc1_2/kernel:0', TensorShape([600, 150])),
 ('align_fc1_2/bias:0', TensorShap

KeyboardInterrupt: ignored