In [1]:
import math
import os
import random as rand
from random import random

import numpy as np
import tensorflow as tf

# parameters used
MODEL_PATH = 'model/Seq2Seqmodel_CRF.ckpt'

  from ._conv import register_converters as _register_converters


In [2]:

# Build one (inputs, labels) pair for the toy sequence-labelling task.
def get_sequence(sequence_length):
    """Draw a random sequence and label every step by its running sum.

    Args:
        sequence_length: Number of time steps to generate.

    Returns:
        A tuple ``(X, y)`` of 1-D arrays with ``sequence_length`` entries each.
        ``X`` holds values drawn with ``random.random()``. ``y[i]`` is 0 while
        the cumulative sum ``X[0] + ... + X[i]`` is below
        ``sequence_length / 4.0`` and 1 from the step where it is not.
    """
    # One random() call per time step, in order.
    draws = [random() for _ in range(sequence_length)]
    X = np.array(draws)

    # The label flips once the running total reaches a quarter of the length.
    threshold = sequence_length / 4.0
    running_totals = np.cumsum(X)
    y = np.array([int(not total < threshold) for total in running_totals])

    return X, y

# Create n examples with random sequence lengths between 5 and 15 (inclusive).
def get_examples(n, min_length=5, max_length=15):
    """Generate ``n`` random variable-length examples.

    Args:
        n: Number of examples to generate.
        min_length: Shortest sequence length that may be drawn (inclusive).
        max_length: Longest sequence length that may be drawn (inclusive).

    Returns:
        A tuple ``(X_list, y_list, sequence_length_list)`` of three lists with
        ``n`` entries each: the input sequences and label sequences returned by
        ``get_sequence``, and the length drawn for each example.
    """
    X_list = []
    y_list = []
    sequence_length_list = []

    for _ in range(n):
        # randint includes both end points, so max_length itself can be drawn
        # (randrange would exclude the upper bound).
        sequence_length = rand.randint(min_length, max_length)
        X, y = get_sequence(sequence_length)
        X_list.append(X)
        y_list.append(y)
        sequence_length_list.append(sequence_length)

    return X_list, y_list, sequence_length_list
    
# TensorFlow requires that all sentences (and all labels) inside the same
# batch have the same length, so the data (and labels) are padded inside each
# batch (with 0s, for example).
def pad(sentence, max_length):
    """Right-pad a 1-D sequence with zeros up to ``max_length`` entries.

    The padding uses the dtype of the input, so integer label sequences stay
    integer instead of being promoted to float64.

    Args:
        sentence: 1-D array-like sequence to pad.
        max_length: Target length; must be at least ``len(sentence)``.

    Returns:
        A new 1-D array of length ``max_length`` holding ``sentence`` followed
        by zeros.

    Raises:
        ValueError: If ``max_length`` is smaller than ``len(sentence)``
            (raised by ``np.zeros`` for a negative size).
    """
    sentence = np.asarray(sentence)
    pad_len = max_length - len(sentence)
    # Match the input dtype so concatenation does not upcast the values.
    padding = np.zeros(pad_len, dtype=sentence.dtype)
    return np.concatenate((sentence, padding))

# Yield padded mini-batches of the data set.
def batch(data, labels, sequence_lengths, batch_size, input_size):
    """Iterate over the examples in consecutive, zero-padded mini-batches.

    Args:
        data: Sequence of 1-D input sequences.
        labels: Sequence of 1-D label sequences, aligned with ``data``.
        sequence_lengths: Length of each example, aligned with ``data``.
        batch_size: Maximum number of examples per batch.
        input_size: Size of the trailing feature axis the inputs are reshaped to.

    Yields:
        Tuples ``(batch_data, batch_labels, batch_length, batch_sequence_lengths)``:
        the padded inputs reshaped to ``(-1, batch_length, input_size)``, the
        padded labels, the longest length in the batch (as a 0-d array) and the
        array of per-example lengths.
    """
    # Round up so that a final, shorter batch is still produced.
    n_batch = int(math.ceil(len(data) / batch_size))

    for step in range(n_batch):
        window = slice(step * batch_size, (step + 1) * batch_size)

        batch_sequence_lengths = np.array(sequence_lengths[window])
        # Every example in this batch is padded to the longest one.
        batch_length = np.array(max(batch_sequence_lengths))

        padded_inputs = [pad(row, batch_length) for row in data[window]]
        padded_labels = [pad(row, batch_length) for row in labels[window]]
        batch_labels = np.array(padded_labels)

        # Add the trailing feature axis expected by the LSTM input.
        batch_data = np.array(padded_inputs).reshape(-1, batch_length, input_size)

        yield batch_data, batch_labels, batch_length, batch_sequence_lengths
        



In [3]:
# Generate the train and test data: 100 training examples and 30 test
# examples. Each call returns the input sequences, the label sequences and the
# per-example sequence lengths as three parallel lists.
# NOTE(review): no random seed is set in the visible cells, so the generated
# data presumably differs on every run — confirm if reproducibility matters.
x_train, y_train, sequence_length_train = get_examples(100)
x_test, y_test, sequence_length_test = get_examples(30)

In [4]:
# Bidirectional LSTM + CRF model.
# This cell builds the whole TensorFlow graph: placeholders, a bidirectional
# LSTM, a per-step linear projection to class scores, a linear-chain CRF loss,
# Viterbi decoding, the Adam training op and a Saver.

## Hyper-parameters
learning_rate = 0.001  # step size passed to the Adam optimizer below
training_epochs = 100 
input_size = 1  # features per time step (each step is a single number)
batch_size = 32
num_units = 128 # the number of units in the LSTM cell
# Each direction's per-step output has num_units features; the concatenated
# forward/backward output is reshaped to 2 * num_units further down.
number_of_classes = 2 

input_data = tf.placeholder(tf.float32, [None, None, input_size], name='input_data')
# ^ shape = (batch, batch_sequence_length, input_size)
# NOTE(review): the op name "lables" is misspelled; it is left unchanged here
# because renaming it would change the graph's op name.
labels = tf.placeholder(tf.int32, shape=[None, None], name="lables")
# ^ shape = (batch, sentence)
batch_sequence_length = tf.placeholder(tf.int32) # max sequence length in batch
# Unpadded length of every example in the batch; shape = (batch,).
original_sequence_lengths = tf.placeholder(tf.int32, [None])

# Scope is mandatory to use LSTMCell 
### reference(https://github.com/tensorflow/tensorflow/issues/799)
# NOTE(review): the cells are only constructed inside these variable scopes;
# whether the scopes affect the final variable names depends on the TF version
# — confirm before relying on them.
with tf.name_scope("BiLSTM"):
    with tf.variable_scope('forward'):
        lstm_fw_cell = tf.nn.rnn_cell.LSTMCell(num_units, 
                                               forget_bias=1.0, 
                                              state_is_tuple=True)
    with tf.variable_scope('backward'):
        lstm_bw_cell = tf.nn.rnn_cell.LSTMCell(num_units, 
                                               forget_bias=1.0, 
                                              state_is_tuple=True)
    # sequence_length receives the unpadded length of each example.
    (output_fw, output_bw), states = tf.nn.bidirectional_dynamic_rnn(cell_fw=lstm_fw_cell,
                                                                    cell_bw=lstm_bw_cell,
                                                                    inputs=input_data,
                                                                    sequence_length=original_sequence_lengths,
                                                                    dtype=tf.float32, scope="BiLSTM")

# As we have a Bi-LSTM, we have two outputs which are not connected,
# so we need to merge them along the feature axis.
outputs = tf.concat([output_fw, output_bw], axis=2)

# Fully connected layer: project every time step to one score per class.
W = tf.get_variable(name="W", shape=[2*num_units, number_of_classes], dtype=tf.float32)
b = tf.get_variable(name='b', shape=[number_of_classes], dtype=tf.float32,
                   initializer=tf.zeros_initializer())
# Flatten batch and time so a single matmul handles all steps, then restore
# the (batch, time, class) layout using the fed maximum batch length.
outputs_flat = tf.reshape(outputs, [-1, 2 * num_units])
pred = tf.matmul(outputs_flat, W) + b
scores = tf.reshape(pred, [-1, batch_sequence_length, number_of_classes])

# Linear-chain CRF: log-likelihood of the gold labels given the scores, plus
# the transition parameters returned by the same call.
log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(scores, labels, original_sequence_lengths)

# Training minimises the mean negative log-likelihood.
loss = tf.reduce_mean(-log_likelihood)

# compute the viterbi sequence and score (used for prediction, and test time)
viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(scores, transition_params, original_sequence_lengths)

# training optimizer 
optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

# Add ops to save and restore all the variables.
saver = tf.train.Saver()


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [5]:
#### Training the model ####
# Runs `training_epochs` passes over the training set, prints the accuracy of
# every batch on each 10th epoch, then writes a checkpoint to MODEL_PATH.

# Saver.save does not create a missing parent directory, so make sure the
# checkpoint directory exists before spending time on training.
model_dir = os.path.dirname(MODEL_PATH)
if model_dir:
    os.makedirs(model_dir, exist_ok=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for i in range(training_epochs):
        for batch_data, batch_labels, batch_seq_len, batch_sequence_lengths in batch(x_train, y_train, sequence_length_train, batch_size, input_size):

            # The decoded sequence is fetched in the same run call as the
            # training step so it can be used for the accuracy report below.
            tf_viterbi_sequence, _ = sess.run([viterbi_sequence, train_op],
                                              feed_dict={input_data: batch_data,
                                                         labels: batch_labels,
                                                         batch_sequence_length: batch_seq_len,
                                                         original_sequence_lengths: batch_sequence_lengths})
            # Show train accuracy on every 10th epoch.
            if i % 10 == 0:
                # Boolean (batch, time) mask that is True only for real,
                # non-padded positions of each example.
                mask = (np.expand_dims(np.arange(batch_seq_len), axis=0) < np.expand_dims(batch_sequence_lengths, axis=1))
                total_labels = np.sum(batch_sequence_lengths)
                correct_labels = np.sum((batch_labels == tf_viterbi_sequence) * mask)
                accuracy = 100.0 * correct_labels / float(total_labels)
                print("Epoch: {}, Accuracy: {}".format(i, accuracy))

    saver.save(sess, MODEL_PATH)

Epoch: 0, Accuracy: 45.59748427672956
Epoch: 0, Accuracy: 43.58108108108108
Epoch: 0, Accuracy: 47.61904761904762
Epoch: 0, Accuracy: 48.38709677419355
Epoch: 10, Accuracy: 90.25157232704403
Epoch: 10, Accuracy: 91.21621621621621
Epoch: 10, Accuracy: 93.01587301587301
Epoch: 10, Accuracy: 87.09677419354838
Epoch: 20, Accuracy: 93.71069182389937
Epoch: 20, Accuracy: 93.58108108108108
Epoch: 20, Accuracy: 93.33333333333333
Epoch: 20, Accuracy: 90.3225806451613
Epoch: 30, Accuracy: 94.33962264150944
Epoch: 30, Accuracy: 94.93243243243244
Epoch: 30, Accuracy: 95.55555555555556
Epoch: 30, Accuracy: 90.3225806451613
Epoch: 40, Accuracy: 95.59748427672956
Epoch: 40, Accuracy: 95.60810810810811
Epoch: 40, Accuracy: 94.60317460317461
Epoch: 40, Accuracy: 90.3225806451613
Epoch: 50, Accuracy: 96.54088050314465
Epoch: 50, Accuracy: 96.28378378378379
Epoch: 50, Accuracy: 95.23809523809524
Epoch: 50, Accuracy: 90.3225806451613
Epoch: 60, Accuracy: 96.85534591194968
Epoch: 60, Accuracy: 96.283783783

In [11]:
# Testing the model.
# Restores the checkpoint, decodes the test set and reports token-level
# accuracy over the non-padded positions.
with tf.Session() as sess:
    # Restore variables
    saver.restore(sess, MODEL_PATH)

    # Accumulate the counts inside the loop so the reported accuracy covers
    # every batch, not only the last one produced by the generator.
    total_labels = 0
    correct_labels = 0
    for batch_data, batch_labels, batch_seq_len, batch_sequence_lengths in batch(x_test, y_test, sequence_length_test, len(x_test), input_size):
        tf_viterbi_sequence = sess.run(viterbi_sequence, feed_dict={input_data: batch_data,
                                                                      labels: batch_labels,
                                                                      batch_sequence_length: batch_seq_len,
                                                                      original_sequence_lengths: batch_sequence_lengths})

        # Boolean (batch, time) mask that is True only for real, non-padded
        # positions of each example.
        mask = (np.expand_dims(np.arange(batch_seq_len), axis=0) < np.expand_dims(batch_sequence_lengths, axis=1))
        total_labels += np.sum(batch_sequence_lengths)
        correct_labels += np.sum((batch_labels == tf_viterbi_sequence) * mask)

    accuracy = 100.0 * correct_labels / float(total_labels)
    print("Test Accuracy : {0:0.4f}".format(accuracy))
    # Show the first example of the last batch; both rows include the zero
    # padding added up to the batch's maximum length.
    print('Label : {}'.format(batch_labels[0].astype(int)))
    print("Predi : {}".format(tf_viterbi_sequence[0]))

INFO:tensorflow:Restoring parameters from model/Seq2Seqmodel_CRF.ckpt
Test Accuracy : 96.5300
Label : [0 0 0 0 0 0 0 0 1 1 1 1 0 0]
Predi : [0 0 0 0 0 0 0 1 1 1 1 1 0 0]
