# Experiment 1. Separate span-2 & span-3 data

In [174]:
import random
import numpy as np

In [175]:
WORDS = [chr(asciiNum) for asciiNum in range(97,123)]
LABELS = ["*",">","-"]
ID2WORD = {i:w for i,w in enumerate(WORDS)}
WORD2ID = {w:i for i,w in enumerate(WORDS)}
ID2LABEL = {i:l for i,l in enumerate(LABELS)}
LABEL2ID = {l:i for i,l in ID2LABEL.items()}
FINAL_ID = len(WORDS)
LEN_FROM, LEN_TO = 10, 15

In [176]:
def is_consecutive(seq):
    length = len(seq)
    begin = WORDS.index(seq[0])
    indices = [WORDS.index(w) for w in seq]
    consecIndices = list(range(begin,begin+length))
    if consecIndices!=indices:
        return False
    return True

def rand_sent(consec=1):
    randSent = []
    consecSpans = []
    length = np.random.randint(LEN_FROM, LEN_TO+1)
    for i in range(length):
        currWord = random.choice(WORDS)
        randSent.append(currWord)
        if i>=consec and is_consecutive(randSent[i-consec:i+1]):
            if len(consecSpans)!=0 and consecSpans[-1][1]==i-1:
                continue
            consecSpans.append((i-consec,i))
    return randSent, consecSpans

def label_sent(randSent, consecSpans):
    randLabels = ['*' for _ in range(len(randSent))]
    for begin,end in consecSpans:
        randLabels[begin] = '>'
        for i in range(begin+1,end+1):
            randLabels[i] = '-'
    return randLabels

def gen_sents(consec=1, numSents=100, numSpans=2):
    count = 0
    sents, labels = [], []
    while count<numSents:
        randSent, consecSpans = rand_sent(consec)
        if len(consecSpans) >= numSpans:
            count += 1
            sents.append(randSent)
            labels.append(label_sent(randSent,consecSpans))
    return sents, labels

In [177]:
a,b = gen_sents(consec=2)
list(zip(a,b))

[(['s', 'q', 'r', 's', 'r', 'n', 'n', 'f', 'g', 'h', 'j', 'm'],
  ['*', '>', '-', '-', '*', '*', '*', '>', '-', '-', '*', '*']),
 (['o', 'i', 'l', 'm', 'n', 'u', 'k', 'e', 'r', 'r', 'n', 'o', 'p'],
  ['*', '*', '>', '-', '-', '*', '*', '*', '*', '*', '>', '-', '-']),
 (['j', 'k', 'l', 'm', 'n', 'i', 'i', 'h', 'n', 'q', 'c', 'n', 'w'],
  ['>', '-', '>', '-', '-', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['v', 'w', 'x', 'i', 'j', 'k', 'q', 'k', 'w', 'f', 'o', 'u', 'u', 'i'],
  ['>', '-', '-', '>', '-', '-', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['l', 'v', 'l', 'm', 'n', 'i', 'i', 'k', 'l', 'm', 'p', 'y', 'k'],
  ['*', '*', '>', '-', '-', '*', '*', '>', '-', '-', '*', '*', '*']),
 (['f', 'h', 'o', 'o', 'o', 'p', 'q', 'o', 'm', 's', 't', 'u', 'h'],
  ['*', '*', '*', '*', '>', '-', '-', '*', '*', '>', '-', '-', '*']),
 (['t', 'd', 'b', 'z', 's', 's', 't', 'u', 'w', 't', 'u', 'v'],
  ['*', '*', '*', '*', '*', '>', '-', '-', '*', '>', '-', '-']),
 (['v', 'l', 'm', 'n', 'd', 'n', 'l',

In [178]:
PAD_LEN = LEN_TO

def pad_sent(sent):
    length = len(sent)
    paddedSent = sent+[FINAL_ID]*(PAD_LEN-length) if length<PAD_LEN else sent[:PAD_LEN]
    return paddedSent

def pad_labels(lbs):
    length = len(lbs)
    paddedLbs = lbs+[LABEL2ID["-"]]*(PAD_LEN-length) if length<PAD_LEN else lbs[:PAD_LEN]
    return paddedLbs

def data_proc(sents, labels):
    encodedSents = np.array([pad_sent([WORD2ID[word] for word in sent]) for sent in sents])
    encodedLabels = np.array([pad_labels([LABEL2ID[label] for label in lbs]) for lbs in labels])
    seqLengths = np.array([len(sent) for sent in sents])
    return encodedSents, encodedLabels, seqLengths

In [179]:
%%time

print("Generating span-2 sentences")
X2, Y2, L2 = data_proc(*gen_sents(consec=1, numSents=5000, numSpans=2))
print("Generating span-3 sentences")
X3, Y3, L3 = data_proc(*gen_sents(consec=2, numSents=5000, numSpans=1))
print("DONE!")
print(X2.shape, Y2.shape, L2.shape, X3.shape, Y3.shape, L3.shape)

Generating span-2 sentences
Generating span-3 sentences
DONE!
(5000, 15) (5000, 15) (5000,) (5000, 15) (5000, 15) (5000,)
CPU times: user 12.1 s, sys: 0 ns, total: 12.1 s
Wall time: 12.2 s


In [185]:
X = np.concatenate((X2,X3), axis=0)
Y = np.concatenate((Y2,Y3), axis=0)
L = np.concatenate((L2,L3), axis=0)
print(X.shape, Y.shape, L.shape)

(10000, 15) (10000, 15) (10000,)


In [186]:
cutoff = int(len(X)*0.8)
randIndices = np.arange(len(X))
X = X[randIndices]
trainX, testX = X[:cutoff], X[cutoff:]
trainY, testY = Y[:cutoff], Y[cutoff:]
trainL, testL = L[:cutoff], L[cutoff:]
print(trainX.shape, trainY.shape, trainL.shape)
print(testX.shape, testY.shape, testL.shape)

(8000, 15) (8000, 15) (8000,)
(2000, 15) (2000, 15) (2000,)


In [187]:
from copy import deepcopy

class DataIter:
    
    def __init__(self, X, Y, L):
        self.X = deepcopy(X)
        self.Y = deepcopy(Y)
        self.L = deepcopy(L)
        self.size = len(self.X)
        self.indices = np.arange(self.size)
    
    def next_batch(self, n):
        subIndices = np.random.choice(self.indices, n, replace=False)
        return self.X[subIndices], self.Y[subIndices], self.L[subIndices]

In [188]:
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper

In [189]:
tf.reset_default_graph()

VOCAB_SIZE = len(WORDS) + len(LABELS) + 1 # pad
EMB_SIZE = 20
HID_SIZE = 30
NUM_LAYERS = 1
NUM_CLASSES = len(LABELS)
KEEP_PROB = 0.7
LEARNING_RATE = 1e-4
BATCH_SIZE = 32

inputWords = tf.placeholder(tf.int32, shape=[BATCH_SIZE, PAD_LEN], name="inputWords") # <b,l>
inputLabels = tf.placeholder(tf.int32, shape=[BATCH_SIZE, PAD_LEN], name="inputLabels")
inputLengths = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name="inputLengths")
keepProb = tf.placeholder(tf.float32, name="keepProb")

embeddings = tf.get_variable("Embeddings", shape=[VOCAB_SIZE, EMB_SIZE],
                             initializer=tf.contrib.layers.xavier_initializer())
inputWordsEmbs = tf.nn.embedding_lookup(embeddings, inputWords, name="inputWordsEmbs") # <b,l,e>

cell = MultiRNNCell([DropoutWrapper(LSTMCell(HID_SIZE),output_keep_prob=keepProb)]*NUM_LAYERS)

((fwOutputs,bwOutputs),
 (fwFinalState,bwFinalState)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell,
                                                               cell_bw=cell,
                                                               inputs=inputWordsEmbs,
                                                               sequence_length=inputLengths,
                                                               dtype=tf.float32)
    # fw/bwOutputs: <b,l,h>
    # fw/bwFinalState: c = h = <b,h>

outputs = tf.reshape(tf.concat([fwOutputs,bwOutputs], axis=-1),[-1,2*HID_SIZE]) 
    # op1. -> <b,l,2h>
    # op2. flatten: <b,l,2h> -> <b*l,2h>

W = tf.get_variable("W", shape=[2*HID_SIZE, NUM_CLASSES], 
                    initializer=tf.contrib.layers.xavier_initializer())
b = tf.get_variable("b", shape=[NUM_CLASSES],
                    initializer=tf.contrib.layers.xavier_initializer())

logits = tf.add(tf.matmul(outputs, W), b) # <b*l,c>
labels = tf.reshape(inputLabels, shape=[-1])
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)

preds = tf.cast(tf.argmax(tf.nn.softmax(logits, axis=-1),axis=-1),tf.int32)
correct = tf.cast(tf.equal(preds, labels), tf.int32)
mask = tf.cast(tf.not_equal(labels, LABEL2ID["-"]), tf.int32)
totalLength = tf.cast(tf.reduce_sum(inputLengths), tf.float32)
accuracy = tf.cast(tf.reduce_sum(correct), tf.float32) / totalLength

trainOp = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

In [190]:
NUM_BATCHES = 1000
VERBOSE_PER = 50

with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    
    trainIter = DataIter(trainX, trainY, trainL)
    testIter = DataIter(testX, testY, testL)
    trainLosses, trainAccuracies = [], []
    testLosses, testAccuracies = [], []
    
    currLosses, currAccuracies = [], []
    for b in range(NUM_BATCHES):
        batchInputWords, batchInputLabels, batchInputLengths = trainIter.next_batch(BATCH_SIZE)
        fd = {inputWords:batchInputWords,
              inputLabels:batchInputLabels,
              inputLengths:batchInputLengths,
              keepProb:KEEP_PROB}
        _, batchLoss, batchAccuracy = sess.run([trainOp, loss, accuracy], feed_dict=fd)
        trainLosses.append(batchLoss)
        trainAccuracies.append(batchAccuracy)
        currLosses.append(batchLoss)
        currAccuracies.append(batchAccuracy)
        if b!=0 and b%VERBOSE_PER==0:
            print("Report @%d batches: avg.loss = %.4f | avg.acc = %.2f" % (b,
                                                                            np.mean(currLosses),
                                                                            np.mean(currAccuracies)))
            currLosses, currAccuracies = [], []
    
    for b in range(testIter.size//NUM_BATCHES):
        batchInputWords, batchInputLabels, batchInputLengths = testIter.next_batch(BATCH_SIZE)
        fd = {inputWords:batchInputWords,
              inputLabels:batchInputLabels,
              inputLengths:batchInputLengths,
              keepProb:1.0}
        _, batchLoss, batchAccuracy = sess.run([trainOp, loss, accuracy], feed_dict=fd)
        testLosses.append(batchLoss)
        testAccuracies.append(batchAccuracy)
    print("\nTest results: avg.loss = %.4f | avg.acc = %.2f" % (np.mean(testLosses),
                                                                np.mean(testAccuracies)))

Report @50 batches: avg.loss = 1.5236 | avg.acc = 0.32
Report @100 batches: avg.loss = 1.4646 | avg.acc = 0.33
Report @150 batches: avg.loss = 1.4075 | avg.acc = 0.32
Report @200 batches: avg.loss = 1.2640 | avg.acc = 0.32
Report @250 batches: avg.loss = 1.0025 | avg.acc = 0.57
Report @300 batches: avg.loss = 0.8132 | avg.acc = 0.85
Report @350 batches: avg.loss = 0.7984 | avg.acc = 0.87
Report @400 batches: avg.loss = 0.7961 | avg.acc = 0.86
Report @450 batches: avg.loss = 0.7901 | avg.acc = 0.87
Report @500 batches: avg.loss = 0.7903 | avg.acc = 0.86
Report @550 batches: avg.loss = 0.7888 | avg.acc = 0.87
Report @600 batches: avg.loss = 0.7901 | avg.acc = 0.87
Report @650 batches: avg.loss = 0.7895 | avg.acc = 0.87
Report @700 batches: avg.loss = 0.7874 | avg.acc = 0.87
Report @750 batches: avg.loss = 0.7841 | avg.acc = 0.87
Report @800 batches: avg.loss = 0.7873 | avg.acc = 0.87
Report @850 batches: avg.loss = 0.7867 | avg.acc = 0.86
Report @900 batches: avg.loss = 0.7849 | avg.acc 

# Experiment 2. Simultaneous span-2 & span-3 data

In [158]:
import random
import numpy as np

In [159]:
WORDS = [chr(asciiNum) for asciiNum in range(97,123)]
LABELS = ["*",">","-"]
ID2WORD = {i:w for i,w in enumerate(WORDS)}
WORD2ID = {w:i for i,w in enumerate(WORDS)}
ID2LABEL = {i:l for i,l in enumerate(LABELS)}
LABEL2ID = {l:i for i,l in ID2LABEL.items()}
FINAL_ID = len(WORDS)
LEN_FROM, LEN_TO = 10, 15

In [164]:
def is_consecutive(seq):
    length = len(seq)
    begin = WORDS.index(seq[0])
    indices = [WORDS.index(w) for w in seq]
    consecIndices = list(range(begin,begin+length))
    if consecIndices!=indices:
        return False
    return True

def rand_sent(consecs=[1,2]):
    randSent = []
    consecSpans = [[] for _ in consecs]
    length = np.random.randint(LEN_FROM, LEN_TO+1)
    for i in range(length):
        currWord = random.choice(WORDS)
        randSent.append(currWord)
        for c,consec in enumerate(consecs):
            if i>=consec and is_consecutive(randSent[i-consec:i+1]):
                if len(consecSpans[c])!=0 and consecSpans[c][-1][1]==i-1:
                    continue
                consecSpans[c].append((i-consec,i))
    return randSent, consecSpans

def label_sent(randSent, consecSpans):
    randLabels = [['*' for _ in range(len(randSent))] for _ in range(len(consecSpans))]
    for c,consecSpan in enumerate(consecSpans):
        for begin,end in consecSpan:
            randLabels[c][begin] = '>'
            for i in range(begin+1,end+1):
                randLabels[c][i] = '-'
    return randLabels

def gen_sents(consecs=[1,2], numSents=100, numSpans=1):
    count = 0
    sents, labels = [], [[] for _ in consecs]
    while count<numSents:
        randSent, consecSpans = rand_sent(consecs)
        if len(consecSpans[1]) >= numSpans:
            count += 1
            sents.append(randSent)
            randLabels = label_sent(randSent,consecSpans)
            for c,randLabel in enumerate(randLabels):
                labels[c].append(randLabel)
    return sents, labels

In [142]:
a,b = gen_sents(consecs=[1,2])

In [143]:
list(zip(a,b[0]))

[(['e', 'f', 'g', 'l', 'o', 't', 'w', 'w', 'e', 'j', 'l'],
  ['>', '-', '*', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['x', 'l', 'j', 'w', 'u', 'i', 'e', 'j', 'a', 'x', 'f', 'c', 'd', 'e', 'd'],
  ['*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '>', '-', '*', '*']),
 (['c', 'e', 's', 't', 'u', 'g', 'l', 'l', 'j', 'h', 'w', 'd', 'u'],
  ['*', '*', '>', '-', '*', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['k', 't', 'l', 'd', 'e', 'f', 'e', 'b', 'w', 'c', 'i', 'g', 'i'],
  ['*', '*', '*', '>', '-', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['q', 'z', 'z', 'v', 'f', 'n', 'f', 'f', 'j', 'b', 'f', 'g', 'h'],
  ['*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '>', '-', '*']),
 (['e', 'a', 'z', 'k', 'c', 's', 'm', 'n', 'o', 'p', 'o', 'n', 'i', 'g'],
  ['*', '*', '*', '*', '*', '*', '>', '-', '>', '-', '*', '*', '*', '*']),
 (['l', 'z', 'e', 't', 'b', 'c', 'd', 'z', 'k', 'l', 'm', 't', 'e', 'w'],
  ['*', '*', '*', '*', '>', '-', '*', '*', '>', '-', '*', '*', '*', '*']),
 (['j',

In [144]:
list(zip(a,b[1]))

[(['e', 'f', 'g', 'l', 'o', 't', 'w', 'w', 'e', 'j', 'l'],
  ['>', '-', '-', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['x', 'l', 'j', 'w', 'u', 'i', 'e', 'j', 'a', 'x', 'f', 'c', 'd', 'e', 'd'],
  ['*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '>', '-', '-', '*']),
 (['c', 'e', 's', 't', 'u', 'g', 'l', 'l', 'j', 'h', 'w', 'd', 'u'],
  ['*', '*', '>', '-', '-', '*', '*', '*', '*', '*', '*', '*', '*']),
 (['k', 't', 'l', 'd', 'e', 'f', 'e', 'b', 'w', 'c', 'i', 'g', 'i'],
  ['*', '*', '*', '>', '-', '-', '*', '*', '*', '*', '*', '*', '*']),
 (['q', 'z', 'z', 'v', 'f', 'n', 'f', 'f', 'j', 'b', 'f', 'g', 'h'],
  ['*', '*', '*', '*', '*', '*', '*', '*', '*', '*', '>', '-', '-']),
 (['e', 'a', 'z', 'k', 'c', 's', 'm', 'n', 'o', 'p', 'o', 'n', 'i', 'g'],
  ['*', '*', '*', '*', '*', '*', '>', '-', '-', '*', '*', '*', '*', '*']),
 (['l', 'z', 'e', 't', 'b', 'c', 'd', 'z', 'k', 'l', 'm', 't', 'e', 'w'],
  ['*', '*', '*', '*', '>', '-', '-', '*', '>', '-', '-', '*', '*', '*']),
 (['j',

In [165]:
PAD_LEN = LEN_TO

def pad_sent(sent):
    length = len(sent)
    paddedSent = sent+[FINAL_ID]*(PAD_LEN-length) if length<PAD_LEN else sent[:PAD_LEN]
    return paddedSent

def pad_labels(lbs):
    length = len(lbs)
    paddedLbs = lbs+[LABEL2ID["-"]]*(PAD_LEN-length) if length<PAD_LEN else lbs[:PAD_LEN]
    return paddedLbs

def data_proc(sents, labels): # hard-coded to do span-2 and span-3 only now
    encodedSents = np.array([pad_sent([WORD2ID[word] for word in sent]) for sent in sents])
    labels2, labels3 = labels
    encodedLabels2 = np.array([pad_labels([LABEL2ID[label] for label in lbs]) for lbs in labels2])
    encodedLabels3 = np.array([pad_labels([LABEL2ID[label] for label in lbs]) for lbs in labels3])
    seqLengths = np.array([len(sent) for sent in sents])
    return encodedSents, encodedLabels2, encodedLabels3, seqLengths

In [166]:
%%time

print("Jointly generating span-2 & span-3 sentences")
X, Y2, Y3, L = data_proc(*gen_sents(consecs=[1,2], numSents=10000, numSpans=1))
print("DONE!")
print(X.shape, Y2.shape, Y3.shape, L.shape)

Jointly generating span-2 & span-3 sentences
DONE!
(10000, 15) (10000, 15) (10000, 15) (10000,)
CPU times: user 32.3 s, sys: 0 ns, total: 32.3 s
Wall time: 32.3 s


In [168]:
cutoff = int(len(X)*0.8)
randIndices = np.arange(len(X))
X = X[randIndices]
trainX, testX = X[:cutoff], X[cutoff:]
trainY2, testY2 = Y2[:cutoff], Y2[cutoff:]
trainY3, testY3 = Y3[:cutoff], Y3[cutoff:]
trainL, testL = L[:cutoff], L[cutoff:]
print(trainX.shape, trainY2.shape, trainY3.shape, trainL.shape)
print(testX.shape, testY2.shape, testY3.shape, testL.shape)

(8000, 15) (8000, 15) (8000, 15) (8000,)
(2000, 15) (2000, 15) (2000, 15) (2000,)


In [170]:
from copy import deepcopy

class DataIter:
    
    def __init__(self, X, Y2, Y3, L):
        self.X = deepcopy(X)
        self.Y2 = deepcopy(Y2)
        self.Y3 = deepcopy(Y3)
        self.L = deepcopy(L)
        self.size = len(self.X)
        self.indices = np.arange(self.size)
    
    def next_batch(self, n):
        subIndices = np.random.choice(self.indices, n, replace=False)
        return self.X[subIndices], self.Y2[subIndices], self.Y3[subIndices], self.L[subIndices]

In [171]:
import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper

In [172]:
tf.reset_default_graph()

VOCAB_SIZE = len(WORDS) + len(LABELS) + 1 # pad
EMB_SIZE = 20
HID_SIZE = 30
NUM_LAYERS = 1
NUM_CLASSES = len(LABELS)
KEEP_PROB = 0.7
LEARNING_RATE = 1e-4
BATCH_SIZE = 32

inputWords = tf.placeholder(tf.int32, shape=[BATCH_SIZE, PAD_LEN], name="inputWords") # <b,l>
inputLabels2 = tf.placeholder(tf.int32, shape=[BATCH_SIZE, PAD_LEN], name="inputLabels2")
inputLabels3 = tf.placeholder(tf.int32, shape=[BATCH_SIZE, PAD_LEN], name="inputLabels3")
inputLengths = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name="inputLengths")
keepProb = tf.placeholder(tf.float32, name="keepProb")

embeddings = tf.get_variable("Embeddings", shape=[VOCAB_SIZE, EMB_SIZE],
                             initializer=tf.contrib.layers.xavier_initializer())
inputWordsEmbs = tf.nn.embedding_lookup(embeddings, inputWords, name="inputWordsEmbs") # <b,l,e>

cell = MultiRNNCell([DropoutWrapper(LSTMCell(HID_SIZE),output_keep_prob=keepProb)]*NUM_LAYERS)

((fwOutputs,bwOutputs),
 (fwFinalState,bwFinalState)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell,
                                                               cell_bw=cell,
                                                               inputs=inputWordsEmbs,
                                                               sequence_length=inputLengths,
                                                               dtype=tf.float32)
    # fw/bwOutputs: <b,l,h>
    # fw/bwFinalState: c = h = <b,h>

outputs = tf.reshape(tf.concat([fwOutputs,bwOutputs], axis=-1),[-1,2*HID_SIZE]) 
    # op1. -> <b,l,2h>
    # op2. flatten: <b,l,2h> -> <b*l,2h>

W = tf.get_variable("W", shape=[2*HID_SIZE, NUM_CLASSES], 
                    initializer=tf.contrib.layers.xavier_initializer())
b = tf.get_variable("b", shape=[NUM_CLASSES],
                    initializer=tf.contrib.layers.xavier_initializer())

logits = tf.add(tf.matmul(outputs, W), b) # <b*l,c>
labels2 = tf.reshape(inputLabels2, shape=[-1])
labels3 = tf.reshape(inputLabels3, shape=[-1])
loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels2)
loss3 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels3)
loss = loss2 + loss3

preds = tf.cast(tf.argmax(tf.nn.softmax(logits, axis=-1),axis=-1),tf.int32)
correct2 = tf.cast(tf.equal(preds, labels2), tf.int32)
mask2 = tf.cast(tf.not_equal(labels2, LABEL2ID["-"]), tf.int32)
correct3 = tf.cast(tf.equal(preds, labels3), tf.int32)        batchInputWords, batchInputLabels, batchInputLengths = testIter.next_batch(BATCH_SIZE)
        fd = {inputWords:batchInputWords,
              inputLabels:batchInputLabels,
              inputLengths:batchInputLengths,
mask3 = tf.cast(tf.not_equal(labels3, LABEL2ID["-"]), tf.int32)
totalLength = tf.cast(tf.reduce_sum(inputLengths) * 2, tf.float32)
accuracy = tf.cast(tf.reduce_sum(correct2) + tf.reduce_sum(correct3), tf.float32) / totalLength

trainOp = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

In [173]:
NUM_BATCHES = 1000
VERBOSE_PER = 50

with tf.Session() as sess:
    
    sess.run(tf.global_variables_initializer())
    
    trainIter = DataIter(trainX, trainY2, trainY3, trainL)
    testIter = DataIter(testX, testY2, testY3, testL)
    trainLosses, trainAccuracies = [], []
    testLosses, testAccuracies = [], []
    
    currLosses, currAccuracies = [], []
    for b in range(NUM_BATCHES):
        batchInputWords, batchInputLabels2, batchInputLabels3, batchInputLengths = trainIter.next_batch(BATCH_SIZE)
        fd = {inputWords:batchInputWords,
              inputLabels2:batchInputLabels2,
              inputLabels3:batchInputLabels3,
              inputLengths:batchInputLengths,
              keepProb:KEEP_PROB}
        _, batchLoss, batchAccuracy = sess.run([trainOp, loss, accuracy], feed_dict=fd)
        trainLosses.append(batchLoss)
        trainAccuracies.append(batchAccuracy)
        currLosses.append(batchLoss)
        currAccuracies.append(batchAccuracy)
        if b!=0 and b%VERBOSE_PER==0:
            print("Report @%d batches: avg.loss = %.4f | avg.acc = %.2f" % (b,
                                                                            np.mean(currLosses),
                                                                            np.mean(currAccuracies)))
            currLosses, currAccuracies = [], []
    
    for b in range(testIter.size//NUM_BATCHES):
        batchInputWords, batchInputLabels2, batchInputLabels3, batchInputLengths = testIter.next_batch(BATCH_SIZE)
        fd = {inputWords:batchInputWords,
              inputLabels2:batchInputLabels2,
              inputLabels3:batchInputLabels3,
              inputLengths:batchInputLengths,
              keepProb:1.0}
        _, batchLoss, batchAccuracy = sess.run([trainOp, loss, accuracy], feed_dict=fd)
        testLosses.append(batchLoss)
        testAccuracies.append(batchAccuracy)
    print("\nTest results: avg.loss = %.4f | avg.acc = %.2f" % (np.mean(testLosses),
                                                                np.mean(testAccuracies)))

Report @50 batches: avg.loss = 1.9865 | avg.acc = 0.32
Report @100 batches: avg.loss = 1.9197 | avg.acc = 0.41
Report @150 batches: avg.loss = 1.8228 | avg.acc = 0.72
Report @200 batches: avg.loss = 1.6732 | avg.acc = 0.93
Report @250 batches: avg.loss = 1.4767 | avg.acc = 0.96
Report @300 batches: avg.loss = 1.3975 | avg.acc = 0.96
Report @350 batches: avg.loss = 1.3934 | avg.acc = 0.96
Report @400 batches: avg.loss = 1.3944 | avg.acc = 0.96
Report @450 batches: avg.loss = 1.3964 | avg.acc = 0.96
Report @500 batches: avg.loss = 1.3784 | avg.acc = 0.96
Report @550 batches: avg.loss = 1.3883 | avg.acc = 0.96
Report @600 batches: avg.loss = 1.3854 | avg.acc = 0.97
Report @650 batches: avg.loss = 1.3807 | avg.acc = 0.95
Report @700 batches: avg.loss = 1.3799 | avg.acc = 0.95
Report @750 batches: avg.loss = 1.3710 | avg.acc = 0.96
Report @800 batches: avg.loss = 1.3755 | avg.acc = 0.95
Report @850 batches: avg.loss = 1.3713 | avg.acc = 0.96
Report @900 batches: avg.loss = 1.3703 | avg.acc 