# Neural Network Models for Joint Intent Detection and Slot Filling

##### LOADING THE DATA
One line of data looks like this:  
  
&nbsp; ORIGINAL SEQUENCE: &nbsp; BOS i want to fly from baltimore           to dallas           round        trip        EOS  
&nbsp; LABELED SEQUENCE:  &nbsp;  O  O   O   O  O   O   B-fromloc.city_name O B-toloc.city_name B-round_trip I-round_trip  
&nbsp; INTENT:  &nbsp; atis_flight
  
Segmented into a dictionary: [original sentence words, labeled sequence, intent]

In [1]:
import numpy as np

# convert an empty 2D list into an empty 1D list
flatten = lambda l: [item for sublist in l for item in sublist]

index_seq2slot = lambda s, index2slot: [index2slot[i] for i in s]
index_seq2word = lambda s, index2word: [index2word[i] for i in s]

train_data = open("dataset/atis-2.train.w-intent.iob", "r").readlines()
test_data = open("dataset/atis-2.dev.w-intent.iob", "r").readlines()

print('This is the first line of data:')

train_data[0]

This is the first line of data:


'BOS i want to fly from baltimore to dallas round trip EOS\tO O O O O O B-fromloc.city_name O B-toloc.city_name B-round_trip I-round_trip atis_flight\n'

##### PREPROCESSING

In [2]:
def data_pipeline(data, length=50):
    '''
    [length] represents the standard size of the sequence to be inputed in the model
    This function will make sure that every line from the data has the same length
    before it is fed in the model
    '''
    # remove the '\n' spaces
    data = [t[:-1] for t in data]
    
    # split the data by white spaces
    data = [[t.split("\t")[0].split(" "), t.split("\t")[1].split(" ")[:-1], t.split("\t")[1].split(" ")[-1]] for t in
            data]  
    
    # transform every line into a dictionary: [ORIGINAL data, LABELED data, and INTEND]
    data = [[t[0][1:-1], t[1][1:], t[2]] for t in data]
    seq_in, seq_out, intent = list(zip(*data))
    
    sin = []
    sout = []
    
    # iterate through every line of the original seq
    for line in range(len(seq_in)):
        ### A D J U S T   T H E   S I Z E   O F   T H E   O R I G I N A L   S E Q U E N C E ###
        temp = seq_in[line]
        # if the line being read is shorter than 'length', this will apply padding to fill it
        if len(temp) < length:
            # <EOS> = End of Sentence
            temp.append('<EOS>')
            while len(temp) < length:
                temp.append('<PAD>')
        
        # if the line being read is larger than 'length', this will cut it to adjust its size
        else:
            temp = temp[:length]
            temp[-1] = '<EOS>'
        sin.append(temp)
        
        ### A D J U S T   T H E   S I Z E   O F   T H E   L A B E L E D   S E Q U E N C E ###
        temp = seq_out[line]
        if len(temp) < length:
            while len(temp) < length:
                temp.append('<PAD>')
        else:
            temp = temp[:length]
            temp[-1] = '<EOS>'
        sout.append(temp)
        data = list(zip(sin, sout, intent))
        
    return data

# transform the data so every sequence has the same size/legth
train_data_ed = data_pipeline(train_data)
test_data_ed = data_pipeline(test_data)

print(train_data_ed[0])

(['i', 'want', 'to', 'fly', 'from', 'baltimore', 'to', 'dallas', 'round', 'trip', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'], ['O', 'O', 'O', 'O', 'O', 'B-fromloc.city_name', 'O', 'B-toloc.city_name', 'B-round_trip', 'I-round_trip', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'], 'atis_flight')


##### MAPPING DATA
The following code will map the data from every list (original, labeled, and intents) creating dictionaries representing this information as index to words and viceversa.

In [3]:
def get_info_from_training_data(data):
    seq_in, seq_out, intent = list(zip(*data))
    vocab = set(flatten(seq_in))
    slot_tag = set(flatten(seq_out))
    intent_tag = set(intent)
    
    # generate word2index
    word2index = {'<PAD>': 0, '<UNK>': 1, '<SOS>': 2, '<EOS>': 3}
    for token in vocab:
        if token not in word2index.keys():
            word2index[token] = len(word2index)

    # generate index2word
    index2word = {v: k for k, v in word2index.items()}

    # generate tag2index
    tag2index = {'<PAD>': 0, '<UNK>': 1, "O": 2}
    for tag in slot_tag:
        if tag not in tag2index.keys():
            tag2index[tag] = len(tag2index)

    # generate index2tag
    index2tag = {v: k for k, v in tag2index.items()}

    # generate intent2index
    intent2index = {'<UNK>': 0}
    for ii in intent_tag:
        if ii not in intent2index.keys():
            intent2index[ii] = len(intent2index)

    # generate index2intent
    index2intent = {v: k for k, v in intent2index.items()}
    return word2index, index2word, tag2index, index2tag, intent2index, index2intent


word2index, index2word, slot2index, index2slot, intent2index, index2intent = \
        get_info_from_training_data(train_data_ed)
    
word2index

{'<PAD>': 0,
 '<UNK>': 1,
 '<SOS>': 2,
 '<EOS>': 3,
 'mealtime': 4,
 'leaves': 5,
 'sure': 6,
 'operating': 7,
 '2': 8,
 'cheap': 9,
 'ap57': 10,
 'listings': 11,
 '305': 12,
 '815': 13,
 'travel': 14,
 'way': 15,
 'eleventh': 16,
 'tacoma': 17,
 'thursday': 18,
 'eight': 19,
 'reverse': 20,
 'book': 21,
 'i': 22,
 '106': 23,
 'uses': 24,
 'anywhere': 25,
 'than': 26,
 'do': 27,
 'traveling': 28,
 'lastest': 29,
 '6': 30,
 'much': 31,
 'intercontinental': 32,
 'abbreviation': 33,
 'fares': 34,
 'georgia': 35,
 'airfare': 36,
 'hou': 37,
 'depart': 38,
 'itinerary': 39,
 'reaches': 40,
 'march': 41,
 '1991': 42,
 '505': 43,
 'june': 44,
 '1505': 45,
 'lake': 46,
 'see': 47,
 '417': 48,
 'begins': 49,
 'connecting': 50,
 'express': 51,
 '2134': 52,
 'too': 53,
 'by': 54,
 'reservations': 55,
 '825': 56,
 '230': 57,
 'chicago': 58,
 'highest': 59,
 "sunday's": 60,
 'sfo': 61,
 'dinnertime': 62,
 'around': 63,
 'business': 64,
 'milwaukee': 65,
 'many': 66,
 'put': 67,
 '723': 68,
 'tuesda

##### TRANSLATE ITEMS TO NUMBER
This section of the code will replace each word on every sequence with an ID number (index)

In [4]:
def to_index(train, word2index, slot2index, intent2index):
    new_train = []
    for sin, sout, intent in train:
        sin_ix = list(map(lambda i: word2index[i] if i in word2index else word2index["<UNK>"],
                          sin))
        true_length = sin.index("<EOS>")
        sout_ix = list(map(lambda i: slot2index[i] if i in slot2index else slot2index["<UNK>"],
                           sout))
        intent_ix = intent2index[intent] if intent in intent2index else intent2index["<UNK>"]
        new_train.append([sin_ix, true_length, sout_ix, intent_ix])
    return new_train

index_train = to_index(train_data_ed, word2index, slot2index, intent2index)
index_test = to_index(test_data_ed, word2index, slot2index, intent2index)

print('TRAINING AND TESTING DATA STRUCTURE:')
print('ORIGINAL_index | UNPADDED_seqLength | LABELED_index | INDENT\n')
print(index_train[0])

TRAINING AND TESTING DATA STRUCTURE:
ORIGINAL_index | UNPADDED_seqLength | LABELED_index | INDENT

[[22, 190, 391, 565, 782, 221, 391, 198, 666, 445, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 10, [2, 2, 2, 2, 2, 49, 2, 28, 103, 55, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 20]


## MODEL ARCHITECTURE

### Tensorflow with dynamic rnn

`tf.nn.rnn creates an unrolled graph for a fixed RNN length. That means, if you call tf.nn.rnn with inputs having 200 time steps you are creating a static graph with 200 RNN steps. First, graph creation is slow. Second, you’re unable to pass in longer sequences (> 200) than you’ve originally specified.tf.nn.dynamic_rnn solves this. It uses a tf.While loop to dynamically construct the graph when it is executed. That means graph creation is faster and you can feed batches of variable size.`

[Whats the difference between tensorflow dynamic_rnn and rnn?](https://stackoverflow.com/questions/39734146/whats-the-difference-between-tensorflow-dynamic-rnn-and-rnn) . That is, the static rnn must be expanded ahead of time. At the time of execution, the graph is fixed and the maximum length is limited. While the dynamic rnn can be cyclically multiplexed during execution.


In [5]:
import tensorflow as tf
import random

input_steps = 50      # specified length size
embedding_size = 64
hidden_size = 100
n_layers = 2
batch_size = 16
vocab_size = 871
slot_size = 122
intent_size = 22
epoch_num = 15

######  E N C O D E R   A N D   D E C O D E R  ######
encoder_inputs = tf.placeholder(tf.int32,[input_steps,batch_size],name='encoder_inputs')

# The actual length of each sentence without padding
encoder_inputs_actual_length = tf.placeholder(tf.int32, [batch_size],
                                                   name='encoder_inputs_actual_length')

decoder_targets = tf.placeholder(tf.int32, [batch_size, input_steps],
                                      name='decoder_targets')

intent_targets = tf.placeholder(tf.int32, [batch_size],
                                     name='intent_targets')

######  E M B E D D I N G  ######
embeddings = tf.Variable(tf.random_uniform([vocab_size, embedding_size],
                                           -0.1, 0.1), dtype=tf.float32, name="embedding")

encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
encoder_inputs_embedded

<tf.Tensor 'embedding_lookup:0' shape=(50, 16, 64) dtype=float32>

##### ENCODER

In [6]:
from tensorflow.contrib.rnn import LSTMCell, LSTMStateTuple

# Use a single LSTM cell
encoder_f_cell = LSTMCell(hidden_size)
encoder_b_cell = LSTMCell(hidden_size)

# The size of the following four variables: T*B*D, T*B*D, B*D, B*D
(encoder_fw_outputs, encoder_bw_outputs), (encoder_fw_final_state, encoder_bw_final_state) = \
    tf.nn.bidirectional_dynamic_rnn(cell_fw=encoder_f_cell,
                                    cell_bw=encoder_b_cell,
                                    inputs=encoder_inputs_embedded,
                                    sequence_length=encoder_inputs_actual_length,
                                    dtype=tf.float32, time_major=True)
encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)

encoder_final_state_c = tf.concat(
    (encoder_fw_final_state.c, encoder_bw_final_state.c), 1)

encoder_final_state_h = tf.concat(
    (encoder_fw_final_state.h, encoder_bw_final_state.h), 1)

encoder_final_state = LSTMStateTuple(
    c=encoder_final_state_c,
    h=encoder_final_state_h)

print("encoder_outputs: ", encoder_outputs)
print("encoder_outputs[0]: ", encoder_outputs[0])
print("encoder_final_state_c: ", encoder_final_state_c)

encoder_outputs:  Tensor("concat:0", shape=(50, 16, 200), dtype=float32)
encoder_outputs[0]:  Tensor("strided_slice:0", shape=(16, 200), dtype=float32)
encoder_final_state_c:  Tensor("concat_1:0", shape=(16, 200), dtype=float32)


##### DECODER

In [7]:
decoder_lengths = encoder_inputs_actual_length

slot_W = tf.Variable(tf.random_uniform([hidden_size * 2, slot_size], -1, 1),
                             dtype=tf.float32, name="slot_W")
slot_b = tf.Variable(tf.zeros([slot_size]), dtype=tf.float32, name="slot_b")
intent_W = tf.Variable(tf.random_uniform([hidden_size * 2, intent_size], -0.1, 0.1),
                               dtype=tf.float32, name="intent_W")
intent_b = tf.Variable(tf.zeros([intent_size]), dtype=tf.float32, name="intent_b")

# start intent
intent_logits = tf.add(tf.matmul(encoder_final_state_h, intent_W), intent_b)
intent = tf.argmax(intent_logits, axis=1)

sos_time_slice = tf.ones([batch_size], dtype=tf.int32, name='SOS') * 2
sos_step_embedded = tf.nn.embedding_lookup(embeddings, sos_time_slice)
pad_step_embedded = tf.zeros([batch_size, hidden_size * 2 + embedding_size],
                             dtype=tf.float32)

Like the Encoder above, the standard `tf.nn.dynamic_rnn` requires all input to be prepended to a tensor in advance.

When the Decoder needs to use the output of the previous time node, it is not possible to package it in advance. The standard dynamic rnn is equivalent to: $s_i = f(s_{i-1}, x_i)$; but if the parameters of this function need to be extended, for example we do: $s_i = f(s_{i-1}, Y_{i-1}, h_i, c_i)$.

So we need Hack: Use `tf.contrib.seq2seq.CustomHelper` to pass in three functions:

- `initial_fn()`：The first point of time input.
- `sample_fn()`：How to determine a certain fixed category id from logit to.
- `next_inputs_fn()`：Determine the input of the general time point.


In [8]:
def initial_fn():
    initial_elements_finished = (0 >= decoder_lengths)  # all False at the initial step
    initial_input = tf.concat((sos_step_embedded, encoder_outputs[0]), 1)
    return initial_elements_finished, initial_input

def sample_fn(time, outputs, state):
    # Select logit's largest subscript as sample
    prediction_id = tf.to_int32(tf.argmax(outputs, axis=1))
    return prediction_id

def next_inputs_fn(time, outputs, state, sample_ids):
    # The output class on the previous time node, gets embedding and enters as the next time node
    pred_embedding = tf.nn.embedding_lookup(embeddings, sample_ids)
    # The input is h_i+o_{i-1}+c_i
    next_input = tf.concat((pred_embedding, encoder_outputs[time]), 1)
    elements_finished = (time >= decoder_lengths)  # this operation produces boolean tensor of [batch_size]
    all_finished = tf.reduce_all(elements_finished)  # -> boolean scalar
    next_inputs = tf.cond(all_finished, lambda: pad_step_embedded, lambda: next_input)
    next_state = state
    return elements_finished, next_inputs, next_state

# Define your own helper
my_helper = tf.contrib.seq2seq.CustomHelper(initial_fn, sample_fn, next_inputs_fn)

def decode(helper, scope, reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        memory = tf.transpose(encoder_outputs, [1, 0, 2])
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
            num_units=hidden_size, memory=memory,
            memory_sequence_length=encoder_inputs_actual_length)
        cell = tf.contrib.rnn.LSTMCell(num_units=hidden_size * 2)
        attn_cell = tf.contrib.seq2seq.AttentionWrapper(
            cell, attention_mechanism, attention_layer_size=hidden_size)
        out_cell = tf.contrib.rnn.OutputProjectionWrapper(
            attn_cell, slot_size, reuse=reuse
        )
        decoder = tf.contrib.seq2seq.BasicDecoder(
            cell=out_cell, helper=helper,
            initial_state=out_cell.zero_state(
                dtype=tf.float32, batch_size=batch_size))
        # initial_state=encoder_final_state)
        final_outputs, final_state, final_sequence_lengths = tf.contrib.seq2seq.dynamic_decode(
            decoder=decoder, output_time_major=True,
            impute_finished=True, maximum_iterations=input_steps
        )
        return final_outputs
    
outputs = decode(my_helper, 'decode')

print("outputs: ", outputs)
print("outputs.rnn_output: ", outputs.rnn_output)
print("outputs.sample_id: ", outputs.sample_id)

outputs:  BasicDecoderOutput(rnn_output=<tf.Tensor 'decode/decoder/TensorArrayStack/TensorArrayGatherV3:0' shape=(?, 16, 122) dtype=float32>, sample_id=<tf.Tensor 'decode/decoder/TensorArrayStack_1/TensorArrayGatherV3:0' shape=(?, 16) dtype=int32>)
outputs.rnn_output:  Tensor("decode/decoder/TensorArrayStack/TensorArrayGatherV3:0", shape=(?, 16, 122), dtype=float32)
outputs.sample_id:  Tensor("decode/decoder/TensorArrayStack_1/TensorArrayGatherV3:0", shape=(?, 16), dtype=int32)


In [9]:
decoder_prediction = outputs.sample_id
decoder_max_steps, decoder_batch_size, decoder_dim = tf.unstack(tf.shape(outputs.rnn_output))
decoder_targets_time_majored = tf.transpose(decoder_targets, [1, 0])
decoder_targets_true_length = decoder_targets_time_majored[:decoder_max_steps]
print("decoder_targets_true_length: ", decoder_targets_true_length)

decoder_targets_true_length:  Tensor("strided_slice_1:0", shape=(?, 16), dtype=int32)


In [10]:
# Define mask so that padding does not count towards loss
mask = tf.to_float(tf.not_equal(decoder_targets_true_length, 0))

# Defining the loss of the slot label
loss_slot = tf.contrib.seq2seq.sequence_loss(
    outputs.rnn_output, decoder_targets_true_length, weights=mask)

# Defining loss of intent classification
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(intent_targets, depth=intent_size, dtype=tf.float32),
    logits=intent_logits)
loss_intent = tf.reduce_mean(cross_entropy)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.



##### TRAIN

In [11]:
# METRICS FUNCTIONS
import numpy.ma as ma
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

loss = loss_slot + loss_intent
optimizer = tf.train.AdamOptimizer(name="a_optimizer")
grads, vars = zip(*optimizer.compute_gradients(loss))
gradients, _ = tf.clip_by_global_norm(grads, 5)  # clip gradients
train_op = optimizer.apply_gradients(zip(grads, vars))

def step(sess, mode, trarin_batch):
    """ perform each batch"""
    if mode not in ['train', 'test']:
        print >> sys.stderr, 'mode is not supported'
        sys.exit(1)
    unziped = list(zip(*trarin_batch))
    if mode == 'train':
        output_feeds = [train_op, loss, decoder_prediction,
                        intent]
        feed_dict = {encoder_inputs: np.transpose(unziped[0], [1, 0]),
                     encoder_inputs_actual_length: unziped[1],
                     decoder_targets: unziped[2],
                     intent_targets: unziped[3]}
    if mode in ['test']:
        output_feeds = [decoder_prediction, intent]
        feed_dict = {encoder_inputs: np.transpose(unziped[0], [1, 0]),
                     encoder_inputs_actual_length: unziped[1]}

    results = sess.run(output_feeds, feed_dict=feed_dict)
    return results

def accuracy_score(true_data, pred_data, true_length=None):
    true_data = np.array(true_data)
    pred_data = np.array(pred_data)
    assert true_data.shape == pred_data.shape
    if true_length is not None:
        val_num = np.sum(true_length)
        assert val_num != 0
        res = 0
        for i in range(true_data.shape[0]):
            res += np.sum(true_data[i, :true_length[i]] == pred_data[i, :true_length[i]])
    else:
        val_num = np.prod(true_data.shape)
        assert val_num != 0
        res = np.sum(true_data == pred_data)
    res /= float(val_num)
    return res

def get_data_from_sequence_batch(true_batch, pred_batch, padding_token):
    """Extract data from a sequence of batches：
    [[3,1,2,0,0,0],[5,2,1,4,0,0]] -> [3,1,2,5,2,1,4]"""
    true_ma = ma.masked_equal(true_batch, padding_token)
    pred_ma = ma.masked_array(pred_batch, true_ma.mask)
    true_ma = true_ma.flatten()
    pred_ma = pred_ma.flatten()
    true_ma = true_ma[~true_ma.mask]
    pred_ma = pred_ma[~pred_ma.mask]
    return true_ma, pred_ma

def f1_for_sequence_batch(true_batch, pred_batch, average="micro", padding_token=0):
    true, pred = get_data_from_sequence_batch(true_batch, pred_batch, padding_token)
    labels = list(set(true))
    return f1_score(true, pred, labels=labels, average=average)

def accuracy_for_sequence_batch(true_batch, pred_batch, padding_token=0):
    true, pred = get_data_from_sequence_batch(true_batch, pred_batch, padding_token)
    return accuracy_score(true, pred)

def getBatch(batch_size, train_data):
    random.shuffle(train_data)
    sindex = 0
    eindex = batch_size
    while eindex < len(train_data):
        batch = train_data[sindex:eindex]
        temp = eindex
        eindex = eindex + batch_size
        sindex = temp
        yield batch
        
def getBatch2(batch_size, train_data):
    sindex = 0
    eindex = batch_size
    while eindex < len(train_data):
        batch = train_data[sindex:eindex]
        temp = eindex
        eindex = eindex + batch_size
        sindex = temp
        yield batch

##### MODEL EXECUTION

In [12]:
sess = tf.Session()
sess.run(tf.global_variables_initializer())

for epoch in range(epoch_num):
    mean_loss = 0.0
    train_loss = 0.0
    for i, batch in enumerate(getBatch(batch_size, index_train)):
        # Perform a batch training
        _, loss_v, decoder_prediction_v, intent_v = step(sess, "train", batch)
        mean_loss += loss_v
        train_loss += loss_v
        if i % 30 == 0:
            if i > 0:
                mean_loss = mean_loss / 30.0
            print('Average train loss at epoch %d, step %d: %f' % (epoch, i, mean_loss))
            mean_loss = 0
    train_loss /= (i + 1)
    print("[Epoch {}] Average train loss: {}".format(epoch, train_loss))

    # One epoch per training, test once
    pred_slots = []
    for j, batch in enumerate(getBatch(batch_size, index_test)):
        decoder_prediction_v, intent_v = step(sess, "test", batch)
        decoder_prediction_v = np.transpose(decoder_prediction_v, [1, 0])
        if j == 0:
            index = random.choice(range(len(batch)))
            print("Input Sentence        : ", index_seq2word(batch[index][0], index2word))
            print("Slot Truth            : ", index_seq2slot(batch[index][2], index2slot))
            print("Slot Prediction       : ", index_seq2slot(decoder_prediction_v[index], index2slot))
            print("Intent Truth          : ", index2intent[batch[index][3]])
            print("Intent Prediction     : ", index2intent[intent_v[index]])
        
        slot_pred_length = list(np.shape(decoder_prediction_v))[1]
        pred_padded = np.lib.pad(decoder_prediction_v, ((0, 0), (0, input_steps-slot_pred_length)),
                                 mode="constant", constant_values=0)
        pred_slots.append(pred_padded)
        true_slot = np.array((list(zip(*batch))[2]))
        true_length = np.array((list(zip(*batch))[1]))
        true_slot = true_slot[:, :slot_pred_length]
        slot_acc = accuracy_score(true_slot, decoder_prediction_v, true_length)
        intent_acc = accuracy_score(list(zip(*batch))[3], intent_v)
        print("slot accuracy: {}, intent accuracy: {}".format(slot_acc, intent_acc))
    
    pred_slots_a = np.vstack(pred_slots)
    true_slots_a = np.array(list(zip(*index_test))[2])[:pred_slots_a.shape[0]]
    print("F1 score for epoch {}: {}".format(epoch, f1_for_sequence_batch(true_slots_a, pred_slots_a)))

Average train loss at epoch 0, step 0: 7.889609
Average train loss at epoch 0, step 30: 4.790801
Average train loss at epoch 0, step 60: 2.904990
Average train loss at epoch 0, step 90: 2.650847
Average train loss at epoch 0, step 120: 2.308156
Average train loss at epoch 0, step 150: 2.325435
Average train loss at epoch 0, step 180: 2.133970
Average train loss at epoch 0, step 210: 1.873107
Average train loss at epoch 0, step 240: 1.940194
Average train loss at epoch 0, step 270: 1.659800
[Epoch 0] Average train loss: 2.4973366937756967
Input Sentence        :  ['from', 'philadelphia', 'to', 'toronto', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<

slot accuracy: 0.8735632183908046, intent accuracy: 0.8125
slot accuracy: 0.9456521739130435, intent accuracy: 0.9375
slot accuracy: 0.8578947368421053, intent accuracy: 0.8125
slot accuracy: 0.9025974025974026, intent accuracy: 0.8125
slot accuracy: 0.8928571428571429, intent accuracy: 0.875
slot accuracy: 0.9033816425120773, intent accuracy: 1.0
slot accuracy: 0.9141104294478528, intent accuracy: 0.8125
slot accuracy: 0.8789808917197452, intent accuracy: 1.0
slot accuracy: 0.9385474860335196, intent accuracy: 1.0
slot accuracy: 0.8755980861244019, intent accuracy: 1.0
slot accuracy: 0.8461538461538461, intent accuracy: 1.0
slot accuracy: 0.9411764705882353, intent accuracy: 0.8125
slot accuracy: 0.8723404255319149, intent accuracy: 1.0
slot accuracy: 0.8941176470588236, intent accuracy: 0.9375
slot accuracy: 0.8670212765957447, intent accuracy: 0.9375
slot accuracy: 0.8764044943820225, intent accuracy: 0.9375
slot accuracy: 0.8666666666666667, intent accuracy: 0.8125
slot accuracy: 0

slot accuracy: 0.936046511627907, intent accuracy: 0.8125
slot accuracy: 0.9272727272727272, intent accuracy: 1.0
slot accuracy: 0.9217877094972067, intent accuracy: 1.0
slot accuracy: 0.9082125603864735, intent accuracy: 0.875
slot accuracy: 0.93125, intent accuracy: 0.9375
slot accuracy: 0.9526627218934911, intent accuracy: 1.0
slot accuracy: 0.9593023255813954, intent accuracy: 0.9375
slot accuracy: 0.9565217391304348, intent accuracy: 0.9375
F1 score for epoch 4: 0.9380843182219086
Average train loss at epoch 5, step 0: 0.483348
Average train loss at epoch 5, step 30: 0.262663
Average train loss at epoch 5, step 60: 0.266014
Average train loss at epoch 5, step 90: 0.268306
Average train loss at epoch 5, step 120: 0.207508
Average train loss at epoch 5, step 150: 0.240003
Average train loss at epoch 5, step 180: 0.242127
Average train loss at epoch 5, step 210: 0.225100
Average train loss at epoch 5, step 240: 0.244754
Average train loss at epoch 5, step 270: 0.280106
[Epoch 5] Aver

Average train loss at epoch 7, step 180: 0.183308
Average train loss at epoch 7, step 210: 0.170131
Average train loss at epoch 7, step 240: 0.136500
Average train loss at epoch 7, step 270: 0.129970
[Epoch 7] Average train loss: 0.15683271004200836
Input Sentence        :  ['show', 'me', 'all', 'united', 'flights', 'from', 'denver', 'to', 'san', 'francisco', 'for', 'september', 'first', '1991', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>']
Slot Truth            :  ['O', 'O', 'O', 'B-airline_name', 'O', 'O', 'B-fromloc.city_name', 'O', 'B-toloc.city_name', 'I-toloc.city_name', 'O', 'B-depart_date.month_name', 'B-depart_date.day_number', 'B-depart_date.year', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '

slot accuracy: 0.9760479041916168, intent accuracy: 0.9375
slot accuracy: 0.943127962085308, intent accuracy: 1.0
slot accuracy: 0.9886363636363636, intent accuracy: 1.0
slot accuracy: 0.9747474747474747, intent accuracy: 1.0
slot accuracy: 0.9649122807017544, intent accuracy: 0.875
slot accuracy: 0.9876543209876543, intent accuracy: 1.0
slot accuracy: 0.9822485207100592, intent accuracy: 1.0
slot accuracy: 0.9552238805970149, intent accuracy: 1.0
slot accuracy: 0.9842931937172775, intent accuracy: 1.0
slot accuracy: 1.0, intent accuracy: 1.0
slot accuracy: 0.956989247311828, intent accuracy: 1.0
slot accuracy: 0.9782608695652174, intent accuracy: 0.9375
slot accuracy: 0.9701492537313433, intent accuracy: 1.0
slot accuracy: 0.9488636363636364, intent accuracy: 0.9375
slot accuracy: 0.9203980099502488, intent accuracy: 0.9375
slot accuracy: 0.9681528662420382, intent accuracy: 0.9375
slot accuracy: 0.9722222222222222, intent accuracy: 1.0
slot accuracy: 0.9368421052631579, intent accura

slot accuracy: 0.967741935483871, intent accuracy: 0.9375
slot accuracy: 1.0, intent accuracy: 1.0
slot accuracy: 0.9696969696969697, intent accuracy: 1.0
slot accuracy: 0.9555555555555556, intent accuracy: 1.0
slot accuracy: 0.9896373056994818, intent accuracy: 0.9375
slot accuracy: 0.9722222222222222, intent accuracy: 0.875
F1 score for epoch 11: 0.9695736776932602
Average train loss at epoch 12, step 0: 0.027130
Average train loss at epoch 12, step 30: 0.033705
Average train loss at epoch 12, step 60: 0.050137
Average train loss at epoch 12, step 90: 0.042023
Average train loss at epoch 12, step 120: 0.037505
Average train loss at epoch 12, step 150: 0.032571
Average train loss at epoch 12, step 180: 0.036786
Average train loss at epoch 12, step 210: 0.039579
Average train loss at epoch 12, step 240: 0.034880
Average train loss at epoch 12, step 270: 0.055085
[Epoch 12] Average train loss: 0.04075843524221184
Input Sentence        :  ['what', 'kind', 'of', 'aircraft', 'does', 'delta

slot accuracy: 0.9484536082474226, intent accuracy: 0.875
slot accuracy: 0.95, intent accuracy: 1.0
slot accuracy: 0.987012987012987, intent accuracy: 1.0
slot accuracy: 0.973404255319149, intent accuracy: 1.0
slot accuracy: 0.9696969696969697, intent accuracy: 0.9375
slot accuracy: 0.9950248756218906, intent accuracy: 1.0
slot accuracy: 0.9945945945945946, intent accuracy: 1.0
slot accuracy: 0.9936305732484076, intent accuracy: 0.9375
slot accuracy: 0.9712643678160919, intent accuracy: 1.0
slot accuracy: 0.9803921568627451, intent accuracy: 1.0
slot accuracy: 0.9246231155778895, intent accuracy: 0.875
slot accuracy: 0.9880952380952381, intent accuracy: 1.0
slot accuracy: 0.9702970297029703, intent accuracy: 1.0
slot accuracy: 0.9955357142857143, intent accuracy: 0.9375
slot accuracy: 0.9516129032258065, intent accuracy: 0.9375
slot accuracy: 0.974025974025974, intent accuracy: 1.0
slot accuracy: 0.9640287769784173, intent accuracy: 0.9375
slot accuracy: 0.9941520467836257, intent accu

### PREDICTION

In [34]:
# in here type your inquire fpr the airline
#my_sentence = 'list types of aircraft that fly between boston and san francisco'
my_sentence = 'may I know how much is the cheapest flight from Los Angeles to New York'

# this code will adapt your sentence to the input data structure
input_data = ['BOS ' + my_sentence.lower() + '\t\n']
input_data = data_pipeline(input_data, length=50)
print(input_data[0])

(['may', 'i', 'know', 'how', 'much', 'is', 'the', 'cheapest', 'flight', 'from', 'los', 'angeles', 'to', 'new', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'], ['<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'], '')


In [35]:
# this will transfor the text into indices
index_sentence = to_index(input_data, word2index, slot2index, intent2index)

# this will augment the input size so it can later fit in the model
index_sentence = index_sentence + index_test.copy()

print('TEST DATA STRUCTURE:')
print('ORIGINAL_index | UNPADDED_seqLength | LABELED_index | INDENT\n')
print(index_sentence[0])

TEST DATA STRUCTURE:
ORIGINAL_index | UNPADDED_seqLength | LABELED_index | INDENT

[[173, 22, 794, 582, 31, 133, 400, 515, 471, 782, 613, 715, 391, 172, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 14, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 0]


In [36]:
#### Output prediction for the text you input manually
for j, batch in enumerate(getBatch2(batch_size, index_sentence)):
    decoder_prediction_v, intent_v = step(sess, "test", batch)
    decoder_prediction_v = np.transpose(decoder_prediction_v, [1, 0])
    if j == 0:
        index = 0
        print("Your Sentence is      : ", my_sentence, "\n")
        print("Input Sentence        : ", index_seq2word(batch[index][0], index2word), "\n")
        print("Slot Prediction       : ", index_seq2slot(decoder_prediction_v[index], index2slot), "\n")
        print("Intent Prediction     : ", index2intent[intent_v[index]])

Your Sentence is      :  may I know how much is the cheapest flight from Los Angeles to New York 

Input Sentence        :  ['may', 'i', 'know', 'how', 'much', 'is', 'the', 'cheapest', 'flight', 'from', 'los', 'angeles', 'to', 'new', '<EOS>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'] 

Slot Prediction       :  ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-cost_relative', 'O', 'O', 'B-fromloc.city_name', 'I-fromloc.city_name', 'O', 'B-toloc.city_name', 'I-toloc.city_name', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>', '<PAD>'] 

Intent Prediction     :  atis_airfare
