In [1]:
import glob
import numpy as np
import pickle
from tqdm import tqdm
import tensorflow as tf
import tensorflow.contrib.legacy_seq2seq as seq2seq
import random
import json
import os
import time
import load_data
import nltk
from IPython.display import HTML
import utilities
from time import gmtime, strftime
import random
from nltk.translate.bleu_score import sentence_bleu
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

  from ._conv import register_converters as _register_converters


In [2]:
ques_data = load_data.load_ip('../gen_data/Musical_Instruments/Musical_Instruments_question_file.txt')
rev_data = load_data.load_ip('../gen_data/Musical_Instruments/Musical_Instruments_review_file.txt')
ans_data = load_data.load_labels('../gen_data/Musical_Instruments/Musical_Instruments_answer_file.txt')

zipped = list(zip(ques_data, rev_data, ans_data))
random.shuffle(zipped)
ques_data, rev_data, ans_data = zip(*zipped)

data = {"questions": ques_data,
        "reviews": rev_data,
        "answers": ans_data}

In [3]:
def get_unique_words(data):
    unique_words = []

    for index in range(len(data["questions"])):
        sample_data = data["questions"][index] + data["reviews"][index] + data["answers"][index]
        sample_data = list(set(sample_data))
        unique_words.extend(sample_data)
        unique_words = list(set(unique_words))

    unique_words = ["<PAD>"] + ["<UNK>"] + unique_words

    return unique_words

In [4]:
def build_vocabs(unique_words):
    word2idx = {value:index for index, value in enumerate(unique_words)}
    idx2word = {index:value for index, value in enumerate(unique_words)}
    
    return word2idx, idx2word, len(word2idx)

In [5]:
# unique_words = get_unique_words(data)
# with open("unique_words.p", "wb") as pickle_d:
#     pickle.dump(unique_words, pickle_d)

In [6]:
unique_words = pickle.load(open('unique_words.p', 'rb'))
word2idx, idx2word, vocab_size = build_vocabs(unique_words)
print (vocab_size, word2idx["<START>"], word2idx["<PAD>"], word2idx["<EOS>"], word2idx["<UNK>"])

10277 3559 0 2108 1


In [7]:
train_samples = 55000
train_X = [data["questions"][:train_samples], data["reviews"][:train_samples]]
train_Y = data["answers"][:train_samples]
test_X = [data["questions"][train_samples:], data["reviews"][train_samples:]]
test_Y = data["answers"][train_samples:]
print (len(train_X[1]))

55000


In [8]:
def pad_sequences(sequences, lengths, batch_size):
    max_len = max(lengths)
    for i in range (batch_size):
        diff = max_len - lengths[i]
        sequences[i] += [word2idx["<PAD>"]] * diff

    return np.asarray(sequences)

In [9]:
def batch_data(X, Y, batch_size):
    start = 0
    while start + batch_size <= len(X[0]):
        enc_batch_input = list()
        con_enc_batch_input = list()
        dec_batch_input = list()
        dec_batch_target = list()
        enc_inp_lens = list()
        con_enc_inp_lens = list()
        dec_inp_lens = list()
        target_w = list()
        for index in range(start, start + batch_size):
            batch_index = index - start

            enc_batch_input.append([])
            for word_index, word in enumerate(X[0][index]):
                enc_batch_input[-1].append(word2idx[word])
            enc_inp_lens.append(len(enc_batch_input[-1]))
            
            con_enc_batch_input.append([])
            for word_index, word in enumerate(X[1][index]):
                con_enc_batch_input[-1].append(word2idx[word])
            con_enc_inp_lens.append(len(con_enc_batch_input[-1]))

            dec_batch_input.append([])
            for word_index, word in enumerate(Y[index][:-1]):
                dec_batch_input[-1].append(word2idx[word])
            dec_inp_lens.append(len(dec_batch_input[-1]))

            dec_batch_target.append([])
            for word_index, word in enumerate(Y[index][1:]):
                dec_batch_target[-1].append(word2idx[word])
        
        for batch_i in range(batch_size):
            pad = [1] * dec_inp_lens[batch_i]
            diff = max(dec_inp_lens) - dec_inp_lens[batch_i]
            pad.extend([0] * diff)
            target_w.append(pad)

        enc_batch_input = pad_sequences(enc_batch_input, enc_inp_lens, batch_size)
        con_enc_batch_input = pad_sequences(con_enc_batch_input, con_enc_inp_lens, batch_size)
        dec_batch_input = pad_sequences(dec_batch_input, dec_inp_lens, batch_size)
        dec_batch_target = pad_sequences(dec_batch_target, dec_inp_lens, batch_size)

        enc_inp_lens = np.asarray(enc_inp_lens)
        con_enc_inp_lens = np.asarray(con_enc_inp_lens)
        dec_inp_lens = np.asarray(dec_inp_lens)
        target_w = np.asarray(target_w)

        yield enc_batch_input, con_enc_batch_input, dec_batch_input, dec_batch_target, enc_inp_lens, \
            con_enc_inp_lens, dec_inp_lens, target_w
        
        start += batch_size

In [10]:
embedding_size = 300
input_num_units = 96
context_num_units = 32
decoder_num_units = 256
keep_prob = 0.75

assert ((input_num_units+context_num_units) * 2 == decoder_num_units)

In [11]:
tf.reset_default_graph()
sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True))

encoder_inputs = tf.placeholder(tf.int32, [None, None], 'encoder_inputs')
context_encoder_inputs = tf.placeholder(tf.int32, [None, None], 'context_encoder_inputs')
decoder_inputs = tf.placeholder(tf.int32, [None, None], 'decoder_inputs')
decoder_targets = tf.placeholder(tf.int32, [None, None], 'decoder_targets')
encoder_lengths = tf.placeholder(tf.int32, [None], 'encoder_lengths')
context_encoder_lengths = tf.placeholder(tf.int32, [None], 'context_encoder_lengths')
decoder_lengths = tf.placeholder(tf.int32, [None], 'decoder_lengths')
target_weights = tf.placeholder(tf.float32, [None, None], 'target_weights')
learning_rate = tf.placeholder(tf.float32, [], 'learning_rate')
batch_size = tf.placeholder(tf.int32, [], 'batch_size')

# Embedding
with tf.variable_scope("embeddings"):
    embedding_encoder = tf.get_variable(
        "embedding_encoder", [vocab_size, embedding_size])

    encoder_emb_inp = tf.nn.embedding_lookup(
        embedding_encoder, encoder_inputs)

    context_encoder_emb_inp = tf.nn.embedding_lookup(
        embedding_encoder, context_encoder_inputs)

    decoder_emb_inp = tf.nn.embedding_lookup(
        embedding_encoder, decoder_inputs)


with tf.variable_scope('encoder_lstm'):
    enc_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(
        input_num_units, state_is_tuple=True, name="enc_fw")
    
    enc_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(
        input_num_units, state_is_tuple=True, name="enc_bw")
    
    enc_fw_cell = tf.contrib.rnn.DropoutWrapper(
        enc_fw_cell, input_keep_prob = keep_prob)
    
    enc_bw_cell = tf.contrib.rnn.DropoutWrapper(
        enc_bw_cell, input_keep_prob = keep_prob)

#     encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
#         encoder_cell, encoder_emb_inp,
#         sequence_length=encoder_lengths, dtype=tf.float32)

    enc_bi_outputs, encoder_state = tf.nn.bidirectional_dynamic_rnn(enc_fw_cell, enc_bw_cell, \
                                                                encoder_emb_inp, \
                                                                sequence_length=encoder_lengths, \
                                                                time_major=False, dtype=tf.float32)
    encoder_outputs = tf.concat(enc_bi_outputs, -1)


with tf.variable_scope('context_encoder_lstm'):
    ce_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(
        context_num_units, state_is_tuple=True, name="ce_fw")

    ce_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(
        context_num_units, state_is_tuple=True, name="ce_bw")
    
    ce_fw_cell = tf.contrib.rnn.DropoutWrapper(
        ce_fw_cell, input_keep_prob = keep_prob)
    
    ce_bw_cell = tf.contrib.rnn.DropoutWrapper(
        ce_bw_cell, input_keep_prob = keep_prob)

#     context_encoder_outputs, context_encoder_state = tf.nn.dynamic_rnn(
#         context_encoder_cell, context_encoder_emb_inp,
#         sequence_length=context_encoder_lengths, dtype=tf.float32)

    ce_bi_outputs, context_encoder_state = tf.nn.bidirectional_dynamic_rnn(ce_fw_cell, ce_bw_cell, \
                                                                context_encoder_emb_inp, \
                                                                sequence_length=context_encoder_lengths, \
                                                                time_major=False, dtype=tf.float32)
    context_encoder_outputs = tf.concat(ce_bi_outputs, -1)

with tf.variable_scope('decoder_lstm'):
    total_c_state = tf.concat(axis=1,values=[encoder_state[0].c, encoder_state[1].c, \
                                             context_encoder_state[0].c, context_encoder_state[1].c])
    total_h_state = tf.concat(axis=1,values=[encoder_state[0].h, encoder_state[1].h, \
                                             context_encoder_state[0].h, context_encoder_state[1].h])

    total_state = tf.contrib.rnn.LSTMStateTuple(total_c_state, total_h_state)

    decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_num_units, state_is_tuple=True, name="dec_lstm")
    
    decoder_cell = tf.contrib.rnn.DropoutWrapper(
        decoder_cell, input_keep_prob = keep_prob)

    projection_layer = tf.layers.Dense(
        vocab_size, use_bias=False)

    # attention_states: [batch_size, max_time, num_units]

    attention_states = context_encoder_outputs # tf.transpose

    # Create an attention mechanism
    attention_mechanism = tf.contrib.seq2seq.LuongAttention(
        decoder_num_units, attention_states,
        memory_sequence_length=decoder_lengths)
    
    decoder_cell = tf.contrib.seq2seq.AttentionWrapper(
        decoder_cell, attention_mechanism,
        attention_layer_size=decoder_num_units)
    
    initial_state = decoder_cell.zero_state(dtype = tf.float32, batch_size=batch_size)
    initial_state = initial_state.clone(cell_state=total_state)

    # Helper
    helper = tf.contrib.seq2seq.TrainingHelper(
        decoder_emb_inp, decoder_lengths)

    # Decoder
    decoder = tf.contrib.seq2seq.BasicDecoder(
        decoder_cell, helper, initial_state,
        output_layer=projection_layer)

    # Dynamic decoding
    decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder)

    logits = decoder_outputs.rnn_output

with tf.variable_scope('loss'):
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=decoder_targets, logits=logits)

    train_loss = tf.reduce_sum(loss * target_weights)

with tf.variable_scope('optimization'):
    # Calculate and clip gradients
    max_gradient_norm = 1
    params = tf.trainable_variables()
    gradients = tf.gradients(train_loss, params)
    clipped_gradients, _ = tf.clip_by_global_norm(
        gradients, max_gradient_norm)

    # Optimization
    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_step = optimizer.apply_gradients(
        zip(clipped_gradients, params))

saver = tf.train.Saver(max_to_keep=3)

In [12]:
sess.run(tf.global_variables_initializer())

In [13]:
num_epochs = 0

In [14]:
epochs = 10
bs = 35
for epoch_i in range(epochs):
    if epoch_i < 3:
        lr = 0.01
    elif epoch_i >= 3 and epoch_i < 6:
        lr = 0.005
    elif epoch_i >= 7 and epoch_i < epochs:
        lr = 0.001
        
    start_time = time.time()

    for batch_i, (enc_batch_inputs, con_enc_batch_inputs, dec_batch_inputs, dec_batch_targets, \
                  enc_inp_lens, con_enc_inp_lens, dec_inp_lens, target_w) \
    in enumerate(tqdm(batch_data(train_X, train_Y, bs))):
        _, batch_loss, batch_logits = sess.run([update_step, train_loss, logits],
            feed_dict = {encoder_inputs: enc_batch_inputs,
             context_encoder_inputs: con_enc_batch_inputs,
             decoder_inputs: dec_batch_inputs,
             decoder_targets: dec_batch_targets,
             encoder_lengths: enc_inp_lens,
             context_encoder_lengths: con_enc_inp_lens,
             decoder_lengths: dec_inp_lens,
             target_weights: target_w,
             learning_rate: lr,
             batch_size: bs})
    num_epochs += 1
    accuracy = np.mean(batch_logits.argmax(axis=-1) == dec_batch_targets)
    print('Epoch:', epoch_i+1, 'Loss:', batch_loss/bs, 'Accuracy:', accuracy, 'Epoch duration:', (time.time() - start_time), 's')
    saver.save(sess, './checkpoints/epoch_'+str(num_epochs)+"_"+str(strftime("%Y-%m-%d_%H:%M:%S")))

1571it [29:12,  1.12s/it]


Epoch: 1 Loss: 87.00921456473215 Accuracy: 0.0970873786407767 Epoch duration: 1752.9742629528046 s


1571it [29:08,  1.11s/it]


Epoch: 2 Loss: 74.86971261160714 Accuracy: 0.10762829403606103 Epoch duration: 1748.4938309192657 s


1571it [29:02,  1.11s/it]


Epoch: 3 Loss: 73.48782784598214 Accuracy: 0.10901525658807212 Epoch duration: 1742.0963327884674 s


1571it [29:02,  1.11s/it]


Epoch: 4 Loss: 50.211802455357144 Accuracy: 0.1319001386962552 Epoch duration: 1742.6694738864899 s


1571it [29:03,  1.11s/it]


Epoch: 5 Loss: 44.30073939732143 Accuracy: 0.1378640776699029 Epoch duration: 1743.3598012924194 s


1571it [29:04,  1.11s/it]


Epoch: 6 Loss: 42.50959821428572 Accuracy: 0.140499306518724 Epoch duration: 1744.85400724411 s


1571it [29:05,  1.11s/it]


Epoch: 7 Loss: 43.1927001953125 Accuracy: 0.1407766990291262 Epoch duration: 1745.5899007320404 s


1571it [29:05,  1.11s/it]


Epoch: 8 Loss: 28.077804129464287 Accuracy: 0.15963938973647712 Epoch duration: 1745.6755084991455 s


1571it [29:05,  1.11s/it]


Epoch: 9 Loss: 27.729481724330356 Accuracy: 0.15908460471567268 Epoch duration: 1746.0738470554352 s


1571it [29:03,  1.11s/it]


Epoch: 10 Loss: 25.946843610491072 Accuracy: 0.16033287101248267 Epoch duration: 1743.9524500370026 s


In [11]:
def num2sent(pred, mode, seq_len=None):
    res = ""
    if mode == "q":
        pred = pred[:seq_len]        
        for idx in pred:
            res += idx2word[idx] + " "
    elif mode == "t":
        pred = pred[:seq_len]
        for idx in pred:
            res += idx2word[idx] + " "
    elif mode == "a":
        pred = pred[1:-1]
        for idx in pred:
            res += idx2word[idx] + " "
    return res, pred

In [12]:
def test_sample(test_sess, ques, ques_len, review, rev_len, bs=1):
    dec_input = np.zeros((1, 1)) + word2idx['<START>']
    dec_len = [1]
    while dec_input[0, -1] != word2idx['<EOS>']:
        batch_logits = test_sess.run("decoder_lstm/decoder/transpose:0",
                       feed_dict = {"encoder_inputs:0": [ques],
                                    "context_encoder_inputs:0": [review],
                                    "decoder_inputs:0": dec_input,
                                    "encoder_lengths:0": [ques_len],
                                    "context_encoder_lengths:0": [rev_len],
                                    "decoder_lengths:0": dec_len,
                                    "batch_size:0": bs})
        prediction = batch_logits[:,-1].argmax(axis=-1)
        dec_len[0] += 1

        dec_input = np.hstack([dec_input, prediction[:,None]])

    return dec_input[0]

In [26]:
# Testing
test_batch_size = 35
model = "epoch_10_2018-04-23_09:25:56"
with tf.Session() as test_sess:
    saver = tf.train.import_meta_graph('./checkpoints/'+model+'.meta')
    saver.restore(test_sess, tf.train.latest_checkpoint('./checkpoints/'))
    bleu_scores = []
    for batch_i, (enc_batch_inputs, con_enc_batch_inputs, dec_batch_inputs, dec_batch_targets, \
        enc_inp_lens, con_enc_inp_lens, dec_inp_lens, target_w) \
        in enumerate(tqdm(batch_data([test_X[0][-105:], test_X[1][-105:]], test_Y[-105:], test_batch_size))):

        for index, sample in enumerate(enc_batch_inputs):
            pred = test_sample(test_sess, sample, enc_inp_lens[index], con_enc_batch_inputs[index], \
                               con_enc_inp_lens[index])

            ip_str, ip_list = num2sent(sample, mode="q", seq_len=enc_inp_lens[index])
            target_str, target_list = num2sent(dec_batch_targets[index], mode="t", seq_len=dec_inp_lens[index])
            pred_str, pred_list = num2sent(pred, mode="a")

            bleu_scores.append(sentence_bleu([target_list], pred_list))

    print("Average BLEU score:", np.mean(bleu_scores))

INFO:tensorflow:Restoring parameters from ./checkpoints/epoch_10_2018-04-23_09:25:56



Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().

1it [00:36, 36.64s/it][A
Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().

3it [01:12, 24.30s/it][A
[A

Average BLEU score: 0.16741537486766345


In [14]:
# Demo
model = "epoch_10_2018-04-23_09:25:56"
with tf.Session() as demo_sess:
    saver = tf.train.import_meta_graph('./checkpoints/'+model+'.meta')
    saver.restore(demo_sess, tf.train.latest_checkpoint('./checkpoints/'))
    writer = tf.summary.FileWriter('logs', demo_sess.graph)
    ques = input("\nEnter question ('!q' to quit):\t")
    while ques != "!q":
        ques = nltk.word_tokenize(ques)
        ques = [token.lower() for token in ques]

        for idx, word in enumerate(ques):
            if word in word2idx:
                ques[idx] = word2idx[word]
            else:
                ques[idx] = word2idx["<UNK>"]

        ques += [word2idx['<EOS>']]

        review = input("\nEnter review:\t")
        
        review = nltk.word_tokenize(review)
        review = [token.lower() for token in review]

        for idx, word in enumerate(review):
            if word in word2idx:
                review[idx] = word2idx[word]
            else:
                review[idx] = word2idx["<UNK>"]

        review += [word2idx['<EOS>']]
        
        predict = test_sample(demo_sess, ques, len(ques), review, len(review))
        prediction, _ = num2sent(predict, mode="a")

        print ("\nAnswer:\t", prediction)

        ques = input("\nEnter question ('!q' to quit):\t")
    writer.close()

INFO:tensorflow:Restoring parameters from ./checkpoints/epoch_10_2018-04-23_09:25:56

Enter question ('!q' to quit):	Will this work for bass guitars as well ?

Enter review:	I use a lot of these guys and if they work when you get them, they havn't failed yet. I like it and have used HOSA products in the past with confidence. Thick, a bit stiff and seems to be shielded. I use this cord in my church to run from my pedalboard to my amp (which is in a soundproof box).

Answer:	 no 

Enter question ('!q' to quit):	Is plug and play simple ?

Enter review:	Record companies and file hosts are increasingly anachronistic due to tech like this JAM, and maybe soon enough we'll see artists keeping ALL of the profits from their art. Given the cost of an new iPhone or iPad, the cost of the JAM is rather trivial in comparison. It's simple to use, has no detectable latency, and delivers excellent sound quality. BUT: I haven't encountered anything so Plug & Play simple as the Apogee JAM 96k since Gefen 