In [4]:
import os
import sys
import numpy as np
import pandas as pd
sys.path.append('/home/kyle.shaffer/dialog_model_experiments/src')
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

from attention import *
from keras.models import Model
from keras.layers import Input, Embedding, Dense, Dropout, LSTM, Lambda
from keras.layers.merge import Add, Concatenate
from keras.layers.wrappers import TimeDistributed

In [13]:
import json

# Input: BPE vocabulary with two tab-separated fields per line; only the first
# field (the token) is used here.
bpe_vocab_path = '/data/users/kyle.shaffer/dialog_data/cornell_movie/bpe/bpe_vocab15k.txt'

d = {}
with open(bpe_vocab_path, mode='r') as infile:
    # Token IDs follow file order and start at 1; ID 0 is assigned to padding below.
    for ix, line in enumerate(infile, start=1):
        w, _ = line.strip().split('\t')
        d[w] = ix

d['_pad_'] = 0

# Quick sanity report on the resulting token -> ID mapping.
all_ids = d.values()
print(len(d))
print(d['_pad_'])
print('max ID:', max(all_ids))
print('min ID:', min(all_ids))

# Persist the mapping as JSON.
with open('/data/users/kyle.shaffer/dialog_data/cornell_movie/cakechat_model/tokens_index/movie_tok_ids.json', mode='w') as outfile:
    outfile.write(json.dumps(d))

15029
0
max ID: 15028
min ID: 0


In [None]:
import keras.backend as K
from keras.layers import InputSpec
from keras.engine.topology import Layer
from keras import initializers as initializers, regularizers, constraints

class AttLayer(Layer):
    """Attention pooling over the time axis of a 3D tensor.

    For an input ``x`` of shape ``(batch, seq_len, input_dim)`` the layer computes

        uit = tanh(x W + b)                    # (batch, seq_len, attention_dim)
        ait = exp(uit . u)                     # (batch, seq_len), masked steps -> 0
        ait = ait / (sum over seq_len + eps)
        out = sum over seq_len of ait * x      # (batch, input_dim)

    # Arguments
        attention_dim: width of the hidden projection used to score each step.
        **kwargs: forwarded to the Keras ``Layer`` constructor (e.g. ``name``).

    # Input shape
        3D tensor ``(batch, seq_len, input_dim)``; an optional ``(batch, seq_len)`` mask.

    # Output shape
        2D tensor ``(batch, input_dim)``; no mask is propagated.
    """

    def __init__(self, attention_dim, **kwargs):
        # Forward kwargs so that `name=...` and friends are honoured, and run the
        # base constructor first: it initialises `supports_masking` itself, so the
        # flag must be set afterwards to stick.
        super(AttLayer, self).__init__(**kwargs)
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim

    def build(self, input_shape):
        """Create W, b and u once the size of the last input axis is known."""
        assert len(input_shape) == 3, "Expected input_shape == 3, found input_shape == {}!".format(len(input_shape))
        # add_weight registers the variables with the layer (trainable weights,
        # saving/loading) instead of assigning `trainable_weights` by hand.
        self.W = self.add_weight(name='W',
                                 shape=(input_shape[-1], self.attention_dim),
                                 initializer=self.init,
                                 trainable=True)
        self.b = self.add_weight(name='b',
                                 shape=(self.attention_dim,),
                                 initializer=self.init,
                                 trainable=True)
        self.u = self.add_weight(name='u',
                                 shape=(self.attention_dim, 1),
                                 initializer=self.init,
                                 trainable=True)
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        """Return no mask: the time axis the incoming mask refers to is summed away in `call`."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of `x` over axis 1.

        # Arguments
            x: tensor of shape (batch, seq_len, input_dim).
            mask: optional tensor of shape (batch, seq_len); steps where it is
                zero/False receive zero attention weight.
        """
        # size of x : [batch_size, seq_len, input_dim]
        # size of W : [input_dim, attention_dim]; size of u : [attention_dim, 1]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        # One unnormalised score per time step: (batch, seq_len, 1) -> (batch, seq_len)
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        # epsilon keeps the division finite when every step of a row is masked out
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        # (batch, seq_len) -> (batch, seq_len, 1) so the weights broadcast over features
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)
        return output

    def compute_output_shape(self, input_shape):
        """The time axis is reduced away: (batch, seq_len, dim) -> (batch, dim)."""
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        """Include `attention_dim` so the layer can be re-created from its config."""
        config = {'attention_dim': self.attention_dim}
        base_config = super(AttLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
import numpy as np

# NumPy walk-through of the AttLayer forward pass, printing the shape after each step.
W = np.random.normal(size=(200, 200))
b = np.random.normal(size=(200,))
u = np.random.normal(size=(200, 1))
x = np.random.normal(size=(10, 20, 200))

uit = np.dot(x, W)
print('uit:', uit.shape)
# tanh mirrors K.tanh(...) in AttLayer.call; it also bounds the projection to
# [-1, 1], which keeps the scores fed to exp() below in a safe range.
uit = np.tanh(np.add(uit, b))
print('uit:', uit.shape)

# One score per time step: (10, 20, 1) -> (10, 20)
ait = np.dot(uit, u)
print('ait:', ait.shape)
ait = np.squeeze(ait, -1)
print('ait:', ait.shape)
ait = np.exp(ait)
print('ait:', ait.shape)
# Normalise over the time axis so each row of weights sums to 1.
ait /= np.sum(ait, axis=1, keepdims=True)
print('ait:', ait.shape)
ait = np.expand_dims(ait, axis=-1)
print('ait:', ait.shape)

# Broadcast the (10, 20, 1) weights over the feature axis, then pool over time.
weighted_input = x * ait
print('weighted_input:', weighted_input.shape)

output = np.sum(weighted_input, axis=1)
print('output:', output.shape)

In [None]:
# Inputs: one token-ID sequence each for the context utterance and the current utterance.
context_in = Input(shape=(None,), name='context_input')
current_in = Input(shape=(None,), name='current_input')
# Declared for the decoder that is still commented out below; it is listed as a
# model input but is not connected to the output yet.
decoder_in = Input(shape=(None,), name='decoder_input')

# Layers shared by both utterances.
embed_layer = Embedding(input_dim=20000, output_dim=200, mask_zero=True, name='embedding')
encoder = LSTM(units=200, return_sequences=True, return_state=True, name='word_encoder')
att_layer = AttLayer(attention_dim=200, name='word_attention')

# Encoder
context_embed = embed_layer(context_in)
current_embed = embed_layer(current_in)
context_encode, _, _ = encoder(context_embed)
current_encode, state_h, state_c = encoder(current_embed)
# Word-level attention pools each utterance to a single (batch, 200) vector.
context_attend = att_layer(context_encode)
current_attend = att_layer(current_encode)
# Insert an utterance axis at position 1 -> (batch, 1, 200) and stack the two
# utterances along it -> (batch, 2, 200). Expanding/concatenating on the last
# axis instead would yield (batch, 200, 2), i.e. the LSTM below would step over
# the 200 feature positions rather than over the two utterances.
context_attend = Lambda(lambda x: K.expand_dims(x, 1))(context_attend)
current_attend = Lambda(lambda x: K.expand_dims(x, 1))(current_attend)
utterance_attend = Concatenate(axis=1)([context_attend, current_attend])
utt_rnn = LSTM(units=100, return_sequences=True, name='utt_encoder')(utterance_attend)
# Utterance-level attention pools the two utterance states to (batch, 100).
cross_utterance_attention = AttLayer(attention_dim=100)(utt_rnn)

# Decoder
# decoder_embed = embed_layer(decoder_in)
# decoder_rnn = LSTM(units=200, return_sequences=True)(decoder_embed)
# decoder_combined_context = Concatenate(axis=-1)([cross_utterance_attention, decoder_rnn])
# logits_out = Dense(units=20000, activation='linear', name='logits')(decoder_combined_context)

model = Model(inputs=[context_in, current_in, decoder_in], outputs=cross_utterance_attention)
model.summary()

In [None]:
from keras.layers import RepeatVector
from keras.layers import Activation, Dot

def custom_repeat(args):
    """Repeat one tensor as many times as a reference sequence has steps.

    `args` is indexed positionally: ``args[0]`` is the reference sequence tensor
    and ``args[1]`` is the tensor handed to ``RepeatVector``. The repeat count is
    read from ``args[0].shape[1]``. Both shapes are printed for debugging.

    NOTE(review): the repeat count comes from the static shape, so this presumably
    needs a fixed (non-None) sequence length — confirm before using it with
    variable-length inputs.
    """
    reference, to_repeat = args[0], args[1]
    print('Seq layer:', reference.shape)
    print('Repeat layer:', to_repeat.shape)
    n_repeats = reference.shape[1]
    return RepeatVector(n_repeats)(to_repeat)

# Hierarchical encoder/decoder sketch: a word-level sentence encoder is applied to
# every utterance of a conversation, an utterance-level LSTM runs over the results,
# and the decoder attends over the utterance states with dot-product attention.

# word_input feeds only the inner sentence-encoder model; the outer model's inputs
# are conversation_input and decoder_input (see the final Model(...) call).
word_input = Input(shape=(None,), name='word_input')
decoder_input = Input(shape=(None,), name='decoder_input')
# Two free axes per example — presumably (utterances, tokens); TODO confirm against the data generator.
conversation_input = Input(shape=(None, None), name='conversation_input')

# ENCODER
# Word-level encoder params
# NOTE(review): input_dim is hard-coded to 20000 while the data-generator cell
# reports "VOCAB SIZE = 15032" — confirm which vocabulary this model is meant for.
embed_layer = Embedding(input_dim=20000, output_dim=200, mask_zero=True, name='embedding')
word_encoder = LSTM(units=200, return_sequences=True, return_state=False, name='word_encoder')
word_att_layer = AttLayer(attention_dim=200, name='word_attention')
# Utterance-level encoder params
utterance_encoder = LSTM(units=100, return_sequences=True, name='utterance_encoder')
# Created but currently unused: its only call site is commented out below.
utt_att_layer = AttLayer(attention_dim=100, name='utterance_attention')

# Sentence encoder: embed -> LSTM over tokens -> attention pooling to one vector.
word_embed = embed_layer(word_input)
word_encode = word_encoder(word_embed)
h_att_word = word_att_layer(word_encode)
print('word_encode:', word_encode.shape)
print('h_att_word:', h_att_word.shape)
sent_encoder = Model(inputs=word_input, outputs=h_att_word)

# Apply the sentence encoder along axis 1 of the conversation, then run the
# utterance-level LSTM over the resulting sequence of sentence vectors.
context_encoder = TimeDistributed(sent_encoder)(conversation_input)
utt_encode = utterance_encoder(context_encoder)
# h_att_utt = utt_att_layer(utt_encode)

# DECODER
# The decoder reuses the encoder's embedding layer (shared weights).
decoder_embed = embed_layer(decoder_input)
# print('decoder_embed:', decoder_embed.shape)
decoder_output = LSTM(units=100, return_sequences=True)(decoder_embed)
# Contract the feature axis (axis 2) of both tensors: one score per
# (decoder step, utterance) pair.
attention = Dot(axes=[2, 2], name='decoder_encoder_dot')([decoder_output, utt_encode])
# Softmax turns the scores into attention weights over the utterances.
attention = Activation('softmax', name='attention_probs')(attention)
# Weighted sum of utterance states for every decoder step.
context = Dot(axes=[2, 1], name='att_encoder_context')([attention, utt_encode])
decoder_combined_context = Concatenate(name='decoder_context_concat')([context, decoder_output])
# Linear activation: the layer emits unnormalised logits, one per vocabulary entry.
logits = Dense(units=20000, activation='linear')(decoder_combined_context)

model = Model(inputs=[conversation_input, decoder_input], outputs=logits)
model.summary()

In [None]:
import numpy as np

# Two identical 2 x 10 nested lists; every row is 0..9.
l1 = [list(range(10)) for _ in range(2)]
l2 = [list(range(10)) for _ in range(2)]

m1, m2 = np.asarray(l1), np.asarray(l2)

# Stacking two (2, 10) arrays adds a leading axis, giving (2, 2, 10).
m = np.stack([m1, m2])
print(m.shape)

## Data Generator

In [2]:
sys.path.append('/home/kyle.shaffer/dialog_model_experiments/src')
from data_utils import *

# Build the vocabulary and a batch generator for the context-aware seq2seq data.
# get_vocab and HanS2SProcessing are not defined in this notebook — presumably
# they come from the `from data_utils import *` above; TODO confirm.

# Same BPE vocabulary file that the token-ID export cell reads.
vocab_path = '/data/users/kyle.shaffer/dialog_data/cornell_movie/bpe/bpe_vocab15k.txt'
vocab = get_vocab(vocab_path, min_freq=2)

train_path = '/data/users/kyle.shaffer/dialog_data/cornell_movie/bpe/cornell_movie_context_train.txt'
valid_path = '/data/users/kyle.shaffer/dialog_data/cornell_movie/bpe/cornell_movie_context_valid.txt'
s2s_data = HanS2SProcessing(train_path, valid_path, vocab, batch_size=10, model_type='recurrent')
# Later cells consume this with next(datagen) and unpack each item as ((context, current, y_in), y).
datagen = s2s_data.generate_s2s_batches()

VOCAB SIZE = 15032
Highest word ID: 15031
<PAD> index: 0


In [None]:
# Inverse lookup of `vocab`: maps each value back to its key.
rev_vocab = dict((v, k) for k, v in vocab.items())

In [None]:
# Peek at the first 50 (key, value) pairs of the vocabulary.
list(vocab.items())[:50]

In [None]:
# Second generator built with S2SProcessing on the same files, vocabulary and
# settings as the HanS2SProcessing one, for side-by-side comparison of batches.
# Its batches are unpacked by show_next2 as ((x_in, y_in), y).
s2s_data_orig = S2SProcessing(train_path, valid_path, vocab, batch_size=10, model_type='recurrent')
datagen2 = s2s_data_orig.generate_s2s_batches()

In [None]:
def show_next2(d):
    """Pull one batch from generator `d` and print a short summary of it.

    The batch must unpack as ``((x_in, y_in), y)``. Prints the number of input
    arrays, then the shapes of the three arrays, then their means.
    """
    inputs, targets = next(d)
    print(len(inputs))
    encoder_in, decoder_in = inputs
    arrays = (encoder_in, decoder_in, targets)
    print(*[arr.shape for arr in arrays])
    print(*[arr.mean() for arr in arrays])

In [None]:
# Summarise the next batch from the S2SProcessing generator.
show_next2(datagen2)

In [None]:
# Summarise one more batch (each call advances the generator by one batch).
show_next2(datagen2)

In [None]:
def show_next(d):
    """Pull one batch from generator `d` and print the shape and mean of each array.

    The batch must unpack as ``((context, current, y_in), y)``. The first printed
    line holds the four shapes, the second the four means, in that order.
    """
    inputs, targets = next(d)
    context, current, y_in = inputs
    arrays = (context, current, y_in, targets)
    print(*[arr.shape for arr in arrays])
    print(*[arr.mean() for arr in arrays])

In [None]:
# Summarise the next batch from the HanS2SProcessing generator.
show_next(datagen)

In [None]:
# Summarise one more batch (each call advances the generator by one batch).
show_next(datagen)

In [None]:
# Draw one batch and keep its pieces in the namespace; the following cells
# index into context / current / y_in / y.
x, y = next(datagen)
context, current, y_in = x
# First row of the context array.
context[0]

In [None]:
# First row of the `current` array from the batch drawn above.
current[0]

In [None]:
# First row of the `y_in` array from the batch drawn above.
y_in[0]

In [None]:
# First row of the target array `y` from the batch drawn above.
y[0]

In [None]:
# Map the first row of every batch array back through rev_vocab and print it,
# in the order context, current, decoder input, target.
for batch_array in (context, current, y_in, y):
    print([rev_vocab[w] for w in batch_array[0]])

In [None]:
# Shapes of the first and last input arrays of the batch, then of the target array.
print(x[0].shape, x[-1].shape)
y.shape

## Model Inspection

In [None]:
class Args:
    """Plain attribute container passed as `args` to HanRnnSeq2Seq in a later cell.

    NOTE(review): presumably mirrors the training script's command-line options;
    the meaning of each field is defined by the consuming code, which is not
    visible in this notebook — confirm before changing values.
    """
    # NOTE(review): -1 presumably means "no GPU", matching CUDA_VISIBLE_DEVICES='-1'
    # set in the first cell — confirm.
    gpu = -1
    model_name = ''
    # NOTE(review): these point at ased_data/*.jl files, not at the cornell_movie
    # files used by the data-generator cells — confirm this is intended.
    train_file = '/data/users/kyle.shaffer/ased_data/combined_multilabel_train.jl'
    valid_file = '/data/users/kyle.shaffer/ased_data/combined_multilabel_valid.jl'
    vocab_file = ''
    min_vocab_freq = 0
    # Training schedule.
    n_epochs = 10
    batch_size = 256
    # Architecture selection and sizes.
    model_type = 'han_rnn'
    encoder_type = 'uni'
    train_from = ''
    optimizer = 'adam'
    rec_cell = 'lstm'
    embedding_dim = 300
    encoder_dim = 256
    decoder_dim = 256
    num_encoder_layers = 2
    num_decoder_layers = 1
    # Dataset sizes supplied by hand rather than counted from the files.
    n_train_examples = 158037
    n_valid_examples = 17561
    
args = Args()

In [None]:
from recurrent import *
from attention import *

# Checkpoint to restore.
prev_model_path = '/data/users/kyle.shaffer/chat_models/bpe_lstm_context_chatbot_epoch03_loss4.424.h5'
# Build the model wrapper from the Args settings and the vocabulary loaded in the
# data-generator cell, then load the saved checkpoint into it.
# HanRnnSeq2Seq is not defined in this notebook — presumably provided by one of
# the star imports above; TODO confirm.
han = HanRnnSeq2Seq(args=args, vocab=vocab)
han.load_trained_model(prev_model_path)

In [None]:
# Hand-written list of IDs, mapped back through rev_vocab for inspection.
l = [15030, 559, 2, 4, 23, 21, 138, 1]
[rev_vocab[i] for i in l]

In [None]:
# Example input: two utterances joined by a tab character
# (presumably context first, then the current utterance — TODO confirm against greedy_decode).
i = "Can you believe she actually said those things ?\tOh, yeah she's always like that ."
han.greedy_decode(i, use_bpe=True)

In [None]:
# Example counts (train, valid), batch size and number of epochs.
t, v = 157450, 17505
bs, e = 256, 50

# Full batches per epoch (the partial last batch is dropped by //) times the epochs.
train_steps, valid_steps = [e * (n_examples // bs) for n_examples in (t, v)]

print(train_steps, valid_steps)

## BPE Processing

In [None]:
from subword_nmt.apply_bpe import BPE

class BPEArgs:
    """Settings handed to subword_nmt's BPE constructor, in positional-argument order."""
    # Path of the BPE codes file. Kept as a path rather than an open handle so that
    # defining the class (or re-running this cell) does not leave a file open.
    codes = '/data/users/kyle.shaffer/dialog_data/cornell_movie/bpe/bpe_vocab_mapping15k.txt'
    # Per the subword-nmt documentation, -1 means "apply every merge in the codes file".
    merges = -1
    # Marker appended to non-final subword pieces (e.g. "pron@@ un@@ c...").
    separator = '@@'
    vocabulary = None
    glossaries = None
    
bpe_args = BPEArgs()

# BPE reads the codes while it is being constructed, so the handle is only needed
# for the duration of this block and is closed deterministically afterwards.
# The encoding is given explicitly so the result does not depend on the locale.
with open(bpe_args.codes, mode='r', encoding='utf-8') as codes_file:
    bpe = BPE(codes_file, bpe_args.merges, bpe_args.separator,
              bpe_args.vocabulary, bpe_args.glossaries)

In [None]:
# Segment a whitespace-tokenised example sentence with the BPE model built above.
i0 = 'I am good , but could use some recommendations on some things .'
print(i0)
bpe.segment_tokens(i0.split())

In [None]:
from random import shuffle

# Read every training example: one per line, surrounding whitespace stripped.
with open(train_path, mode='r') as infile:
    t = [line.strip() for line in infile]

print('Number of training lines:', len(t))

# Shuffle a list of positions and print the first few as a quick check.
idx = list(range(len(t)))
shuffle(idx)
print(idx[:5])

# Write the lines back out in shuffled order, one per line.
with open('/data/users/kyle.shaffer/dialog_data/cornell_movie_context_shuffle_train.txt', mode='w') as outfile:
    outfile.writelines(t[i] + '\n' for i in idx)

In [8]:
import csv

# Load the training triples (context, current utterance, response) into a frame.
# The file appears to be raw tab-separated text rather than quoted CSV (other
# cells read it line by line). Quote handling is therefore disabled: with the
# default settings a field that starts with a double quote is treated as a quoted
# field and can swallow the tabs/newlines that follow it.
train_df = pd.read_csv(train_path, sep='\t', names=['context', 'current', 'response'],
                       quoting=csv.QUOTE_NONE)
print(train_df.shape)
train_df.head(10)

(157677, 3)


Unnamed: 0,context,current,response
0,Can we make this quick ? Ro@@ x@@ anne Kor@@ r...,"Well , I thought we'd start with pron@@ un@@ c...",Not the ha@@ cking and ga@@ gging and sp@@ itt...
1,"Well , I thought we'd start with pron@@ un@@ c...",Not the ha@@ cking and ga@@ gging and sp@@ itt...,Okay . . . then how 'bout we try out some Fren...
2,<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD...,You're asking me out . That's so cute . What's...,Forget it .
3,<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD...,How do you get your hair to look like that ?,E@@ ber's Deep Con@@ diti@@ oner every two day...
4,"Hey , since when do you play Thomas E@@ dison ...","It is . . . was . It's a z@@ app@@ er , it mig...","Yeah , or turn you into toast ."
5,"Well hello there , young employee of the S@@ i...","Look , I already told you I'm not going to giv...","What can you tell me , young man , about the v..."
6,"Look , I already told you I'm not going to giv...","What can you tell me , young man , about the v...","Look , I'll be done in a minute . Just wait ou..."
7,"What can you tell me , young man , about the v...","Look , I'll be done in a minute . Just wait ou...",I'm afraid I don't understand . I simply wish ...
8,<PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD...,So Josh . . .,"Look , can we talk in a minute ? I'm almost do..."
9,That guy rules !,"Who , Doug ? He spends more time here than I d...","So Josh , will you give us a ride ? Please ? P..."
