### Dialogue Generation

In [1]:
import os
from six.moves import cPickle
import numpy as np

#### Step 1: Load the vocabulary from the data/vocab.pkl file

In [2]:
vocab_file = os.path.join('data', "vocab.pkl")

In [3]:
# Load vocabulary 
with open(vocab_file, "rb") as f:
    word_counts, word_to_index, index_to_word = cPickle.load(f)

vocab_size = len(word_counts)

#### Step 2: Build the LSTM model with pre-trained model weights

In [4]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM, Input, Bidirectional
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.metrics import categorical_accuracy
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

def bidirectional_lstm_model(seq_length, vocab_size):
    rnn_size = 256 # size of RNN
    learning_rate = 0.001 #learning rate
    
    print('Build LSTM model.')
    model = Sequential()
    model.add(Bidirectional(LSTM(rnn_size, activation="relu"),input_shape=(seq_length, vocab_size)))
    model.add(Dropout(0.2))
    model.add(Dense(vocab_size))
    model.add(Activation('softmax'))
    
    optimizer = Adam(lr=learning_rate)
    callbacks=[EarlyStopping(patience=2, monitor='loss')]
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=[categorical_accuracy])
    return model

In [5]:
seq_length = 20

def load_model(weight_file):
    # Load the network weights 
    model = bidirectional_lstm_model(seq_length, vocab_size)
    model.load_weights(weight_file)
    model.summary()
    
    return model

#### Step 3: Provide the seed sententense and start text generation

In [6]:
def remove_special_chara(text):
    SPECIAL_WORDS = {'PADDING': '<PAD>'}
    token_dict = {
            '.': '||Period||',
            ',': '||Comma||',
            '"': '||Quotation_Mark||',
            ';': '||Semicolon||',
            '!': '||Exclamation_Mark||',
            '?': '||Question_Mark||',
            '(': '||Left_Parentheses||',
            ')': '||Right_Parentheses||',
            '-': '||Dash||',
            '\n': '||Return||'
            }
    for key, token in token_dict.items():
            text = text.replace(" {}".format(token), key)
    
    text = text.replace('<PAD>', '\n')
    
    return text

In [12]:
# Replacing special characters in the text and pend <PAD> by the end of each conversation
# Parse the string to list of tokens 
def tokenizer(text):
    SPECIAL_WORDS = {'PADDING': '<PAD>'}
    token_dict = {
            '.': '||Period||',
            ',': '||Comma||',
            '"': '||Quotation_Mark||',
            ';': '||Semicolon||',
            '!': '||Exclamation_Mark||',
            '?': '||Question_Mark||',
            '(': '||Left_Parentheses||',
            ')': '||Right_Parentheses||',
            '-': '||Dash||',
            '\n': '||Return||'
            }
    for key, token in token_dict.items():
            text = text.replace(key, ' {} '.format(token))
            
    text = text.split()
    text = text + list(SPECIAL_WORDS.values())
    return text

In [7]:
def load_data(data_path):
    file = open(data_path, "r")
    lines = file.readlines()
    training_data = []
    
    for line in lines:
        training_data.extend(tokenizer(line))
    
    return training_data

def prepare_training_data(data, seq_length=20): 
    sequences_step = 1
    X_train = []
    Y_train = []
    for i in range(0, len(data) - seq_length, sequences_step):
        X_train.append(data[i: i + seq_length])
        Y_train.append(data[i + seq_length])

    print('Total sequences:', len(X_train))
    return X_train, Y_train

In [8]:
import sys 

def dialogue_prediction(model, seed=None, prediction_length=100, data_path=None):
    if seed == None:  
        # Pick a random seed from the training set 
        X_train, Y_train = prepare_training_data(load_data(data_path))
        start = np.random.randint(1, len(X_train)-1)
        seed = X_train[start]
    else:
        seed = tokenizer(seed)[:20]
        
    print("Seed:")
    print(remove_special_chara(" " + " ".join(seed)))

    prediction = []
    for i in range(prediction_length):
        x = np.zeros((1, seq_length, vocab_size), dtype=np.bool)        
        for j, word in enumerate(seed):
            x[0, j, word_to_index[word]] = 1

        y_hat = model.predict(x)
        index = np.argmax(y_hat)
        result = index_to_word[index]

        prediction.append(result)
        seed.append(result)
        seed = seed[1:len(seed)]   

    print("\n Predicted diaglogue: \n")
    print(remove_special_chara(" " + " ".join(prediction)))

 - Load the model for specific characters and predict the character speech based on seed sentence. 

In [12]:
model = load_model("save/model_lstm_jerry_128.40-0.41.hdf5")
dialogue_prediction(model, "jerry: well, senator, id just like to know, what you knew and when you knew it.")

Build LSTM model.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 512)               44347392  
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 21397)             10976661  
_________________________________________________________________
activation_1 (Activation)    (None, 21397)             0         
Total params: 55,324,053
Trainable params: 55,324,053
Non-trainable params: 0
_________________________________________________________________
Seed:
 jerry: well, senator, id just like to know, what you knew and when you knew it.

 Predicted diaglogue: 


 
 jerry: can you relax, its a cup of coffee. claire is a professional waitress.
 
 jerry: well, theres this 

In [10]:
model = load_model("save/model_lstm_elaine_128.28-0.95.hdf5")
dialogue_prediction(model, "elaine: i think, i think we were in my house where i grew up, and you were standing there, you were looking out the window...")

Build LSTM model.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 512)               44347392  
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 21397)             10976661  
_________________________________________________________________
activation_1 (Activation)    (None, 21397)             0         
Total params: 55,324,053
Trainable params: 55,324,053
Non-trainable params: 0
_________________________________________________________________
Seed:
 elaine: i think, i think we were in my house where i grew up, and you were standing

 Predicted diaglogue: 

 there, you were getting involved in the same.
 
 elaine: uh, no. no. i was just in the invitation, or a h

In [11]:
model = load_model("save/model_lstm_jerry_128.40-0.41.hdf5")
dialogue_prediction(model, seed=None, prediction_length=100, data_path="data/jerry_test.txt")

Build LSTM model.
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, 512)               44347392  
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 21397)             10976661  
_________________________________________________________________
activation_1 (Activation)    (None, 21397)             0         
Total params: 55,324,053
Trainable params: 55,324,053
Non-trainable params: 0
_________________________________________________________________
Total sequences: 32339
Seed:
 a second!( they fight over the paper.)
 
 jerry: all right, here!


 Predicted diaglogue: 

 
 jerry:( reading the guard, reading) hey!
 
 jerry: look at this shirt!
 
 jerry: i got a little! 

- Load the multi-character dialogue 
- Predict the dialogue 

In [10]:
model = load_model("save/model_lstm_dialogue_128.14-1.87.hdf5")
dialogue_prediction(model, seed=None, prediction_length=100, data_path="data/main_character_script.txt")

Build LSTM model.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 512)               44347392  
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 21397)             10976661  
_________________________________________________________________
activation (Activation)      (None, 21397)             0         
Total params: 55,324,053
Trainable params: 55,324,053
Non-trainable params: 0
_________________________________________________________________
Total sequences: 645871
Seed:
 their buying bone.
 
 jerry: hey, you know what? this is all your mail. they're

 Predicted diaglogue: 

 puttin' it in my face.
 
 jerry: i don't know what you do.
 
 george: i can't believe t

### Evaluate the prediction dialogue 

Load the Training data, we will random select the seed sentense and compute the BLEU and Rouge score of the text generation. Take Jerry's line as an example. 

In [9]:
# Generate the prediction sentence together with the reference 

def reference_prediction_gen(model, X_train, Y_train, prediction_length=100):
    # Pick a random seed from the training set 
    start = np.random.randint(1, len(X_train)-1)
    seed = X_train[start]

    prediction = []
    for i in range(prediction_length):
        x = np.zeros((1, seq_length, vocab_size), dtype=np.bool)        
        for j, word in enumerate(seed):
            x[0, j, word_to_index[word]] = 1

        y_hat = model.predict(x)
        index = np.argmax(y_hat)
        result = index_to_word[index]

        prediction.append(result)
        seed.append(result)
        seed = seed[1:len(seed)]   

    reference = Y_train[start:start+prediction_length]
    
    return reference, prediction

In [10]:
model = load_model("save/model_lstm_jerry_128.40-0.41.hdf5")

Build LSTM model.
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 512)               44347392  
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 21397)             10976661  
_________________________________________________________________
activation (Activation)      (None, 21397)             0         
Total params: 55,324,053
Trainable params: 55,324,053
Non-trainable params: 0
_________________________________________________________________


In [13]:
X_train, Y_train = prepare_training_data(load_data("data/jerry_test.txt"))

Total sequences: 32339


### BLEU Score 
- BLEU is a precision focused metrics calculating n-gram overlap between the reference and prediction text. 
- We evaluate the BLEU-1 and BLEU-4 scores for multiple simulation. 

In [14]:
import nltk

def BLEU_score(simulation_time, prediction_length):
    avg_BLEU_1 = 0
    avg_BLEU_4 = 0
    
    for i in range(simulation_time):
        reference, prediction = reference_prediction_gen(model, X_train, Y_train, prediction_length)
        BLEU_1 = nltk.translate.bleu_score.sentence_bleu([reference], prediction, weights=(1,0,0,0))
        BLEU_4 = nltk.translate.bleu_score.sentence_bleu([reference], prediction)
        
        avg_BLEU_1 += BLEU_1
        avg_BLEU_4 += BLEU_4
        
    avg_BLEU_1 = avg_BLEU_1 / simulation_time
    avg_BLEU_4 = avg_BLEU_4 / simulation_time
    
    print(f"Cumulative 1-gram: {avg_BLEU_1}")
    print(f"Cumulative 4-gram: {avg_BLEU_4}")

In [15]:
BLEU_score(simulation_time = 100, prediction_length=100)

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Cumulative 1-gram: 0.45539999999999997
Cumulative 4-gram: 0.1934442068434737


### Rouge Score
- Versus the precision forcused BLEU score, Rouge score focues more on recall. 
- Rouge-N: overlap of N-gram (we tested for unigram and bigram)
- Rouge-L: Longest common sequence (LCS)
- Rouge-W: Weighted LCS-based statistics that favors consecutive LCSes
- Rouge-S4: Skip-gram concurrence with skip gap 4
- Rouge-SU4: Skip-gram plus unigram concurrence with skip gap 4 


In [15]:
from rouge_metric import PyRouge

def Rouge_score(simulation_time, prediction_length):
    
    for i in range(simulation_time):
        reference, prediction = reference_prediction_gen(model, X_train, Y_train, prediction_length)
        reference = remove_special_chara(" " + " ".join(reference))
        prediction = remove_special_chara(" " + " ".join(prediction))

        rouge = PyRouge(rouge_n=(1, 2), rouge_l=True, rouge_w=True,
                    rouge_w_weight=1.2, rouge_s=True, rouge_su=True, skip_gap=4)
        scores = rouge.evaluate([prediction], [[reference]])
        print(scores)

In [16]:
Rouge_score(1, 200)

{'rouge-1': {'r': 0.3284671532846715, 'p': 0.3125, 'f': 0.3202846975088968}, 'rouge-2': {'r': 0.022058823529411766, 'p': 0.02097902097902098, 'f': 0.021505376344086023}, 'rouge-l': {'r': 0.31386861313868614, 'p': 0.2986111111111111, 'f': 0.30604982206405695}, 'rouge-w-1.2': {'r': 0.10404684205378563, 'p': 0.17210700842124219, 'f': 0.1296899586281442}, 'rouge-s4': {'r': 0.041791044776119404, 'p': 0.03971631205673759, 'f': 0.04072727272727273}, 'rouge-su4': {'r': 0.08933002481389578, 'p': 0.08490566037735849, 'f': 0.08706166868198306}}


### Perplexity 
- we caluclate the unigram perplexity 

$p(w) = \frac{count(w)}{count(vcab)}$

$PP(s) = 2^{log_{2}^{PP(s)}} = 2^{-\frac{1}{n}log(p(s))}$

$\frac{1}{n}log(p(s)) = \frac{1}{n}(logp(w_{1})+logp(w_{2})+...+logp(w_{n}))$


In [40]:
# Build the unigram probability in the vocabulary 
# word_counts load from the vocab.pkl file contains the word and its count in vocab 

unigram_prob = {}
for w in word_counts:
    unigram_prob[w[0]] = w[1]

def perplexity_score(simulation_time, prediction_length):
    score = 0.0
    for i in range(simulation_time):
        reference, prediction = reference_prediction_gen(model, X_train, Y_train, prediction_length)
        l = 0.0
        for w in prediction:
            l += np.log2(unigram_prob[w])
        l = l / prediction_length
        score += np.power(2, -l)
    score /= simulation_time
    
    return score
    

In [42]:
perplexity_score(10, 100)

0.00019151019468558275