In [361]:
import pandas as pd
import numpy as np
import tensorflow as tf
import re
# load lines dictionary 
lines = open('dataset/chatbot/movie_lines.txt', encoding='utf-8', errors='ignore').read().split('\n')

# load conversations
convs= open('dataset/chatbot/movie_conversations.txt', encoding='utf-8', errors='ignore').read().split('\n')

print('\n'.join(lines[:3]))
print()
print('\n'.join(convs[:3]))

L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!
L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!
L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.

u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']


# 1.Text preprocessing


1)把縮寫還原，並去掉特殊符號與標點符號。太長的句子與出現頻率不高的字詞丟掉。最後把token與字詞轉換成integer，寫進dictionary 存起來以利之後

2)在每句都加上token並把不在dictionary內的字詞替換成UNK，在最後要丟進去model時，把含有UNK的句子丟掉。

In [362]:
id_conv = {}
for line in lines:
    line_split = line.split(' +++$+++ ')
    if len(line_split) == 5:
        id_conv[line_split[0]] = line_split[4]

In [363]:
convs_idlist = [ ]

for line in convs[:]:
    _line = line.split(' +++$+++ ')[-1][1:-1].replace("'","").replace(" ","").split(',')
    convs_idlist.append(_line)

In [364]:
questions = []
answers = []

for conv in convs_idlist:
    for i in range(len(conv)-1):
        questions.append(id_conv[conv[i]])
        answers.append(id_conv[conv[i+1]])

In [365]:
def clean_text(text):

    text = text.lower()
    
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"it's", "it is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "that is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"\'ll", " will", text)
    text = re.sub(r"\'ve", " have", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"\'d", " would", text)
    text = re.sub(r"\'re", " are", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"n't", " not", text)
    text = re.sub(r"n'", "ng", text)
    text = re.sub(r"'bout", "about", text)
    text = re.sub(r"'til", "until", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)
    
    return text

In [366]:
clean_questions = []
for question in questions:
    clean_questions.append(clean_text(question))
    
clean_answers = []    
for answer in answers:
    clean_answers.append(clean_text(answer))

In [367]:
min_line_length = 2
max_line_length = 15

short_questions_temp = []
short_answers_temp = []

i = 0
for question in clean_questions:
    if len(question.split()) >= min_line_length and len(question.split()) <= max_line_length:
        short_questions_temp.append(question)
        short_answers_temp.append(clean_answers[i])
    i += 1

short_questions = []
short_answers = []

i = 0
for answer in short_answers_temp:
    if len(answer.split()) >= min_line_length and len(answer.split()) <= max_line_length:
        short_answers.append(answer)
        short_questions.append(short_questions_temp[i])
    i += 1

In [368]:
vocab = {}
for question in short_questions:
    for word in question.split():
        if word not in vocab:
            vocab[word] = 1
        else:
            vocab[word] += 1
            
for answer in short_answers:
    for word in answer.split():
        if word not in vocab:
            vocab[word] = 1
        else:
            vocab[word] += 1

In [369]:
threshold = 10
count = 0
for k,v in vocab.items():
    if v >= threshold:
        count += 1

In [370]:
questions_vocab_to_int = {}

word_num = 0
for word, count in vocab.items():
    if count >= threshold:
        questions_vocab_to_int[word] = word_num
        word_num += 1
        
answers_vocab_to_int = {}

word_num = 0
for word, count in vocab.items():
    if count >= threshold:
        answers_vocab_to_int[word] = word_num
        word_num += 1

In [371]:
codes = ['<PAD>','<END>','<UNK>','<BEG>']

for code in codes:
    questions_vocab_to_int[code] = len(questions_vocab_to_int)+1
    
for code in codes:
    answers_vocab_to_int[code] = len(answers_vocab_to_int)+1

In [372]:
##store word_to_int dictionary
np.save('questions_vocab_to_int.npy',questions_vocab_to_int)
np.save('answers_vocab_to_int.npy',answers_vocab_to_int)
questions_int_to_vocab = {v_i: v for v, v_i in questions_vocab_to_int.items()}
answers_int_to_vocab = {v_i: v for v, v_i in answers_vocab_to_int.items()}
np.save('questions_int_to_vocab.npy', questions_int_to_vocab)
np.save('answers_int_to_vocab.npy', answers_int_to_vocab)
#print(questions_int_to_vocab)

In [373]:
# Add the end of sentence token to the end of every answer.
ques_vocab = np.load('./questions_vocab_to_int.npy').tolist()
ans_vocab = np.load('./answers_vocab_to_int.npy').tolist()
ques_rev = np.load('./questions_int_to_vocab.npy').tolist()
ans_rev = np.load('./answers_int_to_vocab.npy').tolist()

for i in range(len(short_answers)):
    short_answers[i] = '<BEG> '+short_answers[i]+' <END>'
for i in range(len(short_questions)):
    short_questions[i] = '<BEG> '+short_questions[i]+' <END>'

In [374]:
# Convert the words to integers. 
# Replace the words that are not in the respective vocabulary with <UNK> 
questions_int = []
for question in short_questions:
    ints = []
    for word in question.split():
        if word not in questions_vocab_to_int:
            ints.append(questions_vocab_to_int['<UNK>'])
        else:
            ints.append(questions_vocab_to_int[word])
    questions_int.append(ints)
    
answers_int = []
for answer in short_answers:
    ints = []
    for word in answer.split():
        if word not in answers_vocab_to_int:
            ints.append(answers_vocab_to_int['<UNK>'])
        else:
            ints.append(answers_vocab_to_int[word])
    answers_int.append(ints)

In [375]:
en_corpus_clean = []
ch_corpus_clean = []

for i in range(len(questions_int)):
    if not(ques_vocab['<UNK>'] in questions_int[i] or ans_vocab['<UNK>'] in answers_int[i]): # remove '<UNK>' sentence
        en_corpus_clean.append(questions_int[i])
        ch_corpus_clean.append(answers_int[i])
for i in range(4):
    print(' '.join([ques_rev[en_corpus_clean[i][j]]  for j in range(len(en_corpus_clean[i]))]))
    print(' '.join([ans_rev[ch_corpus_clean[i][j]]  for j in range(len(ch_corpus_clean[i]))]))
    print('')

<BEG> you are asking me out that is so cute that is your name again <END>
<BEG> forget it <END>

<BEG> gosh if only we could find kat a boyfriend <END>
<BEG> let me see what i can do <END>

<BEG> that is because it is such a nice one <END>
<BEG> forget french <END>

<BEG> you have my word as a gentleman <END>
<BEG> you are sweet <END>



In [376]:
en_max_len = 0
ch_max_len = 0

for i in range(len(en_corpus_clean)): # caculate max length
    en_max_len = max(en_max_len, len(en_corpus_clean[i]))
    ch_max_len = max(ch_max_len, len(ch_corpus_clean[i]))

print(en_max_len, ch_max_len)

17 17


## 2.Batch preparation


沿用LAB notebook的data generator

In [377]:
class BatchGenerator:
    def __init__(self, en_corpus, ch_corpus, en_pad, ch_pad, en_max_len, ch_max_len, batch_size):
        assert len(en_corpus) == len(ch_corpus)
        
        batch_num = len(en_corpus)//batch_size
        n = batch_num*batch_size
        
        self.xs = [np.zeros(n, dtype=np.int32) for _ in range(en_max_len)] # encoder inputs
        self.ys = [np.zeros(n, dtype=np.int32) for _ in range(ch_max_len)] # decoder inputs
        self.gs = [np.zeros(n, dtype=np.int32) for _ in range(ch_max_len)] # decoder outputs
        self.ws = [np.zeros(n, dtype=np.float32) for _ in range(ch_max_len)] # decoder weight for loss caculation
        
        self.en_max_len = en_max_len
        self.ch_max_len = ch_max_len
        self.batch_size = batch_size
        
        for b in range(batch_num):
            for i in range(b*batch_size, (b+1)*batch_size):
                for j in range(len(en_corpus[i])-2):
                    self.xs[j][i] = en_corpus[i][j+1]
                for j in range(j+1, en_max_len):
                    self.xs[j][i] = en_pad
                
                for j in range(len(ch_corpus[i])-1):
                    self.ys[j][i] = ch_corpus[i][j]
                    self.gs[j][i] = ch_corpus[i][j+1]
                    self.ws[j][i] = 1.0
                for j in range(j+1, ch_max_len): # don't forget padding and let loss weight zero
                    self.ys[j][i] = ch_pad
                    self.gs[j][i] = ch_pad
                    self.ws[j][i] = 0.0
    
    def get(self, batch_id):
        x = [self.xs[i][batch_id*self.batch_size:(batch_id+1)*self.batch_size] for i in range(self.en_max_len)]
        y = [self.ys[i][batch_id*self.batch_size:(batch_id+1)*self.batch_size] for i in range(self.ch_max_len)]
        g = [self.gs[i][batch_id*self.batch_size:(batch_id+1)*self.batch_size] for i in range(self.ch_max_len)]
        w = [self.ws[i][batch_id*self.batch_size:(batch_id+1)*self.batch_size] for i in range(self.ch_max_len)]
        
        return x, y, g, w

batch = BatchGenerator(en_corpus_clean, ch_corpus_clean, 
                       ques_vocab['<PAD>'], ans_vocab['<PAD>'], en_max_len, ch_max_len, 4)

print("Encoder input")
print("Decoder input")
print("Decoder output")
print()

x, y, g, w = batch.get(2)
for i in range(4):
    print(' '.join([ques_rev[x[j][i]] for j in range(en_max_len)]))
    print(' '.join([ans_rev[y[j][i]] for j in range(ch_max_len)]))
    print(' '.join([ans_rev[g[j][i]] for j in range(ch_max_len)]))
    print('')

Encoder input
Decoder input
Decoder output

do you listen to this crap <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
<BEG> what crap <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
what crap <END> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

i figured you would get to the good stuff eventually <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
<BEG> what good stuff <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
what good stuff <END> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

what good stuff <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
<BEG> the real you <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
the real you <END> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>

she okay <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <P

## 3.Model training


沿用LAB notebook 上 seq2seq的model，train 100個 Epochs，最後用Cherry Pick看結果，前面輸出的結果問答對應都頗合適。

In [378]:
class MachineTranslationSeq2Seq:
    def __init__(self, en_max_len, ch_max_len, en_size, ch_size):
        self.en_max_len = en_max_len
        self.ch_max_len = ch_max_len
        
        with tf.variable_scope('seq2seq_intput/output'):
            self.enc_inputs = [tf.placeholder(tf.int32, [None]) for i in range(en_max_len)] # time mojor feed
            self.dec_inputs = [tf.placeholder(tf.int32, [None]) for i in range(ch_max_len)]
            self.groundtruths = [tf.placeholder(tf.int32, [None]) for i in range(ch_max_len)]
            self.weights = [tf.placeholder(tf.float32, [None]) for i in range(ch_max_len)]
            
        with tf.variable_scope('seq2seq_rnn'): # training by teacher forcing
            self.out_cell = tf.contrib.rnn.LSTMCell(512)
            self.outputs, _ = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(self.enc_inputs, self.dec_inputs, 
                                                                                    self.out_cell, 
                                                                                    en_size, ch_size, 300)
        with tf.variable_scope('seq2seq_rnn', reuse=True): # predict by feeding previous
            self.pred_cell = tf.contrib.rnn.LSTMCell(512, reuse=True) # reuse cell for train and test
            self.predictions, _ = tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(self.enc_inputs, self.dec_inputs, 
                                                                                        self.pred_cell, 
                                                                                        en_size, ch_size, 300, 
                                                                                        feed_previous=True)
        
        with tf.variable_scope('loss'):
            # caculate weighted loss
            self.loss = tf.reduce_mean(tf.contrib.legacy_seq2seq.sequence_loss_by_example(self.outputs, 
                                                                                          self.groundtruths, 
                                                                                          self.weights))
            self.optimizer = tf.train.AdamOptimizer(0.002).minimize(self.loss)
        
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=config)
        self.saver = tf.train.Saver()
        self.sess.run(tf.global_variables_initializer())
    
    def train(self, x, y, g, w):
        fd = {}
        for i in range(self.en_max_len):
            fd[self.enc_inputs[i]] = x[i] # show how to feed a list
        
        for i in range(self.ch_max_len):
            fd[self.dec_inputs[i]] = y[i]
            fd[self.groundtruths[i]] = g[i]
            fd[self.weights[i]] = w[i]
        
        loss, _ = self.sess.run([self.loss, self.optimizer], fd)
        
        return loss

    def output(self, x, y):
        fd = {}
        for i in range(self.en_max_len):
            fd[self.enc_inputs[i]] = x[i]
        
        for i in range(self.ch_max_len):
            fd[self.dec_inputs[i]] = y[i]
        
        out = self.sess.run(self.outputs, fd)
        
        return out
    
    def predict(self, x, ch_beg):
        fd = {}
        for i in range(self.en_max_len):
            fd[self.enc_inputs[i]] = x[i]
        
        for i in range(self.ch_max_len): # when feed previous, the fist token should be '<BEG>', and others are useless
            if i==0:
                fd[self.dec_inputs[i]] = np.ones(y[i].shape, dtype=np.int32)*ch_beg
            else:
                fd[self.dec_inputs[i]] = np.zeros(y[i].shape, dtype=np.int32)
        
        pd = self.sess.run(self.predictions, fd)
        
        return pd
    
    def save(self, e):
        self.saver.save(self.sess, 'model/seq2seq/seq2seq_%d.ckpt'%(e+1))
    
    def restore(self, e):
        self.saver.restore(self.sess, 'model/seq2seq/seq2seq_%d.ckpt'%(e))

In [379]:
tf.reset_default_graph()
model = MachineTranslationSeq2Seq(en_max_len, ch_max_len, len(ques_vocab), len(ans_vocab))

In [380]:
EPOCHS = 100
BATCH_SIZE = 128
batch_num = len(en_corpus_clean)//BATCH_SIZE

batch = BatchGenerator(en_corpus_clean, ch_corpus_clean, 
                       ques_vocab['<PAD>'], ans_vocab['<PAD>'], 
                       en_max_len, ch_max_len, BATCH_SIZE)

In [381]:
rec_loss = []
for e in range(EPOCHS):
    train_loss = 0
    
    for b in range(batch_num):
        x, y, g, w = batch.get(b)
        batch_loss = model.train(x, y, g, w)
        train_loss += batch_loss
    
    train_loss /= batch_num
    rec_loss.append(train_loss)
    print("epoch %d loss: %f" % (e, train_loss))
    
    model.save(e)
    
np.save('./model/seq2seq/rec_loss.npy', rec_loss)


'\nrec_loss = []\nfor e in range(EPOCHS):\n    train_loss = 0\n    \n    for b in range(batch_num):\n        x, y, g, w = batch.get(b)\n        batch_loss = model.train(x, y, g, w)\n        train_loss += batch_loss\n    \n    train_loss /= batch_num\n    rec_loss.append(train_loss)\n    print("epoch %d loss: %f" % (e, train_loss))\n    \n    model.save(e)\n    \nnp.save(\'./model/seq2seq/rec_loss.npy\', rec_loss)\n'

In [453]:
model.restore(98)

INFO:tensorflow:Restoring parameters from model/seq2seq/seq2seq_98.ckpt


In [454]:
import nltk

def cherry_pick(records, n, upper_bound=1.0):
    bleus = []
    
    for en, ch_gr, ch_pd in records:
        # caculate BLEU by nltk
        bleu = nltk.translate.bleu_score.sentence_bleu([ch_gr], ch_pd)
        bleus.append(bleu)
    
    lst = [i for i in range(len(records)) if bleus[i]<=upper_bound]
    lst = sorted(lst, key=lambda i: bleus[i], reverse=True) # sort by BLEU score
    
    return [records[lst[i]] for i in range(n)]

In [455]:
import random as rd

records = []

for i in range(10):
    i = rd.randint(0, batch_num-1) # random pick one to translate
    
    x, y, g, w = batch.get(i)
    out = model.output(x, y)
    pd = model.predict(x, ans_vocab['<BEG>'])

    for j in range(10):
        j = rd.randint(0, BATCH_SIZE-1)
        
        en = [ques_rev[x[i][j]] for i in range(en_max_len)]
        en = en[:en.index('<PAD>')]
        ch_gr = [ans_rev[g[i][j]] for i in range(ch_max_len)]
        if '<END>' in ch_gr:
            ch_gr = ch_gr[:ch_gr.index('<END>')]
        ch_pd = [ques_rev[np.argmax(pd[i][j, :])] for i in range(ch_max_len)]
        if '<END>' in ch_pd:
            ch_pd = ch_pd[:ch_pd.index('<END>')]
        
        records.append([en, ch_gr, ch_pd])

n = 12 # how many result we show
rec_cherry = cherry_pick(records, n)

print("Encoder input")
print("Ground truth")
print("Decoder output")
print()

for i in range(n):
    for j in range(3):
        print(' '.join(rec_cherry[i][j]))
    
    print('')

Encoder input
Ground truth
Decoder output

i have to get my teeth cleaned this week
oh that is nice
oh that is nice

i do not see anything maybe just a little
holy shit i am a freak
holy shit i am a freak

bad luck to see death in the snow
but what happened to the little deer
but what happened to the little deer

i hate tears
is mary in trouble
is mary in trouble

yeah i guess it is all right
are you all right
are you all right

mommy please help me
do not be afraid
do not be afraid

just not always in the sexual sense
you are hurting me
you are hurting me

do not you ever have any problems
i have one now
i have one now

oh mrs crawford do not believe in them
well maybe she better start
well maybe she better start

you tell maggie
no you tell her
no you tell her

actually he sounded nice
oh oh really now we are getting down to it
oh oh really now we are getting down to it

you tell maggie
no you tell her
no you tell her



## 4.Let your model do the conversations below


仿照前面的處理，再丟進model 做 predict，不知道為甚麼shape會與batch size一樣，所以在此把要回答的問題，重複組成一個128的大小

In [456]:
ques_input = ["hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going", "you look great", "good night",
              "hello", "how are you", "where are you going"]

for i in range(len(ques_input)):
    ques_input[i] = '<BEG> '+ques_input[i]+' <END>'
#print(ques_input)
ques_input_toInt = []
for question in ques_input:
    ints = []
    for word in question.split():
        if word not in questions_vocab_to_int:
            ints.append(questions_vocab_to_int['<UNK>'])
        else:
            ints.append(questions_vocab_to_int[word])
    ques_input_toInt.append(ints)
#print(ques_input_toInt)


In [457]:
ques_xs = [np.zeros(len(ques_input_toInt), dtype=np.int32) for _ in range(en_max_len)]
count = 0
for i in range(len(ques_input)):
    for j in range(len(ques_input_toInt[i])-2):
        ques_xs[j][i] = ques_input_toInt[i][j+1]
    for j in range(j + 1, en_max_len):
        ques_xs[j][i] = ques_vocab['<PAD>']


for i in range(5):
    print(' '.join([ques_rev[ques_xs[j][i]] for j in range(en_max_len)]))


hello <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
how are you <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
where are you going <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
you look great <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>
good night <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD> <PAD>


In [458]:
pd = model.predict(ques_xs, ans_vocab['<BEG>'])

In [459]:
for j in range(5):
    qes = [ques_rev[ques_xs[i][j]] for i in range(max_line_length)]
    ans = [ques_rev[np.argmax(pd[i][j, :])] for i in range(max_line_length)]
    ques_final = []
    ans_final = []
    for item in qes:
        if not item == '<PAD>':                      
            ques_final.append(item)
    for item in ans:
        if not item == '<END>':                      
            ans_final.append(item)
    print("A : {}" .format(ques_final))
    print("B : {}" .format(ans_final))
    print("")

A : ['hello']
B : ['hi', 'late', 'i', 'am', 'late', 'for', 'christmas', 'this', 'morning']

A : ['how', 'are', 'you']
B : ['another', 'young', 'man']

A : ['where', 'are', 'you', 'going']
B : ['to', 'complete', 'my', 'cure', 'i', 'am', 'just', 'hiding']

A : ['you', 'look', 'great']
B : ['thanks', 'i', 'just', 'get', 'into', 'this', 'calling', 'jake']

A : ['good', 'night']
B : ['good', 'night']



## Response properly


第 1 個對話場景 : B 今天早上要去 A (Late) 的家開聖誕轟趴，可是萬萬沒想到他媽媽需要出門買菜，叫他照顧年幼的弟弟，結果快到中午才趕到Late的家，B進了A家門後，開始了這段對話。

第 2 個對話場景 : B可能是老年癡呆，A是醫生看他最近回答有沒有進步........QQ 掰不下去。

第 3 個對話場景 : A和B在醫院外面遇到，A就問"where are you going"，B就說"to complete my cure i am just hiding "。因為A和B是麻吉，B就把他隱瞞已久的病情跟A講。

第 4 個對話場景 : A在路上看到B，B頂著一頭酷炫的頭髮，A就說"you look great"，B就說 "thanks i just get into this calling jake" 謝謝我才剛進去這間理髮店叫jake。

第 5 個對話場景 : A和B是正在講電話的情侶，最後準備要睡時，他們就互道晚安，掛了電話。
