In [1]:
import torch 
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

USE_CUDA=torch.cuda.is_available()
device=torch.device("cuda" if USE_CUDA else "cpu")
print(device)

1
GeForce GTX 1080 Ti
cuda


In [2]:
import os
corpus_dir="data/cornell movie-dialogs corpus"
def printLines(file,n=10):
    with open(file,"rb") as datafile:
        lines=datafile.readlines()
    for line in lines[:n]:
        print(line)
printLines(os.path.join(corpus_dir,"movie_lines.txt"))

b'L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!\n'
b'L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!\n'
b'L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.\n'
b'L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?\n'
b"L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.\n"
b'L924 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ Wow\n'
b"L872 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Okay -- you're gonna need to learn how to lie.\n"
b'L871 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ No\n'
b'L870 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I\'m kidding.  You know how sometimes you just become this "persona"?  And you don\'t know how to quit?\n'
b'L869 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Like my fear of wearing pastels?\n'


In [3]:
import codecs
datafile=os.path.join(corpus_dir,"formatted_movie_lines.txt")
delimiter="\t"
delimiter=str(codecs.decode(delimiter,"unicode_escape"))

lines={}
conversations={}
MOVIE_LINES_FIELDS=["lineId","characterId","movieId","character","text"]
MOVIE_CONVERSATIONS_FIELDS=["character1Id","character2Id","movieId","utteranceIds"]

print("Processing corpus...")
def loadLines(fileName,fields):
    lines={}
    with open(fileName,"r",encoding="iso-8859-1") as f:
        for line in f:
            values=line.split(" +++$+++ ")
            lineObj={}
            for i,field in enumerate(fields):
                lineObj[field]=values[i]
            lines[lineObj["lineId"]]=lineObj
    return lines
lines=loadLines(os.path.join(corpus_dir,"movie_lines.txt"),MOVIE_LINES_FIELDS)
lines_keys=list(lines.keys())
print(lines[lines_keys[0]])

Processing corpus...
{'characterId': 'u2532', 'text': "His wife's influence.\n", 'character': 'COULMIER', 'lineId': 'L456595', 'movieId': 'm164'}


In [4]:
def loadConversations(fileName,lines,fields):
    conversations=[]
    with open(fileName,"r",encoding="iso-8859-1") as f:
        for line in f:
            values=line.split(" +++$+++ ")
            convObj={}
            for i,field in enumerate(fields):
                convObj[field]=values[i]
            linesIds=eval(convObj["utteranceIds"])
            convObj["lines"]=[]
            for lineId in linesIds:
                convObj["lines"].append(lines[lineId])
            conversations.append(convObj)
    return conversations
conversations=loadConversations(os.path.join(corpus_dir,"movie_conversations.txt"),lines,MOVIE_CONVERSATIONS_FIELDS)
print(conversations[0])

{'utteranceIds': "['L194', 'L195', 'L196', 'L197']\n", 'lines': [{'characterId': 'u0', 'text': 'Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.\n', 'character': 'BIANCA', 'lineId': 'L194', 'movieId': 'm0'}, {'characterId': 'u2', 'text': "Well, I thought we'd start with pronunciation, if that's okay with you.\n", 'character': 'CAMERON', 'lineId': 'L195', 'movieId': 'm0'}, {'characterId': 'u0', 'text': 'Not the hacking and gagging and spitting part.  Please.\n', 'character': 'BIANCA', 'lineId': 'L196', 'movieId': 'm0'}, {'characterId': 'u2', 'text': "Okay... then how 'bout we try out some French cuisine.  Saturday?  Night?\n", 'character': 'CAMERON', 'lineId': 'L197', 'movieId': 'm0'}], 'character2Id': 'u2', 'character1Id': 'u0', 'movieId': 'm0'}


In [5]:
import csv
def extractSentencePairs(conversations):
    qa_pairs=[]
    for conversation in conversations:
        for i in range(len(conversation["lines"])-1):
            inputLine=conversation["lines"][i]["text"].strip()
            targetLine=conversation["lines"][i+1]["text"].strip()
            if inputLine and targetLine:
                qa_pairs.append([inputLine,targetLine])
    return qa_pairs
print("Writing newly formatted file...")
with open(datafile,"w",encoding="utf-8") as outputfile:
    writer=csv.writer(outputfile,delimiter=delimiter,lineterminator="\n")
    pairs=extractSentencePairs(conversations)
    for pair in pairs:
        writer.writerow(pair)
print("Sample lines from file:")
printLines(datafile)

Writing newly formatted file...
Sample lines from file:
b"Can we make this quick?  Roxanne Korrine and Andrew Barrett are having an incredibly horrendous public break- up on the quad.  Again.\tWell, I thought we'd start with pronunciation, if that's okay with you.\r\n"
b"Well, I thought we'd start with pronunciation, if that's okay with you.\tNot the hacking and gagging and spitting part.  Please.\r\n"
b"Not the hacking and gagging and spitting part.  Please.\tOkay... then how 'bout we try out some French cuisine.  Saturday?  Night?\r\n"
b"You're asking me out.  That's so cute. What's your name again?\tForget it.\r\n"
b"No, no, it's my fault -- we didn't have a proper introduction ---\tCameron.\r\n"
b"Cameron.\tThe thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does.\r\n"
b"The thing is, Cameron -- I'm at the mercy of a particularly hideous breed of loser.  My sister.  I can't date until she does.\tSeems like she coul

In [6]:
import re
import unicodedata

PAD_token=0
SOS_token=1
EOS_token=2
MAX_LENGTH=10

corpus_name = "cornell movie-dialogs corpus"

class Voc:
    def __init__(self,name):
        self.name=name
        self.trimmed=False
        self.word2index={}
        self.index2word={PAD_token:"PAD",SOS_token:"SOS",EOS_token:"EOS"}
        self.word2count={}
        self.num_words=3
    def addWord(self,word):
        if word not in self.word2index:
            self.word2index[word]=self.num_words
            self.index2word[self.num_words]=word
            self.num_words+=1
            self.word2count[word]=1
        else:
            self.word2count[word]+=1
    def addSentence(self,sentence):
        for word in sentence.split(" "):
            self.addWord(word)
    def trim(self,min_count):
        if self.trimmed:
            return
        self.trimmed=True
        keep_words=[]
        for k,v in self.word2count.items():
            if v>=min_count:
                keep_words.append(k)
        print("Keep words {}/{} = {:.4f}".format(
                len(keep_words),
                len(self.word2count),
                len(keep_words)/len(self.word2count)))
        
        self.word2count={}
        self.word2index={}
        self.index2word={PAD_token:"PAD",SOS_token:"SOS",EOS_token:"EOS"}
        self.num_words=3
        for word in keep_words:
            self.addWord(word)
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
    )            
def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    s = re.sub(r"\s+", r" ", s).strip()
    return s

def readVocs(datafile,corpus_name):
    print("Reading lines from : ",datafile)
    lines=open(datafile,encoding="utf-8").read().strip().split("\n")
    pairs=[[normalizeString(s) for s in line.split("\t")]for line in lines]
    voc=Voc(corpus_name)
    return voc,pairs
def filterPair(p):
    return len(p[0].split(" "))<MAX_LENGTH and len(p[1].split(" "))<MAX_LENGTH
def filterPairs(pairs):
    return [p for p in pairs if filterPair(p)]

def loadPrepareData(corpus_name,datafile):
    voc,pairs=readVocs(datafile,corpus_name)
    pairs=filterPairs(pairs)
    for pair in pairs:
        voc.addSentence(pair[0])
        voc.addSentence(pair[1])
    print("Counted words:",voc.num_words)
    return voc,pairs

voc,pairs=loadPrepareData(corpus_name,datafile)
for pair in pairs[:10]:
    print(pair)
    
    

Reading lines from :  data/cornell movie-dialogs corpus\formatted_movie_lines.txt
Counted words: 18008
['there .', 'where ?']
['you have my word . as a gentleman', 'you re sweet .']
['hi .', 'looks like things worked out tonight huh ?']
['you know chastity ?', 'i believe we share an art instructor']
['have fun tonight ?', 'tons']
['well no . . .', 'then that s all you had to say .']
['then that s all you had to say .', 'but']
['but', 'you always been this selfish ?']
['do you listen to this crap ?', 'what crap ?']
['what good stuff ?', 'the real you .']


In [7]:
MIN_COUNT=3
def trimRareWords(voc,pairs,MIN_COUNT):
    voc.trim(MIN_COUNT)
    keep_pairs=[]
    for pair in pairs:
        input_sentence=pair[0]
        output_sentence=pair[1]
        keep_input=True
        keep_output=True
        for word in input_sentence.split(" "):
            if word not in voc.word2index:
                keep_input=False
                break
        for word in output_sentence.split(" "):
            if word not in voc.word2index:
                keep_output=False
                break
        if keep_input and keep_output:
            keep_pairs.append(pair)
    print("Trimmed from {} to {}, {:.4f} in total.".format(len(pairs),len(keep_pairs),len(keep_pairs)/len(pairs)))
    return keep_pairs
pairs=trimRareWords(voc,pairs,MIN_COUNT)

Keep words 7823/18005 = 0.4345
Trimmed from 64271 to 53165, 0.8272 in total.


In [8]:
import itertools
import torch
import numpy as np

def indexFromSentence(voc,sentence):
    return [voc.word2index[word] for word in sentence.split(" ")]+[EOS_token]
def zeroPadding(l,fill_value=PAD_token):
    return list(itertools.zip_longest(*l,fillvalue=fill_value))
def binaryMatrix(l,value=PAD_token):
    m=[]
    for i,seq in enumerate(l):
        m.append([])
        for token in seq:
            if token==value:
                m[i].append(0)
            else:
                m[i].append(1)
    return m

def inputVar(input_batch,voc):
    indexs_batch=[indexFromSentence(voc,sentence) for sentence in input_batch]
    length=torch.tensor([len(index) for index in indexs_batch])
    padList=zeroPadding(indexs_batch)
    padVar=torch.LongTensor(padList)
    return padVar,length
def outputVar(output_batch,voc):
    indexs_batch=[indexFromSentence(voc,sentence) for sentence in output_batch]
    max_target_len=max([len(index) for index in indexs_batch])
    padList=zeroPadding(indexs_batch)
    mask=binaryMatrix(padList)
    mask=torch.ByteTensor(mask)
    padVar=torch.LongTensor(padList)
    return padVar,mask,max_target_len

import time
def batch2TrianData(voc,pair_batch):
    start=time.time()
    pair_batch.sort(key=lambda x:len(x[0].split(" ")),reverse=True)
    input_batch,output_batch=[],[]
    for pair in pair_batch:
        input_batch.append(pair[0])
        output_batch.append(pair[1])
    inp,lengths=inputVar(input_batch,voc)
    output,mask,max_target_len=outputVar(output_batch,voc)
    return inp,lengths,output,mask,max_target_len

small_batchsize=5
print([np.random.choice(pairs) for _ in range(small_batchsize)])

inp,lengths,output,mask,max_target_len=batch2TrianData(voc,
                                                       [np.random.choice(pairs) 
                                                        for _ in range(small_batchsize)])
print(inp)
print(lengths)
print(output)
print(mask)
print(max_target_len)

[['what happened . . . ?', 'don t ask .'], ['i put you on the floor .', 'oh .'], ['sleeping in the back .', 'she asked me to pick her up .'], ['great .', 'actually . i could only help him .'], ['wow !', 'wendy !']]
tensor([[1347, 1819, 4537,  308, 2216],
        [1157, 4334, 4580, 5230, 2879],
        [7691, 1239, 1913, 5859,    2],
        [3237, 1295, 1479, 2879,    0],
        [6400, 2961,    2,    2,    0],
        [1792,  416,    0,    0,    0],
        [6459, 1479,    0,    0,    0],
        [ 184,    2,    0,    0,    0],
        [1479,    0,    0,    0,    0],
        [   2,    0,    0,    0,    0]])
tensor([10,  8,  5,  5,  3])
tensor([[ 298, 5610, 3052, 7590, 7691],
        [3688, 4580, 2406, 7035, 4235],
        [2406, 5663,    2, 2609, 6278],
        [   2, 1479,    0, 1479, 1295],
        [   0,    2,    0,    2, 7213],
        [   0,    0,    0,    0, 1479],
        [   0,    0,    0,    0,    2]])
tensor([[1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],

In [9]:
from torch import nn

hidden_size=500

embedding=nn.Embedding(voc.num_words,hidden_size)
print(embedding.weight.shape)

torch.Size([7826, 500])


In [10]:
class EncoderRNN(nn.Module):
    def __init__(self,hidden_size,embedding,n_layers=1,dropout=0):
        super(EncoderRNN,self).__init__()
        self.embedding=embedding
        self.hidden_size=hidden_size
        self.n_layers=n_layers
        
        self.gru=nn.GRU(hidden_size,hidden_size,n_layers,
                        dropout=(0 if n_layers==1 else dropout),bidirectional=True)
        
    def forward(self,input_seq,input_length,hidden=None):
        embed=self.embedding(input_seq)
        packed=torch.nn.utils.rnn.pack_padded_sequence(embed,input_length)
        outputs,hidden=self.gru(packed,hidden)
        outputs,_=torch.nn.utils.rnn.pad_packed_sequence(outputs)
        outputs=outputs[:,:,:self.hidden_size]+outputs[:,:,self.hidden_size:]
        return outputs,hidden
_encoder=EncoderRNN(hidden_size,embedding,n_layers=2)  
_encoder_outputs,_encoder_hidden=_encoder.forward(inp,lengths)

In [11]:
import torch.nn.functional as F

class Attn(nn.Module):
    def __init__(self,method,hidden_size):
        super(Attn,self).__init__()
        self.method=method
        if self.method not in ["dot","general","concat"]:
            raise ValueError(self.method, "is not an appropriate attention method.")
        self.hidden_size=hidden_size
    def dot_score(self,hidden,encoder_output):
        tmp=hidden*encoder_output
        tmp2=torch.sum(tmp,dim=2)
        return tmp2
    def forward(self,hidden,encoder_output):
        if self.method=="dot":
            attn_energies=self.dot_score(hidden,encoder_output)
        attn_energies=attn_energies.t()
        return F.softmax(attn_energies,dim=1).unsqueeze(1)

In [12]:
class LuongAttnDecoderRNN(nn.Module):
    def __init__(self,attn_model,embedding,hidden_size,output_size,n_layers=1,dropout=0.1):
        super(LuongAttnDecoderRNN,self).__init__()
        
        self.attn_model=attn_model
        self.hidden_size=hidden_size
        self.output_size=output_size
        self.n_layers=n_layers
        self.dropout=dropout
        
        self.embedding=embedding
        self.embedding_dropout=nn.Dropout(dropout)
        self.gru=nn.GRU(hidden_size,hidden_size,n_layers,dropout=(0 if n_layers==1 else dropout))
        self.concat=nn.Linear(hidden_size*2,hidden_size)
        self.out=nn.Linear(hidden_size,output_size)
        
        self.attn=Attn(attn_model,hidden_size)
    def forward(self,input_step,last_hidden,encoder_outputs):
        embedded=self.embedding(input_step)
        embedded=self.embedding_dropout(embedded)
        rnn_output,hidden=self.gru(embedded,last_hidden)

        attn_weights=self.attn(rnn_output,encoder_outputs)
        context=attn_weights.bmm(encoder_outputs.transpose(0,1))
        rnn_output=rnn_output.squeeze(0)
        context=context.squeeze(1)
        concat_input=torch.cat((rnn_output,context),1)
        concat_output=torch.tanh(self.concat(concat_input))
        
        output=self.out(concat_output)
        output=F.softmax(output,dim=1)
        
        return output,hidden
_decoder=LuongAttnDecoderRNN("dot",embedding,hidden_size,voc.num_words,n_layers=2)
_decoder_input_step=torch.LongTensor([[SOS_token for _ in range(small_batchsize)]])
_decoder_hidden=_encoder_hidden[:_decoder.n_layers]
_decoder_output,_decoder_hidden=_decoder.forward(_decoder_input_step,_decoder_hidden,_encoder_outputs)

In [13]:
def maskNLLLoss(inp,target,mask):
    nTotal=mask.sum()
    crossEntropy=-torch.log(torch.gather(inp,1,target.view(-1,1)).squeeze(1))
    loss=crossEntropy.masked_select(mask).mean()
    loss=loss.to(device)
    return loss,nTotal.item()

def train(input_variable,lengths,target_variable,mask,max_target_len,encoder,decoder,embedding,
         encoder_optimizer,decoder_optimizer,batch_size,clip,max_length=MAX_LENGTH):
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    
    input_variable=input_variable.to(device)
    lengths=lengths.to(device)
    target_variable=target_variable.to(device)
    mask=mask.to(device)
    
    loss=0
    print_losses=[]
    n_totals=0
    
    encoder_outputs,encoder_hidden=encoder(input_variable,lengths)
    
    decoder_input=torch.LongTensor([[SOS_token for _ in range(batch_size)]])
    decoder_input=decoder_input.to(device)
    
    decoder_hidden=encoder_hidden[:decoder.n_layers]
    
    use_teacher_forcing=True if np.random.random()<teacher_forcing_ratio else False
    
    if use_teacher_forcing:
        for t in range(max_target_len):
            decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden,encoder_outputs)
            
            decoder_input=target_variable[t].view(1,-1)
            
            mask_loss,nTotal=maskNLLLoss(decoder_output,target_variable[t],mask[t])
            loss+=mask_loss
            print_losses.append(mask_loss.item()*nTotal)
            
            n_totals+=nTotal
    else:
        for t in range(max_target_len):
            decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden,encoder_outputs)
            
            _,topi=decoder_output.topk(1)
            
            decoder_input=torch.LongTensor([[topi[i][0] for i in range(batch_size)]])
            decoder_input=decoder_input.to(device)
            
            mask_loss,nTotal=maskNLLLoss(decoder_output,target_variable[t],mask[t])
            loss+=mask_loss
            print_losses.append(mask_loss.item()*nTotal)
            n_totals+=nTotal
    loss.backward()
    
    _=torch.nn.utils.clip_grad_norm_(encoder.parameters(),clip)
    _=torch.nn.utils.clip_grad_norm_(decoder.parameters(),clip)
    
    encoder_optimizer.step()
    decoder_optimizer.step()
    
    return sum(print_losses)/n_totals

In [14]:
import random
class GreedySearchDecoder(nn.Module):
    def __init__(self,encoder,decoder):
        super(GreedySearchDecoder,self).__init__()
        self.encoder=encoder
        self.decoder=decoder
    def forward(self,input_seq,input_length,max_length):
        encoder_output,encoder_hidden=self.encoder(input_seq,input_length)
        decoder_hidden=encoder_hidden[:self.decoder.n_layers]
        
        decoder_input=torch.ones(1,1,device=device,dtype=torch.long)*SOS_token
        
        all_tokens=torch.zeros([0],device=device,dtype=torch.long)
        all_scores=torch.zeros([0],device=device)
        
        for _ in range(max_length):
            decoder_output,decoder_hidden=self.decoder(decoder_input,decoder_hidden,encoder_output)
            decoder_scores,decoder_input=torch.max(decoder_output,dim=1)
            
            all_tokens=torch.cat((all_tokens,decoder_input),dim=0)
            all_scores=torch.cat((all_scores,decoder_scores),dim=0)
            
            decoder_input=torch.unsqueeze(decoder_input,0)
        return all_tokens,all_scores
    
def trainIters(model_name,voc,pairs,encoder,decoder,encoder_optimizer,decoder_optimizer,
              embedding,encoder_n_layers,decoder_n_layers,save_dir,n_iteration,batch_size,
              print_every,save_every,clip,corpus_name,loadFileName):
    '''print("Loading batches...")
    train_batches=[batch2TrianData(voc,[random.choice(pairs) for _ in range(batch_size)])
                  for _ in range(n_iteration)]'''
    
    print("Initializing...")
    start_iteration=1
    print_loss=0
    if loadFileName:
        start_iteration=checkpoint['iteration']+1
    
    decoder_searcher=GreedySearchDecoder(encoder,decoder)
    print("Training...")
    for iteration in range(start_iteration,n_iteration+1):
        training_batch=batch2TrianData(voc,[random.choice(pairs) for _ in range(batch_size)])
        
        input_variable,lengths,target_variable,mask,max_target_len=training_batch

        loss=train(input_variable,lengths,target_variable,mask,max_target_len,encoder,
                  decoder,embedding,encoder_optimizer,decoder_optimizer,batch_size,clip)
        
        print_loss+=loss
        
        if iteration%print_every==0:
            print_loss_avg=print_loss/print_every
            print("Iteration:{}\t Percent complete: {:.1f}%\tAverage loss:{:.4f}".format(
                iteration,iteration/n_iteration*100,print_loss_avg))
            print_loss=0
        if iteration%save_every==0:
            
            test_inp,test_len,test_target,test_max_len=input_variable[:,0:1],lengths[0:1],target_variable[:,0:1],max_target_len

            test_out,test_scores=decoder_searcher.forward(test_inp.to(device),test_len.to(device),test_max_len)

            test_inp_list=[voc.index2word[i] for i in test_inp.numpy()[:,0]]
            test_target_list=[voc.index2word[i] for i in test_target.numpy()[:,0]]
            test_out_list=[voc.index2word[i] for i in test_out.cpu().numpy()]
            
            test_inp_str=""
            test_target_str=""
            test_out_str=""
            
            for w in test_inp_list:test_inp_str+=w+" "
            for w in test_target_list:test_target_str+=w+" "
            for w in test_out_list:test_out_str+=w+" "
                
            print("="*30)
            print("[In] : "+test_inp_str)
            print("[Target] : "+test_target_str)
            print("[Out] : "+test_out_str)
            
            directory=os.path.join(save_dir,model_name,corpus_name,"{}-{}_{}".format(encoder_n_layers,decoder_n_layers,hidden_size))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                    'iteration':iteration,
                    'encoder':encoder.state_dict(),
                    'decoder':decoder.state_dict(),
                    'encoder_optimizer':encoder_optimizer.state_dict(),
                    'decoder_optimizer':decoder_optimizer.state_dict(),
                    'loss':loss,
                    'voc_dict':voc.__dict__,
                    'embedding':embedding.state_dict()
                },os.path.join(directory,'{}_{}.tar'.format(iteration,"checkpoint")))

In [16]:
model_name="seq2seq_model"
attn_model="dot"
 
hidden_size=500
encoder_n_layers=2
decoder_n_layers=2
dropout=0.1
batch_size=256

loadFileName="data/save/seq2seq_model/cornell movie-dialogs corpus/2-2_500/50000_checkpoint-baseline.tar"## loading my pretrained model
#loadFileName=None ##training your own model
checkpoint_iter=4000

if loadFileName:
    checkpoint=torch.load(loadFileName)
    encoder_sd=checkpoint["encoder"]
    decoder_sd=checkpoint["decoder"]
    encoder_optimizer_sd=checkpoint["encoder_optimizer"]
    decoder_optimizer_sd=checkpoint["decoder_optimizer"]
    embedding_sd=checkpoint["embedding"]
    voc.__dict__=checkpoint["voc_dict"]
embedding=nn.Embedding(voc.num_words,hidden_size)
if loadFileName:
    embedding.load_state_dict(embedding_sd)
encoder=EncoderRNN(hidden_size,embedding,encoder_n_layers,dropout)
decoder=LuongAttnDecoderRNN(attn_model,embedding,hidden_size,voc.num_words,decoder_n_layers,dropout)
if loadFileName:
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    
encoder=encoder.to(device)
decoder=decoder.to(device)
print("Preparing encoder decoder done...")
clip=50.
teacher_forcing_ratio=1.
learning_rate=0.0001
decoder_learning_ratio=5.0
n_iteration=50000
save_every=1000
print_every=500
save_dir = os.path.join("data", "save")

encoder.train()
decoder.train()

encoder_optimizer=torch.optim.Adam(encoder.parameters(),lr=learning_rate)
decoder_optimizer=torch.optim.Adam(decoder.parameters(),lr=learning_rate*decoder_learning_ratio)

if loadFileName:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)

Preparing encoder decoder done...


In [78]:


trainIters(model_name,voc,pairs,encoder,decoder,encoder_optimizer,decoder_optimizer,
          embedding,encoder_n_layers,decoder_n_layers,save_dir,n_iteration,batch_size,print_every,
          save_every,clip,corpus_name,loadFileName)


Preparing encoder decoder done...
Initializing...
Training...
Iteration:500	 Percent complete: 1.0%	Average loss:4.0027
Iteration:1000	 Percent complete: 2.0%	Average loss:3.3212
[In] : i just don t think it would work . EOS 
[Target] : why wouldn t it ? EOS PAD PAD PAD PAD 
[Out] : i know . EOS . EOS . EOS . EOS 
Iteration:1500	 Percent complete: 3.0%	Average loss:3.0322
Iteration:2000	 Percent complete: 4.0%	Average loss:2.7486
[In] : right . well thanks . for the lift . EOS 
[Target] : no sweat . only do me a favor ? EOS 
[Out] : thank you . EOS me . EOS me . EOS 
Iteration:2500	 Percent complete: 5.0%	Average loss:2.4806
Iteration:3000	 Percent complete: 6.0%	Average loss:2.1994
[In] : shit too bad we in base camp anyway . EOS 
[Target] : what you got there beers ? EOS PAD PAD PAD 
[Out] : what s the matter ? EOS deep dead . EOS 
Iteration:3500	 Percent complete: 7.0%	Average loss:1.9343
Iteration:4000	 Percent complete: 8.0%	Average loss:1.6916
[In] : you re one of william s kids 

In [17]:
encoder.train(mode=False)
decoder.train(mode=False)
def chatbot_question(q):
    q=normalizeString(q)
    q_seq=[]
    for w in q.split(" "):
        if w not in voc.word2index:
            raise ValueError(w,"not in vocabulary.")
        q_seq.append(voc.word2index[w])
    q_seq=np.array(q_seq)
    q_length=np.array(len(q_seq))
    q_seq=torch.LongTensor(np.expand_dims(q_seq,-1))
    q_length=torch.LongTensor(np.expand_dims(q_length,-1))
    q_seq=q_seq.to(device)
    q_length=q_length.to(device)
    
    encoder_output,encoder_hidden=encoder.forward(q_seq,q_length)
    
    decoder_hidden=encoder_hidden[:decoder.n_layers]
    decoder_input=torch.LongTensor(np.zeros((1,1))+SOS_token)
    decoder_input=decoder_input.to(device)
    
    answer_tokens=[]
    for t in range(MAX_LENGTH):
        decoder_output,decoder_hidden=decoder(decoder_input,decoder_hidden,encoder_output)
        
        token_score,token_idx=torch.max(decoder_output,dim=1)
        answer_tokens.append(token_idx.cpu().numpy()[0])
        
        decoder_input=torch.unsqueeze(token_idx,1)
        
        if token_idx==EOS_token:
            break
    return ' '.join([voc.index2word[i] for i in answer_tokens])
q="how are you ?"
a=chatbot_question(q)
print(a)

fine . EOS


In [20]:
qs=["How are you? Frank","I'm fine. Mary.",
    "What's wrong?","What happened?",
    "It's a nice day!","I like the weather.",
    "You are such a sweet girl.","He is the best teacher I have seen.",
    "Let's do the cleaning.","Let's go!",
   "Can you pay for me?","I want that car.",
    "It feels good to talk to a man.","Being iron man is good.",
    "I want to be the king.","I want to have a cigarette."]
for q in qs:
    a=chatbot_question(q)
    print("[Question] "+q,end="\t")
    print("[Bot] "+a)
    #print("="*30)

[Question] How are you? Frank	[Bot] fine fine . EOS
[Question] I'm fine. Mary.	[Bot] so am i . . . EOS
[Question] What's wrong?	[Bot] nothing . EOS
[Question] What happened?	[Bot] i don t know ! EOS
[Question] It's a nice day!	[Bot] oh . . . EOS
[Question] I like the weather.	[Bot] i like such a different world . EOS
[Question] You are such a sweet girl.	[Bot] you think i better twenty a girl ? EOS
[Question] He is the best teacher I have seen.	[Bot] there s a lot of jack . EOS
[Question] Let's do the cleaning.	[Bot] we re getting married . EOS
[Question] Let's go!	[Bot] okay . let s go . EOS
[Question] Can you pay for me?	[Bot] that s okay . EOS
[Question] I want that car.	[Bot] what s this ? EOS
[Question] It feels good to talk to a man.	[Bot] you re a certain son son . EOS
[Question] Being iron man is good.	[Bot] faith . EOS
[Question] I want to be the king.	[Bot] that s the last week . EOS
[Question] I want to have a cigarette.	[Bot] i am . EOS
