In [1]:
import os
import re
import codecs
import csv
import collections
import unicodedata
import itertools
import torch
from torch import nn, optim
import torch.nn.functional as F
import pickle
import random

In [2]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda") if USE_CUDA else torch.device("cpu")

In [3]:
# Name of the corpus folder and its location relative to the notebook ("data/<corpus>").
data_file = "cornell_movie_dialogs_corpus"
data = os.path.join('data', data_file)

#### Print the first few lines of the dataset files

In [4]:
def print_lines(dataset, n=5):
    """Print the first ``n`` lines of a text file.

    Args:
        dataset: Path of the file to preview; it is decoded as ISO-8859-1.
        n: Number of leading lines to show (defaults to 5).

    Each line keeps its own trailing newline and the lines are joined with
    another newline, so a blank line appears between entries in the output.
    """
    with open(dataset, encoding='iso-8859-1') as file:
        # islice stops after n lines instead of reading the whole file into memory.
        print("\n".join(itertools.islice(file, n)))
# Preview both raw corpus files; fields in each line are separated by " +++$+++ ".
print("First few lines in movie_lines.txt")
print_lines(os.path.join(data,"movie_lines.txt"))
print("First few lines in movie converstations file")
print_lines(os.path.join(data, "movie_conversations.txt"))

First few lines in movie_lines.txt
L1045 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ They do not!

L1044 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ They do to!

L985 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ I hope so.

L984 +++$+++ u2 +++$+++ m0 +++$+++ CAMERON +++$+++ She okay?

L925 +++$+++ u0 +++$+++ m0 +++$+++ BIANCA +++$+++ Let's go.

First few lines in movie converstations file
u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L194', 'L195', 'L196', 'L197']

u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L198', 'L199']

u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L200', 'L201', 'L202', 'L203']

u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L204', 'L205', 'L206']

u0 +++$+++ u2 +++$+++ m0 +++$+++ ['L207', 'L208']



### Utility functions to process the conversations in the files

In [5]:
def load_lines_to_dict(file_name, fields):
    """
    Read a ' +++$+++ '-separated file and index its rows by line id.

    Each row is split on the separator and the i-th piece is stored under
    the i-th name in ``fields``. The resulting record is stored under its
    "lineID" value, so ``fields`` must contain "lineID". The last piece keeps
    the row's trailing newline.
    """
    parsed = {}
    with open(file_name, 'r', encoding='iso-8859-1') as handle:
        for raw in handle:
            parts = raw.split(' +++$+++ ')
            record = {name: parts[pos] for pos, name in enumerate(fields)}
            parsed[record["lineID"]] = record
    return parsed
                

def load_conversations(file_name, fields, lines):
    """
    Read the conversations file and attach the referenced line records.

    Each row is split on ' +++$+++ ' and mapped onto ``fields`` by position.
    Every id of the form L<digits> found in the row's "utteranceIDs" value is
    looked up in ``lines`` (a KeyError is raised for an unknown id) and the
    records are stored, in order, under the extra key 'lines'.
    Returns the list of conversation dicts in file order.
    """
    line_id_pattern = re.compile('L[0-9]+')
    grouped = []
    with open(file_name, 'r', encoding='iso-8859-1') as handle:
        for raw in handle:
            parts = raw.split(' +++$+++ ')
            conversation = {name: parts[pos] for pos, name in enumerate(fields)}
            found_ids = line_id_pattern.findall(conversation["utteranceIDs"])
            conversation['lines'] = [lines[line_id] for line_id in found_ids]
            grouped.append(conversation)
    return grouped


def load_conv_pairs(conversations):
    """
    Build [utterance, reply] pairs from consecutive lines of each conversation.

    The "text" of every line and of the line that follows it are stripped of
    surrounding whitespace; a pair is kept only when both are non-empty.
    """
    pairs = []
    for conversation in conversations:
        turns = conversation['lines']
        for current, following in zip(turns, turns[1:]):
            utter = current["text"].strip()
            reply = following["text"].strip()
            if utter and reply:
                pairs.append([utter, reply])
    return pairs

In [5]:
# Path of the tab-separated pairs file that a later cell writes and reads back.
corpus = os.path.join('data', 'formated_corpus.txt')

# Column names, in file order, for the two raw corpus files.
MOVIE_LINES_FIELDS = ["lineID", "characterID", "movieID", "character", "text"]
MOVIE_CONVERSATIONS_FIELDS = ["character1ID", "character2ID", "movieID", "utteranceIDs"]

print("\nProcessing corpus...")
# lineID -> record for every line of dialogue.
lines = load_lines_to_dict(os.path.join(data,"movie_lines.txt"), MOVIE_LINES_FIELDS)
print('\n loading conversations')
# Each conversation gets its line records attached under the 'lines' key.
conversations = load_conversations(os.path.join(data, "movie_conversations.txt"), MOVIE_CONVERSATIONS_FIELDS,
                                   lines)



Processing corpus...

 loading conversations


In [6]:
# Write every (utterance, reply) pair as one tab-separated row of the corpus file.
delimiter = '\t'
print('writing conversation pair to csv file')
# newline='' is what the csv module expects of files it writes to; without it the
# explicit '\n' line terminator can be translated again by the text layer.
with open(corpus, 'w', encoding='utf-8', newline='') as file:
    writer = csv.writer(file, delimiter=delimiter, lineterminator='\n')
    for pair in load_conv_pairs(conversations):
        writer.writerow(pair)

writing conversation pair to csv file


In [6]:
# Preview the generated pairs file. `corpus` is assigned in the corpus-processing
# cell above, so that cell has to be run first in the current kernel.
print_lines(corpus)

NameError: name 'corpus' is not defined

In [7]:
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token
MAX_length = 10  # Sentences with this many tokens or more are rejected by Vocab.filter_pairs


class Vocab:
    """Word/index vocabulary plus the sentence pairs that survive filtering.

    Attributes:
        voc: Flat list of every token seen by ``update_vocab``.
        word_to_index: Maps each word to its integer id; always contains the
            reserved 'PAD', 'SOS' and 'EOS' entries.
        index_to_word: Inverse of ``word_to_index``; filled by ``generate_vocab``.
        trim: When true, words seen fewer than ``min_count`` times are dropped.
        min_count: Minimum frequency for a word to be kept when trimming.
        max_length: Token-count limit used by ``filter_pairs``.
        vocab_pairs: Normalized [utterance, reply] pairs accepted so far.
    """

    def __init__(self):
        self.voc = []
        self.word_to_index = {'PAD': PAD_token, 'SOS': SOS_token, 'EOS': EOS_token}
        self.index_to_word = {}
        self.trim = True
        self.min_count = 3
        # Single source of truth for the length limit instead of a second literal.
        self.max_length = MAX_length
        self.vocab_pairs = []

    def unicodeToAscii(self, s):
        """Return ``s`` NFD-decomposed with all combining marks (category 'Mn') removed."""
        return ''.join(
            c for c in unicodedata.normalize('NFD', s)
            if unicodedata.category(c) != 'Mn'
        )

    def normalizeSentence(self, sentence):
        """Lower-case and clean a sentence.

        Puts a space before '.', '!' and '?', replaces every run of characters
        other than ASCII letters and those three marks with a single space, and
        collapses/strips whitespace.
        """
        s = self.unicodeToAscii(sentence.lower().strip())
        s = re.sub(r"([.!?])", r" \1", s)
        s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
        s = re.sub(r"\s+", r" ", s).strip()
        return s

    def update_vocab(self, sentence):
        """Normalize ``sentence`` and record its tokens for later counting."""
        sentence = self.normalizeSentence(sentence)
        self.voc.extend(sentence.split())

    def generate_vocab(self):
        """(Re)build ``word_to_index`` and ``index_to_word`` from the recorded tokens.

        Words are numbered by descending frequency, starting right after the
        reserved tokens. The mapping is rebuilt from the reserved tokens on
        every call, so calling this method again (e.g. when a notebook cell is
        re-run) yields the same ids instead of shifting every word upwards.
        """
        reserved = {'PAD': PAD_token, 'SOS': SOS_token, 'EOS': EOS_token}
        counts = collections.Counter(self.voc).most_common()
        if self.trim:
            counts = [(word, count) for word, count in counts if count >= self.min_count]
        offset = len(reserved)
        self.word_to_index = dict(reserved)
        for position, (word, _) in enumerate(counts):
            self.word_to_index[word] = position + offset
        self.index_to_word = {i: word for word, i in self.word_to_index.items()}

    def filter_pairs(self, pair):
        """Normalize a pair and keep it if it is short enough and fully in-vocabulary.

        Only ``pair[0]`` and ``pair[1]`` are used, and ``pair`` itself is left
        untouched: a new two-element list of normalized sentences is appended
        to ``vocab_pairs`` when neither sentence has ``max_length`` or more
        tokens and every token is a key of ``word_to_index``.
        """
        normalized = [self.normalizeSentence(pair[0]), self.normalizeSentence(pair[1])]
        if any(len(sentence.split()) >= self.max_length for sentence in normalized):
            return
        if all(word in self.word_to_index
               for sentence in normalized
               for word in sentence.split()):
            self.vocab_pairs.append(normalized)

In [11]:
## Disabled: the vocabulary this cell would build is loaded from voc.pickle in a later cell.
# voc = Vocab()
# with open(corpus,'r',encoding='utf-8') as file:
#     for line in file:
#         pair = line.split('\t')
#         voc.update_vocab(pair[0])
#         voc.update_vocab(pair[1])
# voc.generate_vocab()

In [12]:
# with open(corpus, 'r', encoding='utf-8') as file:
#     for line in file:
#         pair = line.split('\t')
#         voc.filter_pairs(pair)

In [8]:
# The two commented lines write `voc` to voc.pickle; they are disabled and the
# previously saved object is loaded instead.
# NOTE(review): pickle.load can execute arbitrary code from the file, so only
# load a voc.pickle you created yourself. Presumably the file holds a Vocab
# instance (later cells read voc.vocab_pairs and voc.word_to_index), in which
# case the Vocab class must already be defined in the kernel — confirm.
# with open('voc.pickle', 'wb') as file:
#     pickle.dump(voc, file)
with open('voc.pickle', 'rb') as file:
    voc = pickle.load(file)

In [9]:
# Show the first ten filtered [utterance, reply] pairs.
for pairs in voc.vocab_pairs[:10]:
    print(pairs)

['there .', 'where ?']
['you have my word . as a gentleman', 'you re sweet .']
['hi .', 'looks like things worked out tonight huh ?']
['you know chastity ?', 'i believe we share an art instructor']
['have fun tonight ?', 'tons']
['well no . . .', 'then that s all you had to say .']
['then that s all you had to say .', 'but']
['but', 'you always been this selfish ?']
['do you listen to this crap ?', 'what crap ?']
['what good stuff ?', 'the real you .']


In [10]:
class TextTransform:
    """Turn batches of normalized sentence pairs into padded index tensors.

    Padding transposes the batch: ``zero_padding`` zips the encoded sentences
    together, so the resulting tensors have one row per time step and one
    column per sentence.
    """

    def __init__(self):
        pass

    def index_from_sentence(self, word_to_index, sentence):
        """Map each whitespace-separated word to its id and append EOS_token.

        Raises:
            KeyError: if a word is not a key of ``word_to_index``.
        """
        return [word_to_index[word] for word in sentence.split()] + [EOS_token]

    def zero_padding(self, encoded_list, fill_value=PAD_token):
        """Transpose ``encoded_list`` and pad short sequences with ``fill_value``.

        Returns a list of tuples, one per time step.
        """
        # Honour the caller's fill_value; the parameter used to be ignored.
        return list(itertools.zip_longest(*encoded_list, fillvalue=fill_value))

    def mask_matrix(self, l, value=PAD_token):
        """Return a 0/1 matrix shaped like ``l`` with 0 wherever an entry equals ``value``."""
        return [[0 if index == value else 1 for index in seq] for seq in l]

    def inputVar(self, voc, l):
        """Encode input sentences.

        Args:
            voc: Word-to-index mapping.
            l: List of sentences.

        Returns:
            (padded LongTensor, tensor of sequence lengths including EOS).
        """
        index_batch = [self.index_from_sentence(voc, sentence) for sentence in l]
        lengths = torch.tensor([len(indexes) for indexes in index_batch])
        padVar = torch.LongTensor(self.zero_padding(index_batch))
        return padVar, lengths

    def outputVar(self, voc, l):
        """Encode target sentences.

        Returns:
            (padded LongTensor, BoolTensor mask that is False on padding,
            length of the longest target including EOS).
        """
        index_batch = [self.index_from_sentence(voc, sentence) for sentence in l]
        max_target_length = max(len(indexes) for indexes in index_batch)
        zero_pad = self.zero_padding(index_batch)
        mask = torch.BoolTensor(self.mask_matrix(zero_pad))
        padVar = torch.LongTensor(zero_pad)
        return padVar, mask, max_target_length

    def batch2Train(self, voc, pair_batch):
        """Convert [input, output] sentence pairs into training tensors.

        Pairs are ordered by descending input word count (the encoder packs the
        inputs, which requires that ordering). A sorted copy is used so the
        caller's list is not reordered.

        Returns:
            (input tensor, input lengths, target tensor, target mask,
            max target length).
        """
        ordered = sorted(pair_batch, key=lambda pair: len(pair[0].split()), reverse=True)
        input_batch = [pair[0] for pair in ordered]
        output_batch = [pair[1] for pair in ordered]
        inp, lengths = self.inputVar(voc, input_batch)
        out, out_mask, target_length = self.outputVar(voc, output_batch)
        return inp, lengths, out, out_mask, target_length

In [11]:
# Shared transformer instance; iterTrain and evaluate use this global by name.
textTransform = TextTransform()

In [12]:
# Build one small batch to inspect the tensors produced by batch2Train.
small_batch_size = 5
# Slice with the named size rather than repeating the literal.
batches = textTransform.batch2Train(voc.word_to_index, voc.vocab_pairs[:small_batch_size])
input_variable, lengths, target_variable, mask, max_target_len = batches

print("input_variable:", input_variable)
print("lengths:", lengths)
print("target_variable:", target_variable)
print("mask:", mask)
print("max_target_len:", max_target_len)

input_variable: tensor([[    4,     4,    30,    47,   412],
        [   30,    28,   590,     3,     3],
        [   32, 10972,   282,     2,     2],
        [  370,     6,     6,     0,     0],
        [    3,     2,     2,     0,     0],
        [   77,     0,     0,     0,     0],
        [    9,     0,     0,     0,     0],
        [ 1604,     0,     0,     0,     0],
        [    2,     0,     0,     0,     0]])
lengths: tensor([9, 5, 5, 3, 3])
target_variable: tensor([[    4,     5,  4241,    86,   381],
        [   29,   172,     2,     6,    45],
        [  753,    21,     0,     2,   161],
        [    3,  1094,     0,     0,   660],
        [    2,    83,     0,     0,    51],
        [    0,   987,     0,     0,   282],
        [    0, 13728,     0,     0,   211],
        [    0,     2,     0,     0,     6],
        [    0,     0,     0,     0,     2]])
mask: tensor([[ True,  True,  True,  True,  True],
        [ True,  True,  True,  True,  True],
        [ True,  True, Fal

In [13]:
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder over embedded, length-sorted input batches.

    Args:
        hidden_size: GRU input and hidden size; ``embedding`` therefore has to
            produce vectors of this size.
        embedding: Module mapping token ids to vectors.
        n_layers: Number of stacked GRU layers.
        dropout: Dropout between GRU layers (forced to 0 for a single layer).
    """

    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout),
                          bidirectional=True)

    def forward(self, input_seq, input_length, hidden=None):
        """Encode a padded batch.

        Args:
            input_seq: Padded token ids with time as the first dimension.
            input_length: Lengths of the sequences, in descending order
                (tensor or list).
            hidden: Optional initial GRU state.

        Returns:
            (outputs, hidden): outputs are the forward and backward GRU outputs
            summed, so their last dimension is ``hidden_size``; hidden is the
            GRU's final state for every layer and direction.
        """
        embedded = self.embedding(input_seq)
        # pack_padded_sequence requires lengths on the CPU even when the batch
        # itself lives on the GPU.
        if torch.is_tensor(input_length):
            input_length = input_length.cpu()
        packed = nn.utils.rnn.pack_padded_sequence(embedded, input_length)
        outputs, hidden = self.gru(packed, hidden)
        outputs, _ = nn.utils.rnn.pad_packed_sequence(outputs)
        # Sum the two directions instead of concatenating them.
        outputs = outputs[:, :, :self.hidden_size] + outputs[:, :, self.hidden_size:]
        return outputs, hidden

In [14]:
class Attn(nn.Module):
    """Attention layer with 'dot', 'general' or 'concat' scoring.

    Args:
        method: One of 'dot', 'general', 'concat'.
        hidden_size: Size of the decoder state and encoder outputs.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """

    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            raise ValueError(self.method, "is not an appropriate attention method.")

        self.hidden_size = hidden_size
        if self.method == "general":
            self.attn = nn.Linear(self.hidden_size, hidden_size)
        elif self.method == "concat":
            self.attn = nn.Linear(self.hidden_size * 2, hidden_size)
            # Registered as a Parameter so it is initialised, trained, saved in
            # the state dict and moved together with the module.
            bound = hidden_size ** -0.5
            self.v = nn.Parameter(torch.empty(hidden_size).uniform_(-bound, bound))

    def dot_score(self, hidden, encoder_output):
        """Dot product of the decoder state with every encoder output."""
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        """Dot product of the decoder state with linearly projected encoder outputs."""
        energy = self.attn(encoder_output)
        return torch.sum(energy * hidden, dim=2)

    def concat_score(self, hidden, encoder_output):
        """Score from a tanh layer over the concatenated state and encoder outputs."""
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1),
                                      encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return attention weights over the encoder time steps.

        The scores are transposed, normalised with softmax along the time
        dimension, and given an extra middle dimension of size 1.
        """
        if self.method == "general":
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == "concat":
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == "dot":
            attn_energies = self.dot_score(hidden, encoder_outputs)

        attn_energies = attn_energies.t()

        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [15]:
class LuongAttnDecoderRNN(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    Args:
        attn_model: Attention method name passed on to ``Attn``.
        embedding: Module mapping token ids to vectors of size ``hidden_size``.
        hidden_size: GRU input/hidden size.
        output_size: Number of output classes (vocabulary size).
        n_layers: Number of stacked GRU layers.
        dropout: Dropout on the embeddings and, for more than one layer,
            between GRU layers.
    """

    def __init__(self, attn_model, embedding, hidden_size, output_size,  n_layers=1, dropout=0.1):
        super().__init__()

        # Plain configuration values.
        self.attn_model = attn_model
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout
        self.hidden_size = hidden_size

        # Sub-modules.
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        inter_layer_dropout = 0 if n_layers == 1 else dropout
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=inter_layer_dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        """Decode one time step.

        Returns:
            (output, hidden): softmax probabilities over the output classes for
            each batch element, and the updated GRU state.
        """
        step_embedding = self.embedding_dropout(self.embedding(input_step))

        gru_out, next_hidden = self.gru(step_embedding, last_hidden)
        weights = self.attn(gru_out, encoder_outputs)
        # Weighted sum of the encoder outputs for every batch element.
        context_vec = weights.bmm(encoder_outputs.transpose(0, 1)).squeeze(1)
        merged = torch.cat((gru_out.squeeze(0), context_vec), 1)
        attentional = torch.tanh(self.concat(merged))

        probabilities = F.softmax(self.out(attentional), dim=1)

        return probabilities, next_hidden
        
        
    

In [17]:
# Small configuration for stepping through the model by hand on the demo batch.
hidden_size = 100
n_layers = 2
dropout = 0.1
batch_size = 5
num_words = len(voc.word_to_index)
embedding = nn.Embedding(num_words, hidden_size)

# The encoder is built with n_layers so that hidden[::2] below holds one state
# per layer, which is what the n_layers-deep GRU in the following cell expects.
encoder = EncoderRNN(hidden_size, embedding, n_layers, dropout)

outputs, hidden = encoder(input_variable, lengths)

# Every second entry of the bidirectional state: the forward direction of each layer.
decoder_hidden = hidden[::2]
decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]])
decoder_input = embedding(decoder_input)

In [445]:
# Scratch check of one decoder step outside the decoder class. `decoder_input`
# here is the already-embedded SOS batch from the previous cell.
attn = Attn('concat', 100)
gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=(0 if n_layers == 1 else dropout))
rnn_output, _hidden = gru(decoder_input, decoder_hidden)
scores = attn(rnn_output, outputs)
# Displayed value: shape of the GRU output concatenated with the first encoder step.
torch.cat((rnn_output.expand(1, 5,100), outputs[:1,:,:]), 2).shape
#rnn_output.squeeze(0).shape

torch.Size([1, 5, 100])

In [400]:
# Scratch version of the remaining decoder-step operations (same sequence as
# LuongAttnDecoderRNN.forward), using freshly created linear layers.
out = nn.Linear(hidden_size, num_words)
concat = nn.Linear(hidden_size * 2, hidden_size)
# Attention-weighted sum of the encoder outputs.
context = scores.bmm(outputs.transpose(0,1))
rnn_output = rnn_output.squeeze(0)
context = context.squeeze(1)
concat_output = torch.cat((rnn_output, context), 1)
concat_output = torch.tanh(concat(concat_output))

# Probabilities over the vocabulary for each batch element.
output = out(concat_output)
output = F.softmax(output, dim=1)

In [401]:
# One row per batch element, one column per vocabulary entry.
output.shape

torch.Size([5, 28204])

In [402]:
# torch.gather along dim 1 picks, for each row, the column given by the index
# tensor; maskNLLLoss uses the same call to pick each target's probability.
t = torch.tensor([[1,2],[3,4]])
torch.gather(t, 1, torch.tensor([[0],[1]]))

tensor([[1],
        [4]])

In [18]:
def maskNLLLoss(inp, target, mask):
    """Mean negative log-likelihood of the target ids over unmasked positions.

    Args:
        inp: Per-class probabilities, one row per batch element.
        target: Target class id for each row.
        mask: Boolean tensor selecting the rows that count towards the loss.

    Returns:
        (loss tensor on the global ``device``, number of selected rows as int).
    """
    picked = torch.gather(inp, 1, target.view(-1, 1)).squeeze(1)
    neg_log_likelihood = -torch.log(picked)
    masked_mean = neg_log_likelihood.masked_select(mask).mean().to(device)
    return masked_mean, mask.sum().item()

In [19]:
def train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding,
          encoder_optimizer, decoder_optimizer, batch_size, clip):
    """Run one optimisation step on a single batch.

    Reads the module-level names ``device``, ``teacher_forcing_ratio``,
    ``SOS_token`` and ``maskNLLLoss``.

    Args:
        input_variable: Padded input ids (time-major).
        lengths: Input sequence lengths.
        target_variable: Padded target ids (time-major).
        mask: Boolean mask matching ``target_variable``.
        max_target_len: Number of decoding steps to run.
        encoder, decoder: Models to train.
        embedding: Unused here; kept for interface compatibility.
        encoder_optimizer, decoder_optimizer: Optimizers to step.
        batch_size: Number of sequences in the batch.
        clip: Maximum gradient norm.

    Returns:
        Average loss per unmasked target token, as a Python float.
    """
    input_variable = input_variable.to(device)
    target_variable = target_variable.to(device)
    mask = mask.to(device)
    # Lengths must stay on the CPU for pack_padded_sequence inside the encoder.
    lengths = lengths.to("cpu")

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    # The encoder returns exactly (outputs, hidden).
    encoder_outputs, encoder_hidden = encoder(input_variable, lengths)

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    # Every second state of the bidirectional encoder seeds the decoder.
    decoder_hidden = encoder_hidden[::2]
    decoder_input = torch.LongTensor([[SOS_token for _ in range(batch_size)]]).to(device)

    loss = 0
    n_total = 0
    weighted_losses = []

    for t in range(max_target_len):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden, encoder_outputs)
        if use_teacher_forcing:
            # Next input is the ground-truth token.
            decoder_input = target_variable[t].view(1, -1)
        else:
            # Next input is the decoder's own most likely token.
            _, top_i = decoder_output.topk(1)
            decoder_input = top_i.view(1, -1).detach()
        mask_loss, nTotal = maskNLLLoss(decoder_output, target_variable[t], mask[t])
        loss += mask_loss
        # .item() keeps only the number, so the bookkeeping does not hold on to
        # the autograd graph.
        weighted_losses.append(mask_loss.item() * nTotal)
        n_total += nTotal

    loss.backward()

    # clip_grad_norm_ is the in-place replacement for the deprecated clip_grad_norm.
    nn.utils.clip_grad_norm_(encoder.parameters(), clip)
    nn.utils.clip_grad_norm_(decoder.parameters(), clip)

    encoder_optimizer.step()
    decoder_optimizer.step()

    return sum(weighted_losses) / n_total

In [20]:
def iterTrain(model_name, voc, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding, encoder_n_layers, decoder_n_layers, save_dir, n_iteration, batch_size, 
              print_every, save_every, clip, corpus_name, loadFilename):
    """Train for ``n_iteration`` randomly sampled batches, logging and checkpointing.

    Uses the module-level ``textTransform`` and ``train``.

    Args:
        model_name, corpus_name, save_dir: Components of the checkpoint directory.
        voc: Vocabulary providing ``word_to_index`` and ``vocab_pairs``.
        encoder, decoder, embedding: Modules being trained/saved.
        encoder_optimizer, decoder_optimizer: Their optimizers.
        encoder_n_layers, decoder_n_layers: Used in the checkpoint directory name.
        n_iteration: Last iteration number to run.
        batch_size: Pairs sampled per batch.
        print_every, save_every: Logging / checkpoint intervals in iterations.
        clip: Maximum gradient norm passed to ``train``.
        loadFilename: Checkpoint to resume from, or a falsy value to start at 1.
    """
    training_batches = [textTransform.batch2Train(voc.word_to_index, [random.choice(voc.vocab_pairs) for _ in range(batch_size)])
                        for _ in range(n_iteration)]

    print("Initializing ...")
    start_iter = 1
    if loadFilename:
        # Read the iteration from the checkpoint file itself rather than from a
        # `checkpoint` global that only exists if another cell happened to run.
        start_iter = torch.load(loadFilename, map_location="cpu")["iteration"] + 1

    print_loss = 0
    for iteration in range(start_iter, n_iteration + 1):
        training_batch = training_batches[iteration - 1]
        input_variable, lengths, target_variable, mask, max_target_length = training_batch
        # Pass this batch's own target length, not a leftover global.
        loss = train(input_variable, lengths, target_variable, mask, max_target_length, encoder,
                     decoder, embedding, encoder_optimizer, decoder_optimizer, batch_size, clip)

        print_loss += loss

        if iteration % print_every == 0:
            print_avg_loss = print_loss / print_every
            print("Iteration: {}; Percent complete: {:.1f}%; Average loss: {:.4f}"
                  .format(iteration, iteration / n_iteration * 100, print_avg_loss))
            print_loss = 0

        if iteration % save_every == 0:
            # The hidden size comes from the encoder being saved.
            directory = os.path.join(save_dir, model_name, corpus_name, "{}-{}-{}"
                                     .format(encoder_n_layers, decoder_n_layers, encoder.hidden_size))

            os.makedirs(directory, exist_ok=True)
            torch.save({
                'iteration': iteration,
                'en': encoder.state_dict(),
                'de': decoder.state_dict(),
                'en_opt': encoder_optimizer.state_dict(),
                'de_opt': decoder_optimizer.state_dict(),
                'loss': loss,
                'voc_dict': voc.__dict__,
                'embedding': embedding.state_dict()
            }, os.path.join(directory, '{}_{}.tar'.format(iteration, 'checkpoint')))

In [470]:
class GreedySearchDecoder(nn.Module):
    """Decode by always feeding back the most probable token.

    Reads the module-level ``SOS_token`` and ``device``.
    """

    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, length, max_output_length):
        """Generate ``max_output_length`` tokens for a single input sequence.

        Args:
            input_seq: Input ids for one sentence (time-major, batch of 1).
            length: Length tensor/list for the encoder.
            max_output_length: Number of decoding steps.

        Returns:
            (all_tokens, all_scores): the chosen token ids and their
            probabilities, one entry per step.
        """
        encoder_outputs, encoder_hidden = self.encoder(input_seq, length)
        decoder_hidden = encoder_hidden[::2]

        # torch.tensor accepts device/dtype; the legacy LongTensor constructor does not.
        decoder_input = torch.tensor([[SOS_token]], dtype=torch.long, device=device)

        all_tokens = torch.zeros([0], device=device, dtype=torch.long)
        all_scores = torch.zeros([0], device=device)

        for _ in range(max_output_length):
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_outputs)
            # Best token of this step becomes the next decoder input.
            decoder_score, decoder_input = torch.max(decoder_output, dim=1)
            all_tokens = torch.cat((all_tokens, decoder_input), dim=0)
            all_scores = torch.cat((all_scores, decoder_score), dim=0)

            # Restore the leading time dimension expected by the decoder.
            decoder_input = torch.unsqueeze(decoder_input, dim=0)

        return all_tokens, all_scores

In [471]:
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_length):
    """Decode a reply for one normalized sentence.

    Uses the module-level ``textTransform`` and ``device``. ``encoder`` and
    ``decoder`` are not used directly; ``searcher`` drives the decoding.

    Args:
        searcher: Callable taking (input batch, lengths, max length) and
            returning (token ids, scores).
        voc: Vocabulary providing ``word_to_index`` and ``index_to_word``.
        sentence: Already normalized input sentence.
        max_length: Number of tokens to generate.

    Returns:
        List of decoded words.

    Raises:
        KeyError: if ``sentence`` contains a word missing from the vocabulary.
    """
    index_batch = [textTransform.index_from_sentence(voc.word_to_index, sentence)]
    # Integer lengths, kept on the CPU as required for packing in the encoder.
    length = torch.tensor([len(indexes) for indexes in index_batch])

    # Batch of one, transposed so time is the first dimension.
    input_batch = torch.LongTensor(index_batch).transpose(0, 1)
    input_batch = input_batch.to(device)

    all_tokens, all_scores = searcher(input_batch, length, max_length)

    decoded_words = [voc.index_to_word[token.item()] for token in all_tokens]
    return decoded_words

def evaluate_input(encoder, decoder, searcher):
    """Interactive chat loop: read a line, print the model's reply.

    Typing 'q' or 'quit' ends the loop. Uses the module-level ``voc`` and
    ``evaluate``. Sentences containing a word missing from the vocabulary are
    reported and skipped.
    """
    while True:
        try:
            input_sentence = input('>')
            if input_sentence == 'q' or input_sentence == 'quit':
                break
            input_sentence = voc.normalizeSentence(input_sentence)
            # evaluate() encodes the sentence and runs the searcher; calling the
            # searcher directly with these arguments does not match its forward().
            output_words = evaluate(encoder, decoder, searcher, voc, input_sentence)
            output_words = [x for x in output_words if not (x == 'EOS' or x == 'PAD')]

            print('BOT: {}'.format(' '.join(output_words)))

        except KeyError:
            print("Error: Encountered unknown word")
        

In [None]:
# Model configuration.
model_name = 'cb_model'
attn_model = 'dot'
hidden_size = 500
encoder_n_layer = 2
decoder_n_layer = 2
# Dropout probability; a value of 1 would zero every activation it is applied to.
dropout = 0.1
batch_size = 64

# Set to a checkpoint path to resume from it.
loadFileName = None
checkpoint_iter = 4000

if loadFileName:
    # map_location lets a checkpoint saved on GPU be loaded on the current device.
    checkpoint = torch.load(loadFileName, map_location=device)

    encoder_sd = checkpoint["en"]
    decoder_sd = checkpoint["de"]

    encoder_optimizer_sd = checkpoint["en_opt"]
    decoder_optimizer_sd = checkpoint["de_opt"]

    embedding_sd = checkpoint["embedding"]
    voc.__dict__ = checkpoint['voc_dict']


print("Building encoder and decoder...")

# Derive the vocabulary size here instead of relying on an earlier scratch cell.
num_words = len(voc.word_to_index)
embedding = nn.Embedding(num_words, hidden_size)
encoder = EncoderRNN(hidden_size, embedding, encoder_n_layer, dropout)
# Positional order is (attn_model, embedding, hidden_size, output_size, n_layers, dropout).
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                              num_words, decoder_n_layer, dropout)

if loadFileName:
    # load_state_dict updates the module in place; its return value is a report
    # of missing/unexpected keys and must not replace the module.
    embedding.load_state_dict(embedding_sd)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

encoder = encoder.to(device)
decoder = decoder.to(device)

print("Model is ready for training!!!")
    


Building encoder and decoder...


In [458]:
# Training configuration.
clip = 50.0
teacher_forcing_ratio = 1.0
learning_rate = 0.0001
decoder_learning_ratio = 5.0
n_iterations = 4000
print_every = 1
save_every = 500
save_dir = 'models'
corpus_name = "cornell_movie_dialogs_corpus"


# Put both models in training mode so dropout is active.
encoder.train()
decoder.train()

print("Building optimizers")
encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
# The decoder optimizer must own the decoder's parameters, otherwise the decoder
# is never updated.
decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate * decoder_learning_ratio)

if loadFileName:
    encoder_optimizer.load_state_dict(encoder_optimizer_sd)
    decoder_optimizer.load_state_dict(decoder_optimizer_sd)


# Move any restored optimizer state to the active device; .to(device) also works
# on machines without CUDA.
for state in encoder_optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.to(device)

for state in decoder_optimizer.state.values():
    for k, v in state.items():
        if isinstance(v, torch.Tensor):
            state[k] = v.to(device)


Building optimizers


In [459]:
# train(input_variable, lengths, target_variable, mask, max_target_len, encoder, decoder, embedding, encoder_optimizer
#       ,decoder_optimizer, 5, clip)

In [463]:
# Start (or resume) training with the configuration from the two cells above.
iterTrain(model_name, voc, encoder, decoder, encoder_optimizer, decoder_optimizer, embedding,
         encoder_n_layer, decoder_n_layer, save_dir, n_iterations, batch_size, print_every,
         save_every, clip, corpus_name, loadFileName)

Initializing ...




Iteration: 1; Percent complete: 0.0%; Average loss: 10.2326
Iteration: 2; Percent complete: 0.1%; Average loss: 10.2289
Iteration: 3; Percent complete: 0.1%; Average loss: 10.2277
Iteration: 4; Percent complete: 0.1%; Average loss: 10.2222
Iteration: 5; Percent complete: 0.1%; Average loss: 10.2183
Iteration: 6; Percent complete: 0.1%; Average loss: 10.2137
Iteration: 7; Percent complete: 0.2%; Average loss: 10.2096
Iteration: 8; Percent complete: 0.2%; Average loss: 10.1951
Iteration: 9; Percent complete: 0.2%; Average loss: 10.1967


KeyboardInterrupt: 

In [362]:
# Number of sentence pairs available for sampling training batches.
len(voc.vocab_pairs)

140800