In [1]:
import unicodedata
import re
import math
import psutil
import time
import datetime
from io import open
import random
from random import shuffle
import argparse
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import torch.cuda

import sys; sys.argv=['']; del sys

In [2]:
use_cuda = torch.cuda.is_available()
print(use_cuda)

False


In [3]:

def uniToAscii(sentence):
    return ''.join(
        c for c in unicodedata.normalize('NFD', sentence)
        if unicodedata.category(c) != 'Mn'
    )


"""Lowercase etc, kan worden uitgebreid naar dataset requirements""" 
def normalizeString(s):

    s = uniToAscii(s.lower().strip())

    return s   

'''eventueel te gebruiken als je bepaalde starttoken zinnen te verwijderen'''
prefix_filtered = (
)

"""Filters pair over max length en eventueel gespecificeerde start characters prefix_filtered"""
def filterPair(p, max_length, start_filter):
    filtered = len(p[0].split(' ')) < max_length and \
        len(p[1].split(' ')) < max_length 
    if start_filter:
        return filtered and p[1].startswith(prefixes_filter)
    else:
        return filtered

"""filter pairs (pytorch tutorial)"""
def filterPairs(pairs, max_length, start_filter):
    return [pair for pair in pairs if filterPair(pair, max_length, start_filter)]


In [4]:
"""start of sentence """
SOS_token = 0

"""end of sentence"""
EOS_token = 1

"""unknown word"""
UNK_token = 2


"""Lang class, storing in pytorch"""
class Lang:
    def __init__(self, language):
        self.language_name = language
        self.word_to_index = {"SOS":SOS_token, "EOS":EOS_token, "<UNK>":UNK_token}
        self.word_to_count = {}
        self.index_to_word = {SOS_token: "SOS", EOS_token: "EOS", UNK_token: "<UNK>"}
        self.vocab_size = 3
        self.cutoff_point = -1


    def countSentence(self, sentence):
        for word in sentence.split(' '):
            self.countWords(word)

    """aantal woorden in dataset"""
    def countWords(self, word):
        if word not in self.word_to_count:
            self.word_to_count[word] = 1
        else:
            self.word_to_count[word] += 1

    """als max aantal woorden > aantal woorden worden infrequente woorden verwijderd uit vocab"""
    def createCutoff(self, max_vocab_size):
        word_freqs = list(self.word_to_count.values())
        word_freqs.sort(reverse=True)
        if len(word_freqs) > max_vocab_size:
            self.cutoff_point = word_freqs[max_vocab_size]

    """woord -> index"""
    def addSentence(self, sentence):
        new_sentence = ''
        for word in sentence.split(' '):
            unk_word = self.addWord(word)
            if not new_sentence:
                new_sentence =unk_word
            else:
                new_sentence = new_sentence + ' ' + unk_word
        return new_sentence

    """woord--> vocab""" 
    def addWord(self, word):
        if self.word_to_count[word] > self.cutoff_point:
            if word not in self.word_to_index:
                self.word_to_index[word] = self.vocab_size
                self.index_to_word[self.vocab_size] = word
                self.vocab_size += 1
            return word
        else:
            return self.index_to_word[2]

In [5]:
'''prepares input & output naar Lang van .txt dataset'''

def prepareLangs(lang1, lang2, file_path, reverse=False):
    print("Reading lines...")
    lines = open(file_path, encoding='utf-8').\
    read().strip().split('\n')
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]

    if reverse:
        pairs = [list(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [6]:

def prepareData(lang1, lang2, file_path, max_vocab_size=50000, 
                reverse=False, trim=0, start_filter=False, perc_train_set=0.9, 
                print_to=None):
    
    input_lang, output_lang, pairs = prepareLangs(lang1, lang2, 
                                                  file_path, reverse)
    
    print("Read %s sentence pairs" % len(pairs))
    
    if print_to:
        with open(print_to,'a') as f:
            f.write("Read %s sentence pairs \n" % len(pairs))
    
    print("Counting words...")
    for pair in pairs:
        input_lang.countSentence(pair[0])
        output_lang.countSentence(pair[1])


    input_lang.createCutoff(max_vocab_size)
    output_lang.createCutoff(max_vocab_size)

    pairs = [(input_lang.addSentence(pair[0]),output_lang.addSentence(pair[1])) 
             for pair in pairs]

    shuffle(pairs)
    
    train_pairs = pairs[:math.ceil(perc_train_set*len(pairs))]
    test_pairs = pairs[math.ceil(perc_train_set*len(pairs)):]

    print("Train pairs: %s" % (len(train_pairs)))
    print("Test pairs: %s" % (len(test_pairs)))
    print("Counted Words -> Trimmed Vocabulary Sizes (w/ EOS and SOS tags):")
    print("%s, %s -> %s" % (input_lang.language_name, len(input_lang.word_to_count),
                            input_lang.vocab_size,))
    print("%s, %s -> %s" % (output_lang.language_name, len(output_lang.word_to_count), 
                            output_lang.vocab_size))
    print(random.choice(pairs))

    if print_to:
        with open(print_to,'a') as f:
            f.write("Train pairs: %s" % (len(train_pairs)))
            f.write("Test pairs: %s" % (len(test_pairs)))
            f.write("Counted Words -> Trimmed Vocabulary Sizes (w/ EOS and SOS tags):")
            f.write("%s, %s -> %s" % (input_lang.language_name, 
                                      len(input_lang.word_to_count),
                                      input_lang.vocab_size,))
            f.write("%s, %s -> %s \n" % (output_lang.language_name, len(output_lang.word_to_count), 
                            output_lang.vocab_size))
        
    return input_lang, output_lang, train_pairs, test_pairs
    return input_lang, output_lang, train_pairs, test_pairs

In [7]:
"""converts a sentence to one hot encoding vectors"""

def indexesFromSentence(lang, sentence):
    indexes = []
    for word in sentence.split(' '):
        try:
            indexes.append(lang.word_to_index[word])
        except:
            indexes.append(lang.word_to_index["<UNK>"])
    return indexes


def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    result = torch.LongTensor(indexes).view(-1)
    if use_cuda:
        return result.cuda()
    else:
        return result
      
"""converts a pair of sentence (input and target) to a pair of tensors"""
def tensorsFromPair(input_lang, output_lang, pair):
    input_variable = tensorFromSentence(input_lang, pair[0])
    target_variable = tensorFromSentence(output_lang, pair[1])
    return (input_variable, target_variable)
  

"""converts from tensor of one hot encoding vector indices to sentence"""
def sentenceFromTensor(lang, tensor):
    raw = tensor.data
    words = []
    for num in raw:
        words.append(lang.index_to_word[num.item()])
    return ' '.join(words)

In [8]:
"""verdeling in batches"""
def batchify(data, input_lang, output_lang, batch_size, shuffle_data=True):
    if shuffle_data == True:
        shuffle(data)
    number_of_batches = len(data) // batch_size
    batches = list(range(number_of_batches))
    longest_elements = list(range(number_of_batches))
    
    for batch_number in range(number_of_batches):
        longest_input = 0
        longest_target = 0
        input_variables = list(range(batch_size))
        target_variables = list(range(batch_size))
        index = 0      
        for pair in range((batch_number*batch_size),((batch_number+1)*batch_size)):
            input_variables[index], target_variables[index] = tensorsFromPair(input_lang, output_lang, data[pair])
            if len(input_variables[index]) >= longest_input:
                longest_input = len(input_variables[index])
            if len(target_variables[index]) >= longest_target:
                longest_target = len(target_variables[index])
            index += 1
        batches[batch_number] = (input_variables, target_variables)
        longest_elements[batch_number] = (longest_input, longest_target)
    return batches , longest_elements, number_of_batches


"""pads batches to allow for sentences of variable lengths to be computed in parallel"""
def pad_batch(batch):
    padded_inputs = torch.nn.utils.rnn.pad_sequence(batch[0],padding_value=EOS_token)
    padded_targets = torch.nn.utils.rnn.pad_sequence(batch[1],padding_value=EOS_token)
    return (padded_inputs, padded_targets)

In [9]:
class EncoderRNN(nn.Module):
	def __init__(self,input_size,hidden_size,layers=1,dropout=0.1,
               bidirectional=True):
		super(EncoderRNN, self).__init__()

		if bidirectional:
			self.directions = 2
		else:
			self.directions = 1
		self.input_size = input_size
		self.hidden_size = hidden_size
		self.num_layers = layers
		self.dropout = dropout
		self.embedder = nn.Embedding(input_size,hidden_size)
		self.dropout = nn.Dropout(dropout)
		self.lstm = nn.LSTM(input_size=hidden_size,hidden_size=hidden_size,
                        num_layers=layers,dropout=dropout,
                        bidirectional=bidirectional,batch_first=False)
		self.fc = nn.Linear(hidden_size*self.directions, hidden_size)

	def forward(self, input_data, h_hidden, c_hidden):
		embedded_data = self.embedder(input_data)
		embedded_data = self.dropout(embedded_data)
		hiddens, outputs = self.lstm(embedded_data, (h_hidden, c_hidden))

		return hiddens, outputs

	"""creates initial hidden states for encoder corresponding to batch size"""
	def create_init_hiddens(self, batch_size):
		h_hidden = Variable(torch.zeros(self.num_layers*self.directions, 
                                    batch_size, self.hidden_size))
		c_hidden = Variable(torch.zeros(self.num_layers*self.directions, 
                                    batch_size, self.hidden_size))
		if torch.cuda.is_available():
			return h_hidden.cuda(), c_hidden.cuda()
		else:
			return h_hidden, c_hidden

In [10]:
class DecoderAttn(nn.Module):
	def __init__(self, hidden_size, output_size, layers=1, dropout=0.1, bidirectional=True):
		super(DecoderAttn, self).__init__()

		if bidirectional:
			self.directions = 2
		else:
			self.directions = 1
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.num_layers = layers
		self.dropout = dropout
		self.embedder = nn.Embedding(output_size,hidden_size)
		self.dropout = nn.Dropout(dropout)
		self.score_learner = nn.Linear(hidden_size*self.directions, 
                                   hidden_size*self.directions)
		self.lstm = nn.LSTM(input_size=hidden_size,hidden_size=hidden_size,
                        num_layers=layers,dropout=dropout,
                        bidirectional=bidirectional,batch_first=False)
		self.context_combiner = nn.Linear((hidden_size*self.directions)
                                      +(hidden_size*self.directions), hidden_size)
		self.tanh = nn.Tanh()
		self.output = nn.Linear(hidden_size, output_size)
		self.soft = nn.Softmax(dim=1)
		self.log_soft = nn.LogSoftmax(dim=1)


	def forward(self, input_data, h_hidden, c_hidden, encoder_hiddens):

		embedded_data = self.embedder(input_data)
		embedded_data = self.dropout(embedded_data)	
		batch_size = embedded_data.shape[1]
		hiddens, outputs = self.lstm(embedded_data, (h_hidden, c_hidden))	
		top_hidden = outputs[0].view(self.num_layers,self.directions,
                                 hiddens.shape[1],
                                 self.hidden_size)[self.num_layers-1]
		top_hidden = top_hidden.permute(1,2,0).contiguous().view(batch_size,-1, 1)

		prep_scores = self.score_learner(encoder_hiddens.permute(1,0,2))
		scores = torch.bmm(prep_scores, top_hidden)
		attn_scores = self.soft(scores)
		con_mat = torch.bmm(encoder_hiddens.permute(1,2,0),attn_scores)
		h_tilde = self.tanh(self.context_combiner(torch.cat((con_mat,
                                                         top_hidden),dim=1)
                                              .view(batch_size,-1)))
		pred = self.output(h_tilde)
		pred = self.log_soft(pred)

		
		return pred, outputs

In [11]:
'''training per batch'''

def train_batch(input_batch, target_batch, encoder, decoder, 
                encoder_optimizer, decoder_optimizer, loss_criterion):
	encoder_optimizer.zero_grad()
	decoder_optimizer.zero_grad()
	loss = 0
	enc_h_hidden, enc_c_hidden = encoder.create_init_hiddens(input_batch.shape[1])

	enc_hiddens, enc_outputs = encoder(input_batch, enc_h_hidden, enc_c_hidden)

	decoder_input = Variable(torch.LongTensor(1,input_batch.shape[1]).
                           fill_(output_lang.word_to_index.get("SOS")).cuda()) if use_cuda \
					else Variable(torch.LongTensor(1,input_batch.shape[1]).
                        fill_(output_lang.word_to_index.get("SOS")))

	dec_h_hidden = enc_outputs[0]
	dec_c_hidden = enc_outputs[1]
	
	for i in range(target_batch.shape[0]):
		pred, dec_outputs = decoder(decoder_input, dec_h_hidden, 
                                dec_c_hidden, enc_hiddens)

		decoder_input = target_batch[i].view(1,-1)
		dec_h_hidden = dec_outputs[0]
		dec_c_hidden = dec_outputs[1]
		
		loss += loss_criterion(pred,target_batch[i])


	loss.backward()

	torch.nn.utils.clip_grad_norm_(encoder.parameters(),args.clip)
	torch.nn.utils.clip_grad_norm_(decoder.parameters(),args.clip)

	encoder_optimizer.step()
	decoder_optimizer.step()

	return loss.item() / target_batch.shape[0]

In [12]:
''' training epochs'''
def train(train_batches, encoder, decoder, encoder_optimizer, decoder_optimizer, loss_criterion):

	round_loss = 0
	i = 1
	for batch in train_batches:
		i += 1
		(input_batch, target_batch) = pad_batch(batch)
		batch_loss = train_batch(input_batch, target_batch, encoder, decoder, encoder_optimizer, decoder_optimizer, loss_criterion)
		round_loss += batch_loss

	return round_loss / len(train_batches)

In [13]:
'''Evaluate'''

def test_batch(input_batch, target_batch, encoder, decoder, loss_criterion):
	
	loss = 0

	#create initial hidde state for encoder
	enc_h_hidden, enc_c_hidden = encoder.create_init_hiddens(input_batch.shape[1])

	enc_hiddens, enc_outputs = encoder(input_batch, enc_h_hidden, enc_c_hidden)

	decoder_input = Variable(torch.LongTensor(1,input_batch.shape[1]).
                           fill_(output_lang.word_to_index.get("SOS")).cuda()) if use_cuda \
					else Variable(torch.LongTensor(1,input_batch.shape[1]).
                        fill_(output_lang.word_to_index.get("SOS")))
	dec_h_hidden = enc_outputs[0]
	dec_c_hidden = enc_outputs[1]
	
	for i in range(target_batch.shape[0]):
		pred, dec_outputs = decoder(decoder_input, dec_h_hidden, dec_c_hidden, enc_hiddens)

		topv, topi = pred.topk(1,dim=1)
		ni = topi.view(1,-1)
		
		decoder_input = ni
		dec_h_hidden = dec_outputs[0]
		dec_c_hidden = dec_outputs[1]

		loss += loss_criterion(pred,target_batch[i])
		
	return loss.item() / target_batch.shape[0]

In [14]:
'''Loss overtest_batches'''

def test(test_batches, encoder, decoder, loss_criterion):

	with torch.no_grad():
		test_loss = 0

		for batch in test_batches:
			(input_batch, target_batch) = pad_batch(batch)
			batch_loss = test_batch(input_batch, target_batch, encoder, decoder, loss_criterion)
			test_loss += batch_loss

	return test_loss / len(test_batches)

In [15]:

def evaluate(encoder, decoder, sentence, cutoff_length):
	with torch.no_grad():
		input_variable = tensorFromSentence(input_lang, sentence)
		input_variable = input_variable.view(-1,1)
		enc_h_hidden, enc_c_hidden = encoder.create_init_hiddens(1)

		enc_hiddens, enc_outputs = encoder(input_variable, enc_h_hidden, enc_c_hidden)

		decoder_input = Variable(torch.LongTensor(1,1).fill_(output_lang.word_to_index.get("SOS")).cuda()) if use_cuda \
						else Variable(torch.LongTensor(1,1).fill_(output_lang.word_to_index.get("SOS")))
		dec_h_hidden = enc_outputs[0]
		dec_c_hidden = enc_outputs[1]

		decoded_words = []

		for di in range(cutoff_length):
			pred, dec_outputs = decoder(decoder_input, dec_h_hidden, dec_c_hidden, enc_hiddens)

			topv, topi = pred.topk(1,dim=1)
			ni = topi.item()
			if ni == output_lang.word_to_index.get("EOS"):
				decoded_words.append('<EOS>')
				break
			else:
				decoded_words.append(output_lang.index_to_word[ni])

			decoder_input = Variable(torch.LongTensor(1,1).fill_(ni).cuda()) if use_cuda \
							else Variable(torch.LongTensor(1,1).fill_(ni))
			dec_h_hidden = dec_outputs[0]
			dec_c_hidden = dec_outputs[1]

		output_sentence = ' '.join(decoded_words)
    
		return output_sentence

In [16]:
'''Evaluate: format
                  > input sentence
                  = correct translation
                  < predicted translation'''

def evaluate_randomly(encoder, decoder, pairs, n=2, trim=100):
	for i in range(n):
		pair = random.choice(pairs)
		print('>', pair[0])
		print('=', pair[1])
		output_sentence = evaluate(encoder, decoder, pair[0], cutoff_length=trim)
		print('<', output_sentence)
		print('')    
		if create_txt:
			f = open(print_to, 'a')
			f.write("\n \
				> %s \n \
				= %s \n \
				< %s \n" % (pair[0], pair[1], output_sentence))
			f.close()

In [17]:
'''Used to plot the progress of training. Plots the loss value vs. time'''
def showPlot(times, losses, fig_name):
    x_axis_label = 'Minutes'
    colors = ('red','blue')
    if max(times) >= 120:
    	times = [mins/60 for mins in times]
    	x_axis_label = 'Hours'
    i = 0
    for key, losses in losses.items():
    	if len(losses) > 0:
    		plt.plot(times, losses, label=key, color=colors[i])
    		i += 1
    plt.legend(loc='upper left')
    plt.xlabel(x_axis_label)
    plt.ylabel('Loss')
    plt.title('Training Results')
    plt.savefig(fig_name+'.png')
    plt.close('all')
    
'''prints the current memory consumption'''
def mem():
	if use_cuda:
		mem = torch.cuda.memory_allocated()/1e7
	else:
		mem = psutil.cpu_percent()
	print('Current mem usage:')
	print(mem)
	return "Current mem usage: %s \n" % (mem)

'''converts a time measurement in seconds to hours'''
def asHours(s):
	m = math.floor(s / 60)
	h = math.floor(m / 60)
	s -= m * 60
	m -= h * 60
	return '%dh %dm %ds' % (h, m, s)

In [26]:
def train_and_test(epochs, test_eval_every, plot_every, learning_rate, 
                   lr_schedule, train_pairs, test_pairs, input_lang, 
                   output_lang, batch_size, test_batch_size, encoder, decoder, 
                   loss_criterion, trim, save_weights):
	
	times = []
	losses = {'train set':[], 'test set': []}

	test_batches, longest_seq, n_o_b = batchify(test_pairs, input_lang, 
                                              output_lang, test_batch_size, 
                                              shuffle_data=False)

	start = time.time()
	for i in range(1,epochs+1):
    
		'''adjust the learning rate according to the learning rate schedule
		specified in lr_schedule'''
		if i in lr_schedule.keys():
			learning_rate /= lr_schedule.get(i)


		encoder.train()
		decoder.train()

		encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
		decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)

		batches, longest_seq, n_o_b = batchify(train_pairs, input_lang, 
                                           output_lang, batch_size, 
                                           shuffle_data=True)
		train_loss = train(batches, encoder, decoder, encoder_optimizer, 
                       decoder_optimizer, loss_criterion)
		
		now = time.time()
		print("Iter: %s \nLearning Rate: %s \nTime: %s \nTrain Loss: %s \n" 
          % (i, learning_rate, asHours(now-start), train_loss))

		if create_txt:
			with open(print_to, 'a') as f:
				f.write("Iter: %s \nLeaning Rate: %s \nTime: %s \nTrain Loss: %s \n" \
					% (i, learning_rate, asHours(now-start), train_loss))

		if i % test_eval_every == 0:
			if test_pairs:
				test_loss = test(test_batches, encoder, decoder, criterion)
				print("Test set loss: %s" % (test_loss))
				if create_txt:
					with open(print_to, 'a') as f:
						f.write("Test Loss: %s \n" % (test_loss))
				evaluate_randomly(encoder, decoder, test_pairs)
			else:
				evaluate_randomly(encoder, decoder, train_pairs)

		if i % plot_every == 0:
			times.append((time.time()-start)/60)
			losses['train set'].append(train_loss)
			if test_pairs:
				losses['test set'].append(test_loss)
			showPlot(times, losses, output_file_name)
			if save_weights:
				torch.save(encoder.state_dict(), output_file_name+'_enc_weights.pt')
				torch.save(decoder.state_dict(), output_file_name+'_dec_weights.pt')


In [24]:
'''Dataset en parametrs inladen'''
file_path = 'data/prefix-equation611.txt'
input_lang_name = 'equation'
output_lang_name = 'prefix'

"""name of your dataset"""
dataset = 'orig'

'''kan hier ook twee files inladen als je talen gescheiden zijn'''
raw_data_file_path = (file_path)

"""True als je richting van vertaling wilt omdraaien bijv eng-fra naar fra-eng"""
reverse=True

'''verwijder zinnen met hoeveel woorden? Hier niet echt van toepassing, maar voor Math word problem'''
trim = 100

max_vocab_size= 1000

"""start filter kan gebruikt worden om bijvoorbeeld alle paren te verwijderen met een bepaalde prefix,
   hier niet gespecificeerd (van pytorch tutorial)"""
start_filter = False

"""train/test split (procent training data)"""
perc_train_set = 0.8

In [20]:
"""OUTPUT OPTIONS"""

"""om hoeveeel epochs losse berekenen"""
test_eval_every = 1

"""hoe vaak plotten"""
plot_every = 1

"""txt file met output"""
create_txt = True

"""if true saves the encoder and decoder weights to seperate .pt files e"""
save_weights = True

In [21]:
#HYPERPARAMETERS

"""bidirectional LSTM of één richting, in geval van vertaling bidirectional"""
bidirectional = True
if bidirectional:
	directions = 2
else:
	directions = 1


layers = 2
hidden_size = 100
dropout = 0.2

batch_size = 32
test_batch_size = 32

epochs = 5
learning_rate= 0.1

"""Learning rate schedule. Met dit schema na 5 epochs learning rate gedeeld door 10"""
lr_schedule = {5:10}

criterion = nn.NLLLoss()

In [27]:
use_cuda = torch.cuda.is_available()


"""for plotting of the loss"""
plt.switch_backend('agg')

output_file_name = "testdata.%s_trim.%s_vocab.%s_directions.%s_layers.%s_hidden.%s_dropout.%s_learningrate.%s_batch.%s_epochs.%s" % (dataset,trim,max_vocab_size,directions,layers,hidden_size,dropout,learning_rate,batch_size,epochs)

if create_txt:
	print_to = output_file_name+'.txt'
	with open(print_to, 'w+') as f:
		f.write("Starting Training \n")
else:
	print_to = None

input_lang, output_lang, train_pairs, test_pairs = prepareData(
    input_lang_name, output_lang_name, raw_data_file_path, 
    max_vocab_size=max_vocab_size, reverse=reverse, trim=trim, 
    start_filter=start_filter, perc_train_set=perc_train_set, print_to=print_to)
print('Train Pairs #')
print(len(train_pairs))


"""for gradient clipping from 
https://github.com/pytorch/examples/blob/master/word_language_model/main.py"""
parser = argparse.ArgumentParser(description='PyTorch Wikitext-2 RNN/LSTM Language Model')
parser.add_argument('--clip', type=float, default=0.25,
                    help='gradient clipping')
args = parser.parse_args()

mem()

if create_txt:
	with open(print_to, 'a') as f:
		f.write("\nRandom Train Pair: %s \n\nRandom Test Pair: %s \n\n" 
            % (random.choice(train_pairs),random.choice(test_pairs) 
               if test_pairs else "None"))
		f.write(mem())


"""create the Encoder"""
encoder = EncoderRNN(input_lang.vocab_size, hidden_size, layers=layers, 
                     dropout=dropout, bidirectional=bidirectional)

"""create the Decoder"""
decoder = DecoderAttn(hidden_size, output_lang.vocab_size, layers=layers, 
                      dropout=dropout, bidirectional=bidirectional)

print('Encoder and Decoder Created')
mem()

if use_cuda:
	print('Cuda being used')
	encoder = encoder.cuda()
	decoder = decoder.cuda()

print('Number of epochs: '+str(epochs))

if create_txt:
	with open(print_to, 'a') as f:
		f.write('Encoder and Decoder Created\n')
		f.write(mem())
		f.write("Number of epochs %s \n" % (epochs))

train_and_test(epochs, test_eval_every, plot_every, learning_rate, lr_schedule, 
               train_pairs, test_pairs, input_lang, output_lang, batch_size, 
               test_batch_size, encoder, decoder, criterion, trim, save_weights)

Reading lines...
Read 578 sentence pairs
Counting words...
Train pairs: 463
Test pairs: 115
Counted Words -> Trimmed Vocabulary Sizes (w/ EOS and SOS tags):
prefix, 65 -> 68
equation, 29 -> 32
('how many is 9 * 6 times 1?', '* (* 9 6) 1')
Train Pairs #
463
Current mem usage:
44.0
Current mem usage:
0.0
Encoder and Decoder Created
Current mem usage:
100.0
Number of epochs: 5
Current mem usage:
0.0
Iter: 1 
Learning Rate: 0.1 
Time: 0h 0m 3s 
Train Loss: 3.4224116915748235 

Test set loss: 3.3805842929416237
> i want to calculate nine / 6 divided by ten?
= / (/ 9 6) 10
< <EOS>

> how many is five divided by seven multiplied by 1?
= * (/ 5 7) 1
< <EOS>

Iter: 2 
Learning Rate: 0.1 
Time: 0h 0m 7s 
Train Loss: 3.3483253660656156 

Test set loss: 3.3007528516981335
> what is nine times 10 - seven?
= - (* 9 10) 7
< <EOS>

> i would like to know what is two plus seven + ten?
= + (+ 2 7) 10
< <EOS>

Iter: 3 
Learning Rate: 0.1 
Time: 0h 0m 13s 
Train Loss: 3.2744802066258023 

Test set loss: 3

In [None]:
outside_sent = "what is one plus two times three"
outside_sent = normalizeString(outside_sent)
evaluate(encoder, decoder, outside_sent, cutoff_length=10)