In [1]:
import pandas as pd
import spacy
pd.set_option('display.max_colwidth', None)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Read the data
data = pd.read_csv('../data/libras_dictionary.csv')

In [6]:
# Print the first 5 rows of the data
data.head()

Unnamed: 0,word,subject,interpretation,example_portuguese_sentence,example_libras_sentence,grammar_class,word_origin,video_link,image_link,hand_image_link
0,A,NENHUM,Primeira letra do alfabeto da língua portuguesa; sinal gráfico elementar com que se representam os vocábulos na língua escrita.,Invente qualquer palavra que comece com a letra A.,VOCÊ INVENTAR QUALQUER PALAVRA COMEÇAR A.,SUBSTANTIVO,Nacional,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/videos/aSm_Prog001.mp4,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/images/aSm_Prog001.jpg,https://www.ines.gov.br/dicionario-de-libras/public/media/mao/cg01.jpg
1,ABACATE,FRUTA,"O fruto do abacateiro. Comestível, tem a polpa amarelada e macia. É consumido puro, com açúcar, em pratos salgados ou em vitaminas.",Você gosta de abacate com leite?,VOCÊ GOSTAR ABACATE LEITE JUNTO?,SUBSTANTIVO,Nacional,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/videos/abacateSm_Prog001.mp4,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/images/abacateSm_Prog001.jpg,https://www.ines.gov.br/dicionario-de-libras/public/media/mao/cg53a.jpg
2,ABACAXI,FRUTA,"Fruta de casca grossa e áspera. Sua polpa pode ser consumida pura, em forma de sucos, doces e sorvetes.","Hoje tomei suco de abacaxi, ele estava ácido.",HOJE S-U-C-O ABACAXI BEBER ÁCID@.,SUBSTANTIVO,Nacional,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/videos/abacaxiSm_Prog001.mp4,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/images/abacaxiSm_Prog001.jpg,https://www.ines.gov.br/dicionario-de-libras/public/media/mao/cg47.jpg
3,ABAFAR,NENHUM,"Cobrir ou fechar, para manter o calor.","Se você quer abafar seu quarto, é melhor fechar tudo.",S-I VOCÊ QUERER QUARTO SE@ ABAFAR A-R? MELHOR FECHAR-TUDO.,VERBO,Nacional,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/videos/abafarSm_Prog001.mp4,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/images/abafarSm_Prog001.jpg,https://www.ines.gov.br/dicionario-de-libras/public/media/mao/cg07.jpg
4,ABAIXO,NENHUM,"Lugar, posição ou situação inferior, em relação a outros de nível mais elevado.","Não é no primeiro apartamento abaixo, é no segundo.",APARTAMENTO PRIMEIR@ NÃO SEGUND@ ABAIXO.,ADV.,Nacional,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/videos/abaixoSm_Prog001.mp4,https://www.ines.gov.br/dicionario-de-libras/public/media/palavras/images/abaixoSm_Prog001.jpg,https://www.ines.gov.br/dicionario-de-libras/public/media/mao/cg62.jpg


In [15]:
data[['example_portuguese_sentence', 'example_libras_sentence']].head()

Unnamed: 0,example_portuguese_sentence,example_libras_sentence
0,Invente qualquer palavra que comece com a letra A.,VOCÊ INVENTAR QUALQUER PALAVRA COMEÇAR A.
1,Você gosta de abacate com leite?,VOCÊ GOSTAR ABACATE LEITE JUNTO?
2,"Hoje tomei suco de abacaxi, ele estava ácido.",HOJE S-U-C-O ABACAXI BEBER ÁCID@.
3,"Se você quer abafar seu quarto, é melhor fechar tudo.",S-I VOCÊ QUERER QUARTO SE@ ABAFAR A-R? MELHOR FECHAR-TUDO.
4,"Não é no primeiro apartamento abaixo, é no segundo.",APARTAMENTO PRIMEIR@ NÃO SEGUND@ ABAIXO.


In [44]:
# Remove characters that are not letters from the sentences in Libras
data['example_libras_sentence'] = data['example_libras_sentence'].str.replace("@", "o")
data['example_libras_sentence'] = data['example_libras_sentence'].str.replace("-", "")
data['example_libras_sentence'] = data['example_libras_sentence'].str.lower()
data['example_portuguese_sentence'] = data['example_portuguese_sentence'].str.lower()

In [45]:
data[['example_portuguese_sentence', 'example_libras_sentence']].head(10)

Unnamed: 0,example_portuguese_sentence,example_libras_sentence
0,invente qualquer palavra que comece com a letra a.,você inventar qualquer palavra começar a.
1,você gosta de abacate com leite?,você gostar abacate leite junto?
2,"hoje tomei suco de abacaxi, ele estava ácido.",hoje suco abacaxi beber ácido.
3,"se você quer abafar seu quarto, é melhor fechar tudo.",si você querer quarto seo abafar ar? melhor fechartudo.
4,"não é no primeiro apartamento abaixo, é no segundo.",apartamento primeiro não segundo abaixo.
5,os surdos fizeram um abaixo-assinado pedindo mais empregos ao governo.,surdo abaixoassinado pedir governo emprego mais.
6,a lâmpada do abajur queimou.,coisacônicalâmpada lâmpadaqueimar .
7,está vendo aquela velha se abanando? ela é avó da minha amiga.,2solhar3s velho abanarleque lá(me) vovo amigo(md).
8,o carro velho foi abandonado naquela garagem.,aquelo caragem carro velho abandonar.
9,coitada dessa criança tão bonita dormindo na rua. como seus pais tiveram coragem de abandoná-la?!,criança bonito dormir rua coitado! pai mãe delo abandonar coragem?!


In [46]:
data[['example_portuguese_sentence', 'example_libras_sentence']].to_csv('../data/libras_dictionary_cleaned.csv', index=False)

In [44]:
# Load data with manual corrections
data_cleaned = pd.read_csv('../data/libras_dictionary_cleaned.csv')

In [45]:
data_cleaned.head()

Unnamed: 0,example_portuguese_sentence,example_libras_sentence
0,invente qualquer palavra que comece com a letra a.,você inventar qualquer palavra começar a.
1,você gosta de abacate com leite?,você gostar abacate leite junto?
2,"hoje tomei suco de abacaxi, ele estava ácido.",hoje suco abacaxi beber ácido.
3,"se você quer abafar seu quarto, é melhor fechar tudo.",si você querer quarto seo abafar ar? melhor fechartudo.
4,"não é no primeiro apartamento abaixo, é no segundo.",apartamento primeiro não segundo abaixo.


In [2]:
import torch
import torch.nn as nn
import torch.optim as optmi

from torchtext.data import Field, BucketIterator, TabularDataset
from torch.utils.data import random_split, TensorDataset, DataLoader, Dataset
import numpy as np
import pandas as pd
import spacy
import random
from torchtext.data.metrics import bleu_score
from pprint import pprint
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary


from transformers import AutoTokenizer  # Or BertTokenizer
from transformers import AutoModelForPreTraining  # Or BertForPreTraining for loading pretraining heads
from transformers import AutoModel  # or BertModel, for BERT without pretraining heads
from torch.nn import (TransformerEncoder, TransformerEncoderLayer, TransformerDecoder, TransformerDecoderLayer)
import spacy

# Seeding for reproducible results everytime
SEED = 777

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [47]:
model_bert = AutoModelForPreTraining.from_pretrained('neuralmind/bert-large-portuguese-cased')
tokenizer = AutoTokenizer.from_pretrained('neuralmind/bert-large-portuguese-cased', do_lower_case=False)

### Data Preparation and Preprocessing

Let's see some of the process it can do,

* Train/ Valid/ Test Split: partition your data into a specified train/ valid/ test set.

* File Loading: load the text corpus of various formats (.txt,.json,.csv).

* Tokenization: breaking sentences into list of words.

* Vocab: Generate a list of vocabulary from the text corpus.

* Words to Integer Mapper: Map words into integer numbers for the entire corpus and vice versa.

* Word Vector: Convert a word from higher dimension to lower dimension (Word Embedding).

* Batching: Generate batches of sample.

In [3]:
# spacy portuguese tokenizer
spacy_pt = spacy.load('pt_core_news_sm')

In [4]:
import re

def clean_text(text):

    # Remove characters that are not letters
    text = re.sub(r"[^a-zA-ZÀ-ú.!?]+", ' ', text)
    
    # Remove extra spaces
    text = re.sub(r" +", ' ', text)

    # Remove extra spaces at the beginning and end of the sentence
    text = text.strip()

    # Convert to lower case
    text = text.lower()

    return text

In [5]:
def spacy_tokenizer(text):

    text = clean_text(text)

    return [tok.text for tok in spacy_pt.tokenizer(text)]

In [48]:
libras_sentences_source = Field(
    tokenize = spacy_tokenizer,
    lower=True,
    init_token="<sos>",
    eos_token="<eos>",
)

portuguese_sentences_target = Field(
    tokenize = spacy_tokenizer,
    lower=True,
    init_token="<sos>",
    eos_token="<eos>",
)

In [49]:
dataset = TabularDataset(path='../data/data_svo.csv', format='csv', fields=[('trg', portuguese_sentences_target), ('src', libras_sentences_source)], skip_header=False)

In [50]:
train_data, val_data, test_data = dataset.split(split_ratio=[0.8, 0.1, 0.1], random_state=random.seed(SEED))

In [51]:
print(f"Number of training examples: {len(train_data.examples)}")
print(f"Number of validation examples: {len(val_data.examples)}")
print(f"Number of testing examples: {len(test_data.examples)}")

Number of training examples: 43666
Number of validation examples: 5458
Number of testing examples: 5458


In [52]:
print("Tain sentence example - Libras ", train_data.examples[0].src)
print("Tain sentence example - Portuguese ", train_data.examples[0].trg)

Tain sentence example - Libras  ['nós', 'serer', 'voz', 'pessoa']
Tain sentence example - Portuguese  ['nós', 'seremos', 'a', 'voz', 'destas', 'pessoas', 'sem', 'voz', '.']


In [53]:
libras_sentences_source.build_vocab(train_data, min_freq=2)
portuguese_sentences_target.build_vocab(train_data, min_freq=2)

In [54]:
print(f"Unique tokens in source (libras) vocabulary: {len(libras_sentences_source.vocab)}")
print(f"Unique tokens in target (portuguse) vocabulary: {len(portuguese_sentences_target.vocab)}")

Unique tokens in source (libras) vocabulary: 8504
Unique tokens in target (portuguse) vocabulary: 12778


In [55]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [56]:
BATCH_SIZE = 128

train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, val_data, test_data), 
    batch_size = BATCH_SIZE, 
    device = device,
    sort_within_batch=True,
    sort_key=lambda x: len(x.src),
    shuffle=True
)

In [57]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(input_dim, emb_dim)
        
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, src):
        
        #src = [src len, batch size]
        
        embedded = self.dropout(self.embedding(src))
        
        #embedded = [src len, batch size, emb dim]
        
        outputs, (hidden, cell) = self.rnn(embedded)
        
        #outputs = [src len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #outputs are always from the top hidden layer
        
        return hidden, cell

In [58]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        
        self.output_dim = output_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers
        
        self.embedding = nn.Embedding(output_dim, emb_dim)
        
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        
        self.fc_out = nn.Linear(hid_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, input, hidden, cell):
        
        #input = [batch size]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #n directions in the decoder will both always be 1, therefore:
        #hidden = [n layers, batch size, hid dim]
        #context = [n layers, batch size, hid dim]
        
        input = input.unsqueeze(0)
        
        #input = [1, batch size]
        
        embedded = self.dropout(self.embedding(input))
        
        #embedded = [1, batch size, emb dim]
                
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        
        #output = [seq len, batch size, hid dim * n directions]
        #hidden = [n layers * n directions, batch size, hid dim]
        #cell = [n layers * n directions, batch size, hid dim]
        
        #seq len and n directions will always be 1 in the decoder, therefore:
        #output = [1, batch size, hid dim]
        #hidden = [n layers, batch size, hid dim]
        #cell = [n layers, batch size, hid dim]
        
        prediction = self.fc_out(output.squeeze(0))
        
        #prediction = [batch size, output dim]
        
        return prediction, hidden, cell

In [59]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        
        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have equal number of layers!"
        
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        #src = [src len, batch size]
        #trg = [trg len, batch size]
        #teacher_forcing_ratio is probability to use teacher forcing
        #e.g. if teacher_forcing_ratio is 0.75 we use ground-truth inputs 75% of the time
        
        batch_size = trg.shape[1]
        trg_len = trg.shape[0]
        trg_vocab_size = self.decoder.output_dim
        
        #tensor to store decoder outputs
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        
        #last hidden state of the encoder is used as the initial hidden state of the decoder
        hidden, cell = self.encoder(src)
        
        #first input to the decoder is the <sos> tokens
        input = trg[0,:]
        
        for t in range(1, trg_len):
            
            #insert input token embedding, previous hidden and previous cell states
            #receive output tensor (predictions) and new hidden and cell states
            output, hidden, cell = self.decoder(input, hidden, cell)
            
            #place predictions in a tensor holding predictions for each token
            outputs[t] = output
            
            #decide if we are going to use teacher forcing or not
            teacher_force = random.random() < teacher_forcing_ratio
            
            #get the highest predicted token from our predictions
            top1 = output.argmax(1) 
            
            #if teacher forcing, use actual next token as next input
            #if not, use predicted token
            input = trg[t] if teacher_force else top1
        
        return outputs

In [60]:
for batch in train_iterator:
    print(batch.src.shape)
    print(batch.trg.shape)

torch.Size([7, 128])
torch.Size([14, 128])
torch.Size([8, 128])
torch.Size([17, 128])
torch.Size([10, 128])
torch.Size([18, 128])
torch.Size([6, 128])
torch.Size([14, 128])
torch.Size([6, 128])
torch.Size([14, 128])
torch.Size([5, 128])
torch.Size([13, 128])
torch.Size([8, 128])
torch.Size([16, 128])
torch.Size([8, 128])
torch.Size([16, 128])
torch.Size([6, 128])
torch.Size([13, 128])
torch.Size([8, 128])
torch.Size([16, 128])
torch.Size([7, 128])
torch.Size([15, 128])
torch.Size([7, 128])
torch.Size([16, 128])
torch.Size([8, 128])
torch.Size([14, 128])
torch.Size([7, 128])
torch.Size([15, 128])
torch.Size([7, 128])
torch.Size([14, 128])
torch.Size([7, 128])
torch.Size([14, 128])
torch.Size([9, 128])
torch.Size([20, 128])
torch.Size([7, 128])
torch.Size([15, 128])
torch.Size([6, 128])
torch.Size([15, 128])
torch.Size([6, 128])
torch.Size([15, 128])
torch.Size([7, 128])
torch.Size([15, 128])
torch.Size([8, 128])
torch.Size([15, 128])
torch.Size([9, 128])
torch.Size([16, 128])
torch.Size

In [61]:
INPUT_DIM = len(libras_sentences_source.vocab)
OUTPUT_DIM = len(portuguese_sentences_target.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, device).to(device)

In [62]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(8504, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(12778, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (fc_out): Linear(in_features=512, out_features=12778, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [63]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 19,359,722 trainable parameters


In [64]:
optimizer = optmi.Adam(model.parameters(), lr=0.001)

In [65]:
TRG_PAD_IDX = portuguese_sentences_target.vocab.stoi[portuguese_sentences_target.pad_token]

criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [66]:
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        
        src = batch.src
        trg = batch.trg
        
        optimizer.zero_grad()
        
        output = model(src, trg)
        
        #trg = [trg len, batch size]
        #output = [trg len, batch size, output dim]
        
        output_dim = output.shape[-1]
        
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        
        #trg = [(trg len - 1) * batch size]
        #output = [(trg len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [67]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            src = batch.src
            trg = batch.trg

            output = model(src, trg, 0) #turn off teacher forcing

            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            output_dim = output.shape[-1]
            
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = criterion(output, trg)
            
            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [68]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [69]:
import math
import time


N_EPOCHS = 2
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'slt_libras_v2.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

Epoch: 01 | Time: 0m 32s
	Train Loss: 5.663 | Train PPL: 287.909
	 Val. Loss: 5.356 |  Val. PPL: 211.838
Epoch: 02 | Time: 0m 32s
	Train Loss: 5.245 | Train PPL: 189.606
	 Val. Loss: 5.117 |  Val. PPL: 166.808


In [70]:
model.load_state_dict(torch.load('slt_libras_v2.pt'))

test_loss = evaluate(model, test_iterator, criterion)

In [71]:
test_loss

5.126490005227023

In [72]:
print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')

| Test Loss: 5.126 | Test PPL: 168.425 |


In [73]:
def translate_sentence(model, sentence, libras, portuguese, device, max_length=50):

    if type(sentence) == str:
        tokens = tokenizer.tokenize(sentence)
    else:
        tokens = [token.lower() for token in sentence]
    tokens.insert(0, libras.init_token)
    tokens.append(libras.eos_token)
    text_to_indices = [libras.vocab.stoi[token] for token in tokens]
    sentence_tensor = torch.LongTensor(text_to_indices).unsqueeze(1).to(device)

    # Build encoder hidden, cell state
    with torch.no_grad():
        hidden, cell = model.encoder(sentence_tensor)

    outputs = [portuguese.vocab.stoi["<sos>"]]

    for _ in range(max_length):
        previous_word = torch.LongTensor([outputs[-1]]).to(device)

        with torch.no_grad():
            output, hidden, cell = model.decoder(previous_word, hidden, cell)
            best_guess = output.argmax(1).item()

        outputs.append(best_guess)

        # Model predicts it's the end of the sentence
        if output.argmax(1).item() == portuguese.vocab.stoi["<eos>"]:
            break

    translated_sentence = [portuguese.vocab.itos[idx] for idx in outputs]
    return translated_sentence[1:]

In [74]:
model.eval()

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(8504, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(12778, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (fc_out): Linear(in_features=512, out_features=12778, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [76]:
print("Test sentence: ", test_data[150].src)
print("Correct translation: ", test_data[150].trg)

Test sentence:  ['ele', 'tipo', 'aprender', 'lição']
Correct translation:  ['que', 'tipo', 'de', 'lições', 'ela', 'vai', 'aprender', 'com', 'eles', '.']


In [77]:
test_sentence = ["eu férias julho planejar viajar europa."]
correct_sentence = ["eu tenho férias em julho e vou planejar uma viagem `a europa."]

In [78]:
print("Translated sentence: ", translate_sentence(model, test_sentence, libras_sentences_source, portuguese_sentences_target, device))

Translated sentence:  ['e', 'é', 'o', 'de', 'de', 'de', '.', '.', '<eos>']


In [85]:
example_idx = 1496
example = val_data.examples[example_idx]
print('source sentence: ', ' '.join(example.src))
print('target sentence: ', ' '.join(example.trg))

src_tensor = libras_sentences_source.process([example.src]).to(device)
trg_tensor = portuguese_sentences_target.process([example.trg]).to(device)
print(trg_tensor.shape)

model.eval()
with torch.no_grad():
    outputs = model(src_tensor, trg_tensor, teacher_forcing_ratio=0.1)

print(f"Shape: {outputs.shape}")

output_idx = outputs[1:].squeeze(1).argmax(1)
' '.join([portuguese_sentences_target.vocab.itos[idx] for idx in output_idx])



source sentence:  tornar ponto virada também
target sentence:  e se tornou um ponto de virada para mim também .
torch.Size([13, 1])
Shape: torch.Size([13, 1, 12778])


'e é o que é a de . . <eos> <eos> <eos>'