# English to Portuguese Machine Translation with Attention 

In [151]:

import os
import numpy as np
import tensorflow as tf
from collections import Counter
import tensorflow_text as tf_text
import pathlib


# 1) Preprocessing sentences (Load, Vectorization, Train-Val split)

In [152]:
path_to_file = pathlib.Path("por-eng/por.txt")

def load_data(path):
    text = path.read_text(encoding="utf-8")

    lines = text.splitlines()
    pairs = [line.split("\t") for line in lines]

    context = np.array([context for target, context, _ in pairs])
    target = np.array([target for target, context, _ in pairs])

    return context, target


portuguese_sentences, english_sentences = load_data(path_to_file)


In [153]:
BUFFER_SIZE = len(english_sentences)
BATCH_SIZE = 64

is_train = np.random.uniform(size=(len(portuguese_sentences),)) < 0.8

train_raw = (
    tf.data.Dataset.from_tensor_slices(
        (english_sentences[is_train], portuguese_sentences[is_train]) 
    )
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
val_raw = (
    tf.data.Dataset.from_tensor_slices(
        (english_sentences[~is_train], portuguese_sentences[~is_train])
    )
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
)


In [154]:
def tf_lower_and_split_punct(text):
    text = tf_text.normalize_utf8(text, "NFKD")
    text = tf.strings.lower(text)
    text = tf.strings.regex_replace(text, "[^ a-z.?!,¿]", "")
    text = tf.strings.regex_replace(text, "[.?!,¿]", r" \0 ")
    text = tf.strings.strip(text)
    text = tf.strings.join(["[SOS]", text, "[EOS]"], separator=" ")
    return text


In [155]:

max_vocab_size = 12000

english_vectorizer = tf.keras.layers.TextVectorization(
    standardize=tf_lower_and_split_punct, max_tokens=max_vocab_size, ragged=True
)

english_vectorizer.adapt(train_raw.map(lambda context, target: context))

portuguese_vectorizer = tf.keras.layers.TextVectorization(
    standardize=tf_lower_and_split_punct, max_tokens=max_vocab_size, ragged=True
)

portuguese_vectorizer.adapt(train_raw.map(lambda context, target: target))


2024-07-28 13:57:53.838359: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [151967]
	 [[{{node Placeholder/_0}}]]
2024-07-28 13:57:53.838797: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [151967]
	 [[{{node Placeholder/_1}}]]
2024-07-28 13:57:56.010854: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_1' with dtype string and shape [

In [156]:

print(f"First 10 words of the english vocabulary:\n\n{english_vectorizer.get_vocabulary()[:10]}\n")
print(f"First 10 words of the portuguese vocabulary:\n\n{portuguese_vectorizer.get_vocabulary()[:10]}")


First 10 words of the english vocabulary:

['', '[UNK]', '[SOS]', '[EOS]', '.', 'tom', 'i', 'to', 'you', 'the']

First 10 words of the portuguese vocabulary:

['', '[UNK]', '[SOS]', '[EOS]', '.', 'tom', 'que', 'o', 'nao', 'eu']


In [157]:
# Size of the vocabulary
vocab_size_por = portuguese_vectorizer.vocabulary_size()
vocab_size_eng = english_vectorizer.vocabulary_size()

print(f"Portuguese vocabulary is made up of {vocab_size_por} words")
print(f"English vocabulary is made up of {vocab_size_eng} words")

Portuguese vocabulary is made up of 12000 words
English vocabulary is made up of 12000 words


In [158]:
# This helps you convert from words to ids
word_to_id = tf.keras.layers.StringLookup(
    vocabulary=portuguese_vectorizer.get_vocabulary(), 
    mask_token="", 
    oov_token="[UNK]"
)

# This helps you convert from ids to words
id_to_word = tf.keras.layers.StringLookup(
    vocabulary=portuguese_vectorizer.get_vocabulary(),
    mask_token="",
    oov_token="[UNK]",
    invert=True,
)

In [159]:
unk_id = word_to_id("[UNK]")
sos_id = word_to_id("[SOS]")
eos_id = word_to_id("[EOS]")
baunilha_id = word_to_id("baunilha")

print(f"The id for the [UNK] token is {unk_id}")
print(f"The id for the [SOS] token is {sos_id}")
print(f"The id for the [EOS] token is {eos_id}")
print(f"The id for baunilha (vanilla) is {baunilha_id}")

The id for the [UNK] token is 1
The id for the [SOS] token is 2
The id for the [EOS] token is 3
The id for baunilha (vanilla) is 6276


In [160]:
def process_text(context, target):
    context = english_vectorizer(context).to_tensor()
    target = portuguese_vectorizer(target)
    targ_in = target[:, :-1].to_tensor()
    targ_out = target[:, 1:].to_tensor()
    return (context, targ_in), targ_out


train_data = train_raw.map(process_text, tf.data.AUTOTUNE)
val_data = val_raw.map(process_text, tf.data.AUTOTUNE)


In [161]:

for (to_translate, sr_translation), translation in train_data.take(2):
    print(to_translate[0])
    print(translation[0])
    print(sr_translation[0])
    
    print(to_translate.shape)
    print(translation.shape)
    print(sr_translation.shape)
    print('\n')


2024-07-28 13:58:02.015265: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_13' with dtype string
	 [[{{node Placeholder/_13}}]]
2024-07-28 13:58:02.015897: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_18' with dtype int64
	 [[{{node Placeholder/_18}}]]


tf.Tensor(
[  2   9 219  30 576  11 115  23 946   4   3   0   0   0   0   0   0   0
   0], shape=(19,), dtype=int64)
tf.Tensor(
[ 38 229  71 156  28 890   4   3   0   0   0   0   0   0   0   0   0   0
   0   0], shape=(20,), dtype=int64)
tf.Tensor(
[  2  38 229  71 156  28 890   4   0   0   0   0   0   0   0   0   0   0
   0   0], shape=(20,), dtype=int64)
(64, 19)
(64, 20)
(64, 20)


tf.Tensor(
[   2   44  258   12 2405   96   11 3954    4    3    0    0    0    0
    0    0    0    0], shape=(18,), dtype=int64)
tf.Tensor(
[  12   56  194   15 6585   21   16 3131    4    3    0    0    0    0
    0    0    0    0    0], shape=(19,), dtype=int64)
tf.Tensor(
[   2   12   56  194   15 6585   21   16 3131    4    0    0    0    0
    0    0    0    0    0], shape=(19,), dtype=int64)
(64, 18)
(64, 19)
(64, 19)




# 2) Encoder, Self-attention Decoder

In [162]:
VOCAB_SIZE = 12000
UNITS = 256


In [163]:
class Encoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, units):
        super(Encoder, self).__init__()

        self.embedding = tf.keras.layers.Embedding(input_dim=vocab_size , output_dim=units , mask_zero=True)  
        self.rnn = tf.keras.layers.Bidirectional(merge_mode="sum" , layer=tf.keras.layers.LSTM(units=units , return_sequences=True),)  


    def call(self, context):
        x = self.embedding(context)
        x = self.rnn(x)
        
        return x



In [164]:

# class Encoder(tf.keras.layers.Layer):
#     def __init__(self, vocab_size, units):
#         super(Encoder, self).__init__()
#         self.embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=units, mask_zero=True)
#         self.rnn = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, return_sequences=True, return_state=True))

#     def call(self, inputs):
#         x = self.embedding(inputs)
#         outputs, forward_h, forward_c, backward_h, backward_c = self.rnn(x)
#         state_h = tf.concat([forward_h, backward_h], axis=-1)
#         state_c = tf.concat([forward_c, backward_c], axis=-1)
#         return outputs, state_h, state_c


In [165]:

class CrossAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(key_dim=units, num_heads=1)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

    def call(self, context, target):
        attn_output = self.mha(query=target, value=context)
        x = self.add([target, attn_output])
        x = self.layernorm(x)
        return x


In [166]:

# Create an instance of your class
encoder = Encoder(VOCAB_SIZE, UNITS)

# Pass a batch of sentences to translate from english to portuguese
encoder_output = encoder(to_translate)

print(f'Tensor of sentences in english has shape: {to_translate.shape}\n')
print(f'Encoder output has shape: {encoder_output.shape}')


Tensor of sentences in english has shape: (64, 18)

Encoder output has shape: (64, 18, 256)


In [167]:

# Create an instance of your class
attention_layer = CrossAttention(UNITS)

# The attention layer expects the embedded sr-translation and the context
# The context (encoder_output) is already embedded so you need to do this for sr_translation:
sr_translation_embed = tf.keras.layers.Embedding(VOCAB_SIZE, output_dim=UNITS, mask_zero=True)(sr_translation)

# Compute the cross attention
attention_result = attention_layer(encoder_output, sr_translation_embed)

print(f'Tensor of contexts has shape: {encoder_output.shape}')
print(f'Tensor of translations has shape: {sr_translation_embed.shape}')
print(f'Tensor of attention scores has shape: {attention_result.shape}')


Tensor of contexts has shape: (64, 18, 256)
Tensor of translations has shape: (64, 19, 256)
Tensor of attention scores has shape: (64, 19, 256)


In [93]:

# encoder_lstm = tf.keras.layers.LSTM(
#     units=256,
#     return_sequences=True,
#     return_state=True
# )

# encoder_output_sequences, encoder_final_hidden_state, encoder_final_cell_state = encoder_lstm(encoder_inputs)


In [None]:

# decoder_lstm = tf.keras.layers.LSTM(
#     units=256,
#     return_sequences=True,
#     return_state=True
# )

# decoder_output_sequences, _, _ = decoder_lstm(decoder_inputs, initial_state=[encoder_final_hidden_state, encoder_final_cell_state])


In [168]:

class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, units):
        super(Decoder, self).__init__()
        self.embedding = tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=units, mask_zero=True)
        self.pre_attention_rnn = tf.keras.layers.LSTM(units=units, return_sequences=True, return_state=True)
        self.attention = CrossAttention(units)
        self.post_attention_rnn = tf.keras.layers.LSTM(units=units, return_sequences=True)
        self.output_layer = tf.keras.layers.Dense(units=vocab_size, activation=tf.nn.log_softmax)

    def call(self, context, target, state=None, return_state=False):
        x = self.embedding(target)
        x, hidden_state, cell_state = self.pre_attention_rnn(x, initial_state=state)
        x = self.attention(context, x)
        x = self.post_attention_rnn(x)
        logits = self.output_layer(x)
        if return_state:
            return logits, [hidden_state, cell_state]
        return logits


# 3) Put together Encoder-Decoder into Translator model

In [169]:

class Translator(tf.keras.Model):
    def __init__(self, vocab_size, units):
        super().__init__()
        
        self.encoder = Encoder(vocab_size, units)
        self.decoder = Decoder(vocab_size, units)

    def call(self, inputs): # inputs (tuple(tf.Tensor, tf.Tensor)): Tuple containing the context (sentence to translate) and the target (shifted-to-the-right translation)
        
        context, target = inputs
        encoded_context = self.encoder(context)
        
        logits = self.decoder(encoded_context, target)

        return logits


In [170]:

# Create an instance of your class
translator = Translator(VOCAB_SIZE, UNITS)

# Compute the logits for every word in the vocabulary
logits = translator((to_translate, sr_translation))

print(f'Tensor of sentences to translate has shape: {to_translate.shape}')
print(f'Tensor of right-shifted translations has shape: {sr_translation.shape}')
print(f'Tensor of logits has shape: {logits.shape}')

Tensor of sentences to translate has shape: (64, 18)
Tensor of right-shifted translations has shape: (64, 19)
Tensor of logits has shape: (64, 19, 12000)


# 4) Training

In [172]:

def masked_loss(y_true, y_pred):
    
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
    loss = loss_fn(y_true, y_pred)
    
    # Check which elements of y_true are padding
    mask = tf.cast(y_true != 0, loss.dtype)
    
    loss *= mask
    # Return the total.
    return tf.reduce_sum(loss)/tf.reduce_sum(mask)


In [173]:

def masked_acc(y_true, y_pred):
    y_pred = tf.argmax(y_pred, axis=-1)
    y_pred = tf.cast(y_pred, y_true.dtype)
    match = tf.cast(y_true == y_pred, tf.float32)
    mask = tf.cast(y_true != 0, tf.float32)

    return tf.reduce_sum(match)/tf.reduce_sum(mask)


In [183]:

def compile_and_train(model, epochs=10, steps_per_epoch=500):
    model.compile(optimizer="adam", loss=masked_loss, metrics=[masked_acc, masked_loss])

    history = model.fit(
        train_data.repeat(),
        epochs=epochs,
        steps_per_epoch=steps_per_epoch,
        validation_data=val_data,
        validation_steps=50,
        callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)],
    )

    return model, history


In [184]:

trained_translator, history = compile_and_train(translator)


Epoch 1/10


2024-07-28 14:02:29.180519: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_18' with dtype int64
	 [[{{node Placeholder/_18}}]]
2024-07-28 14:02:29.181058: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_18' with dtype int64
	 [[{{node Placeholder/_18}}]]
2024-07-28 14:02:33.856005: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'gradients/ReverseV2_grad/ReverseV2/ReverseV2/axis' with dtype int32 and shape [1]



2024-07-28 14:07:17.917619: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_14' with dtype int64
	 [[{{node Placeholder/_14}}]]
2024-07-28 14:07:17.918230: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_14' with dtype int64
	 [[{{node Placeholder/_14}}]]


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10


# 5) Inference: Next token prediction

In [185]:

def generate_next_token(decoder, context, next_token, done, state, temperature=0.0):
    
    """Generates the next token in the sequence

    Args:
        decoder (Decoder): The decoder
        context (tf.Tensor): Encoded sentence to translate
        next_token (tf.Tensor): The predicted next token
        done (bool): True if the translation is complete
        state (list[tf.Tensor, tf.Tensor]): Hidden states of the pre-attention LSTM layer
        temperature (float, optional): The temperature that controls the randomness of the predicted tokens. Defaults to 0.0.

    Returns:
        tuple(tf.Tensor, np.float, list[tf.Tensor, tf.Tensor], bool): The next token, log prob of said token, hidden state of LSTM and if translation is done
    """
    
    # Get the logits and state from the decoder
    logits, state = decoder(context, next_token, state=state, return_state=True)
    
    # Trim the intermediate dimension 
    logits = logits[:, -1, :]
        
    # If temp is 0 then next_token is the argmax of logits
    if temperature == 0.0:
        next_token = tf.argmax(logits, axis=-1)
        
    # If temp is not 0 then next_token is sampled out of logits
    else:
        logits = logits / temperature
        next_token = tf.random.categorical(logits, num_samples=1)
    
    # Trim dimensions of size 1
    logits = tf.squeeze(logits)
    next_token = tf.squeeze(next_token)
    
    # Get the logit of the selected next_token
    logit = logits[next_token].numpy()
    
    # Reshape to (1,1) since this is the expected shape for text encoded as TF tensors
    next_token = tf.reshape(next_token, shape=(1,1))
    
    # If next_token is End-of-Sentence token you are done
    if next_token == eos_id:
        done = True
    
    return next_token, logit, state, done


In [None]:

decoder: The trained Decoder model that will generate predictions based on the input context and the current state.

context: The encoded representation of the input sentence (from the encoder) that provides context for the decoder to generate the translation.

next_token: The token predicted in the previous step. Initially, this could be a start-of-sequence token.

done: A boolean flag indicating whether the translation is complete. It helps to stop the generation process when an end-of-sequence token is produced.

state: The hidden states of the pre-attention LSTM layer in the decoder, which are updated at each step and used to maintain context.

temperature: A parameter that controls the randomness of token generation. A temperature of 0.0 means always choosing the most likely token, while a higher temperature introduces more randomness.


In [187]:

# A sentence you wish to translate
eng_sentence = "Cat is the most adorable"

# Convert it to a tensor: tf.convert_to_tensor(eng_sentence) converts the sentence into a TensorFlow tensor. The [tf.newaxis] adds a new axis to the tensor, creating a batch dimension. 
# This is necessary because TensorFlow models expect input tensors to have a batch dimension.
texts = tf.convert_to_tensor(eng_sentence)[tf.newaxis]

# Vectorize it and pass it through the encoder
context = english_vectorizer(texts).to_tensor()
context = encoder(context)

# SET STATE OF THE DECODER
decoder_instance = Decoder(vocab_size=VOCAB_SIZE, units=UNITS)

# There should be three initializer:
# sos_id, hidden_state, cell_state. 
# Next token is Start-of-Sentence since you are starting fresh
next_token = tf.fill((1,1), sos_id)

# Hidden and Cell states of the LSTM can be mocked using uniform samples
state = [tf.random.uniform((1, UNITS)), tf.random.uniform((1, UNITS))]

# You are not done until next token is EOS token
done = False

# Generate next token
next_token, logit, state, done = generate_next_token(decoder_instance , context, next_token, done, state, temperature=0.5)
print(f"Next token: {next_token}\nLogit: {logit:.4f}\nDone? {done}")


Next token: [[1367]]
Logit: -18.7363
Done? False


In [188]:
context


<tf.Tensor: shape=(1, 7, 256), dtype=float32, numpy=
array([[[-0.00096594,  0.00467819, -0.00716301, ...,  0.00231067,
          0.00938596,  0.00606533],
        [-0.0054131 ,  0.00656668, -0.00177414, ...,  0.00797553,
         -0.00529242,  0.00201095],
        [ 0.00638044,  0.01237875, -0.00468413, ...,  0.01869044,
         -0.01541931,  0.00767817],
        ...,
        [-0.00122468,  0.02203771, -0.00367437, ...,  0.01645075,
         -0.00977448,  0.00972566],
        [-0.0032827 ,  0.02007942, -0.01447834, ...,  0.00954908,
         -0.0106133 , -0.00388205],
        [-0.00512495,  0.01496253, -0.01858178, ...,  0.00752145,
         -0.00435399, -0.00835634]]], dtype=float32)>

In [189]:
id_to_word(next_token)


<tf.Tensor: shape=(1, 1), dtype=string, numpy=array([[b'publico']], dtype=object)>

# 6) Translation

Now you can combine all the steps to translate a given sentence. To do this, we generate translate function provided below. This function will handle the following steps:              

1. Process and encode the sentence to be translated.            
2. Initialize the decoder's initial state.          
3. Predict the next token (starting with the `<SOS>` token) for a maximum number of iterations (in case the `<EOS>` token is never returned).           
4. Return the translated text (as a string), the logit of the last iteration (which helps measure the certainty of the complete sequence translation), and the translation in token format.            

In [196]:
def tokens_to_text(tokens, id_to_word):
    words = id_to_word(tokens)
    result = tf.strings.reduce_join(words, axis=-1, separator=" ")
    return result


In [205]:

def translate(model, text, max_length=50, temperature=0.0):
    """Translate a given sentence from English to Portuguese

    Args:
        model (tf.keras.Model): The trained translator
        text (string): The sentence to translate
        max_length (int, optional): The maximum length of the translation. Defaults to 50.
        temperature (float, optional): The temperature that controls the randomness of the predicted tokens. Defaults to 0.0.

    Returns:
        tuple(str, np.float, tf.Tensor): The translation, logit that predicted <EOS> token and the tokenized translation
    """
    # Lists to save tokens and logits
    tokens, logits = [], []

    # PROCESS THE SENTENCE TO TRANSLATE
    
    # Convert the original string into a tensor
    text_tensor = tf.convert_to_tensor([text])
    
    # Vectorize the text using the correct vectorizer
    context = english_vectorizer(text_tensor).to_tensor()
    
    # Get the encoded context (pass the context through the encoder)
    context = model.encoder(context)
    
    # INITIAL STATE OF THE DECODER
    
    # First token should be SOS token with shape (1,1)
    next_token = tf.fill((1, 1), sos_id)
    
    # Initial hidden and cell states should be tensors of zeros with shape (1, UNITS)
    state = [tf.zeros((1, UNITS)), tf.zeros((1, UNITS))]
    
    # You are done when you draw a EOS token as next token (initial state is False)
    done = False

    # Iterate for max_length iterations
    for _ in range(max_length):
        # Generate the next token
        try:
            next_token, logit, state, done = generate_next_token(
                decoder=model.decoder,
                context=context,
                next_token=next_token,
                done=done,
                state=state,
                temperature=temperature
            )
        except Exception as e:
            raise Exception(f"Problem generating the next token: {e}")
        
        # If done then break out of the loop
        if done:
            break
        
        # Add next_token to the list of tokens
        tokens.append(next_token)
        
        # Add logit to the list of logits
        logits.append(logit)
   
    # Concatenate all tokens into a tensor
    tokens = tf.concat(tokens, axis=-1)
    
    # Convert the translated tokens into text
    translation = tf.squeeze(tokens_to_text(tokens, id_to_word))
    translation = translation.numpy().decode()
    
    return translation, logits[-1], tokens


In [206]:
# Running this cell multiple times should return the same output since temp is 0

temp = 0.0 
original_sentence = "Cats are the most lovely animals"

translation, logit, tokens = translate(trained_translator, original_sentence, temperature=temp)

print(f"Temperature: {temp}\n\nOriginal sentence: {original_sentence}\nTranslation: {translation}\nTranslation tokens:{tokens}\nLogit: {logit:.3f}")


Temperature: 0.0

Original sentence: Cats are the most lovely animals
Translation: os gatos sao os animais mais [UNK] .
Translation tokens:[[ 40 758  73  40 938  32   1   4]]
Logit: -1.978
