In [11]:
# This notebook (`Translator_Shan.ipynb`) loads the trained Transformer model together with the saved TextVectorization layers
# and lets the user enter English sentences and receive Spanish translations.
# All components (model weights and vectorizers) are pre-trained and saved, so no additional training is required to run this notebook.

import numpy as np
import tensorflow as tf
import string
import re
from keras.models import load_model
from transformer import Transformer
from keras.saving import register_keras_serializable


In [13]:
@register_keras_serializable()
def custom_standardization(input_string):
    """Lowercase a string tensor and delete punctuation, keeping square brackets.

    The deleted characters are ``string.punctuation`` plus "¿", with "[" and
    "]" taken out of that set (presumably so bracketed marker tokens such as
    "[start]" and "[end]" survive standardization).

    Args:
        input_string: String tensor to standardize.

    Returns:
        The lowercased string tensor with the punctuation characters removed.
    """
    # Characters to delete: ASCII punctuation and "¿", except the brackets.
    chars_to_strip = (string.punctuation + "¿").replace("[", "").replace("]", "")
    # Escape the set so every character is taken literally inside the class.
    strip_pattern = f"[{re.escape(chars_to_strip)}]"
    lowercased = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowercased, strip_pattern, "")


In [15]:
# Restore the two saved vectorizers from their `.keras` files
# (English/source side first, then Spanish/target side).
source_vectorization = load_model(filepath="source_vectorizer.keras")
target_vectorization = load_model(filepath="target_vectorizer.keras")


In [17]:
# Build the id -> token table used to turn predicted indices back into words.
spa_vocab = target_vectorization.get_vocabulary()
spa_index_lookup = {index: token for index, token in enumerate(spa_vocab)}


In [19]:
# Recreate the Transformer architecture; the saved weights are loaded into it
# in a later cell.
# NOTE(review): these hyperparameters presumably have to match the ones used
# when the weights were trained — confirm against the training notebook.
vocab_size = 15000
seq_length = 20
model = Transformer(
    n_layers=4,
    d_emb=128,
    n_heads=8,
    d_ff=512,
    dropout_rate=0.1,
    src_vocab_size=vocab_size,
    tgt_vocab_size=vocab_size,
)


In [21]:
# Call the model once on a placeholder batch (one row of `seq_length` ones,
# used as both source and target) to build it, then restore the saved weights.
dummy_input = tf.ones((1, seq_length), dtype=tf.int32)
model((dummy_input, dummy_input))
model.load_weights("translation_transformer.weights.h5")




In [23]:
# Translate function
def decode_sequence(input_sentence):
    """Greedily translate one English sentence into Spanish.

    Starting from the "[start]" marker, the model is called once per output
    position; the highest-scoring token at that position is appended to the
    running target sentence. Decoding stops when "[end]" is predicted or after
    ``seq_length`` steps.

    Args:
        input_sentence: The English sentence to translate (a Python string).

    Returns:
        The predicted tokens joined by single spaces, without the "[start]"
        and "[end]" markers. Returns an empty string when the first predicted
        token is "[end]" (the previous string-replace clean-up leaked the
        literal "[end]" in that case).
    """
    tokenized_input_sentence = source_vectorization([input_sentence])
    # Text fed back to the decoder on every step; always begins with "[start]".
    decoded_sentence = "[start]"
    # Tokens that make up the returned translation (markers never go in here).
    translated_tokens = []
    for i in range(seq_length):
        # Drop the last position of the vectorized target.
        # NOTE(review): presumably the target vectorizer pads to
        # seq_length + 1 so this leaves seq_length positions — confirm.
        tokenized_target_sentence = target_vectorization([decoded_sentence])[:, :-1]
        predictions = model((tokenized_input_sentence, tokenized_target_sentence))
        # Greedy choice: most likely vocabulary id at the current position.
        sampled_token_index = int(np.argmax(predictions[0, i, :]))
        sampled_token = spa_index_lookup[sampled_token_index]
        if sampled_token == "[end]":
            break
        decoded_sentence += " " + sampled_token
        translated_tokens.append(sampled_token)
    return " ".join(translated_tokens)


In [29]:
# Interactive prompt: read English sentences until the user types 'exit'.
if __name__ == "__main__":
    while True:
        try:
            # Strip surrounding whitespace so inputs like "exit " still quit.
            input_sentence = input("\nEnter English sentence (or type 'exit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the prompt was interrupted: stop quietly
            # instead of surfacing a traceback.
            break
        if input_sentence.lower() == "exit":
            break
        if not input_sentence:
            # Nothing to translate; ask again rather than calling the model.
            continue
        print("Spanish:", decode_sequence(input_sentence))



Enter English sentence (or type 'exit'):  What is your name?


Spanish: cómo se llama tu nombre



Enter English sentence (or type 'exit'):  I'm good, how are you?


Spanish: soy bueno como tú eres



Enter English sentence (or type 'exit'):  I have completed this project.


Spanish: he terminado este proyecto



Enter English sentence (or type 'exit'):  exit


In [None]:
# --