In [14]:
#Libraries
import pathlib
import random
import string
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import callbacks
from keras.saving import register_keras_serializable


In [15]:
@register_keras_serializable()
class TransformerEncoder(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward projection.

    Each of the two sub-layers is wrapped in a residual connection and a
    layer normalization.

    Args:
        embed_dim: Used as the per-head ``key_dim`` of the attention layer and
            as the unit count of the last ``Dense`` layer. The residual sums
            in ``call`` require the inputs' last axis to have this size.
        dense_dim: Unit count of the hidden ReLU ``Dense`` layer.
        num_heads: Number of attention heads.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        # Declares that this layer accepts a Keras mask in `call`.
        self.supports_masking = True

    def call(self, inputs, mask=None):
        """Run the encoder block.

        Args:
            inputs: Tensor to encode; it is used as query, key and value.
            mask: Optional mask indexed as ``(batch, sequence)``. When it is
                ``None`` the attention runs unmasked.

        Returns:
            The output of the second layer normalization.
        """
        # Default to "no mask". Previously `padding_mask` was only bound inside
        # the `if`, so calling the layer without a mask raised UnboundLocalError.
        padding_mask = None
        if mask is not None:
            # (batch, seq) -> (batch, 1, seq) so it broadcasts over the query axis.
            padding_mask = tf.cast(mask[:, tf.newaxis, :], dtype="int32")
        attention_output = self.attention(
            query=inputs, value=inputs, key=inputs, attention_mask=padding_mask
        )
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

In [16]:
@register_keras_serializable()
class PositionalEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        sequence_length: Number of rows in the position-embedding table, i.e.
            the number of distinct positions that can be looked up.
        vocab_size: Number of rows in the token-embedding table.
        embed_dim: Width of both embedding tables.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, sequence_length, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.token_embeddings = layers.Embedding(
            input_dim=vocab_size, output_dim=embed_dim
        )
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=embed_dim
        )

    def call(self, inputs):
        """Embed token ids and add the embedding of each position index."""
        token_vectors = self.token_embeddings(inputs)
        # One position index per element of the last axis: 0, 1, ..., length - 1.
        num_positions = tf.shape(inputs)[-1]
        position_vectors = self.position_embeddings(tf.range(num_positions))
        return token_vectors + position_vectors

    def compute_mask(self, inputs, mask=None):
        """Return a boolean mask that is True wherever the token id is not 0."""
        return keras.ops.not_equal(inputs, 0)

In [17]:
@register_keras_serializable()
class TransformerDecoder(layers.Layer):
    """Transformer decoder block.

    Applies causal self-attention, then cross-attention over the encoder
    outputs, then a feed-forward projection. Each stage is followed by a
    residual connection and a layer normalization.

    Args:
        embed_dim: Per-head ``key_dim`` of both attention layers and unit
            count of the last ``Dense`` layer.
        latent_dim: Unit count of the hidden ReLU ``Dense`` layer.
        num_heads: Number of heads in both attention layers.
        **kwargs: Forwarded unchanged to ``layers.Layer``.
    """

    def __init__(self, embed_dim, latent_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.latent_dim = latent_dim
        self.num_heads = num_heads
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim
        )
        feed_forward_layers = [
            layers.Dense(latent_dim, activation="relu"),
            layers.Dense(embed_dim),
        ]
        self.dense_proj = keras.Sequential(feed_forward_layers)
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        self.supports_masking = True

    def call(self, inputs, encoder_outputs, mask=None):
        """Run the decoder block.

        Args:
            inputs: Decoder-side tensor; query, key and value of the
                self-attention.
            encoder_outputs: Tensor used as key and value of the
                cross-attention.
            mask: Accepted for Keras mask propagation; not used in this method.

        Returns:
            The output of the third layer normalization.
        """
        # Stage 1: self-attention restricted by the causal mask.
        look_ahead_mask = self.get_causal_attention_mask(inputs)
        self_attended = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=look_ahead_mask,
        )
        self_block = self.layernorm_1(inputs + self_attended)

        # Stage 2: cross-attention (NO causal/padding mask to avoid shape mismatch).
        cross_attended = self.attention_2(
            query=self_block,
            value=encoder_outputs,
            key=encoder_outputs,
        )
        cross_block = self.layernorm_2(self_block + cross_attended)

        # Stage 3: position-wise feed-forward projection.
        return self.layernorm_3(cross_block + self.dense_proj(cross_block))

    def get_causal_attention_mask(self, inputs):
        """Build an int32 mask of shape ``(batch, seq_len, seq_len)``.

        Entry ``[b, i, j]`` is 1 when ``i >= j`` and 0 otherwise. ``batch`` and
        ``seq_len`` are the first two dimensions of ``inputs``.
        """
        dims = tf.shape(inputs)
        batch_size = dims[0]
        seq_len = dims[1]
        row_index = tf.range(seq_len)[:, tf.newaxis]
        col_index = tf.range(seq_len)
        lower_triangle = tf.cast(row_index >= col_index, dtype="int32")
        # Add a leading axis, then repeat the same mask for every batch element.
        lower_triangle = tf.reshape(lower_triangle, (1, seq_len, seq_len))
        return tf.tile(lower_triangle, [batch_size, 1, 1])


In [18]:
@register_keras_serializable()
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
  """Learning-rate schedule with a warm-up term and an inverse-square-root term.

  For a given step the rate is

      rsqrt(d_model) * min(rsqrt(step), step * warmup_steps ** -1.5)

  Args:
    d_model: Scaling value; the rate is multiplied by ``rsqrt(d_model)``.
    warmup_steps: Exponent base of the warm-up term (default 4000).
  """

  def __init__(self, d_model, warmup_steps=4000):
    super().__init__()

    # Keep the value exactly as the caller passed it so get_config() can
    # return it without touching a tensor. The previous implementation called
    # `.numpy()` on the float32 tensor, which yields a NumPy scalar (not
    # JSON-native) and only works when the tensor is eager.
    self._d_model_config = d_model
    self.d_model = tf.cast(d_model, tf.float32)

    self.warmup_steps = warmup_steps

  def __call__(self, step):
    """Return the learning rate for ``step`` as a float32 tensor."""
    step = tf.cast(step, dtype=tf.float32)
    arg1 = tf.math.rsqrt(step)
    arg2 = step * (self.warmup_steps ** -1.5)

    return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

  def get_config(self):
    """Return the constructor arguments needed to rebuild this schedule."""
    return {"d_model": self._d_model_config, "warmup_steps": self.warmup_steps}

In [35]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import load_model
from google.colab import drive
import json, os

# Mount Drive
drive.mount("/content/drive", force_remount=False)

MODEL_DIR = "/content/drive/MyDrive/model/spanish_to_english"
sequence_length = 30  # MUST match training

# Load model
transformer = load_model(os.path.join(MODEL_DIR, "spanish.keras"))

# Load vocab. The encoding is explicit so the token lists decode the same way
# regardless of the platform's default text encoding.
with open(os.path.join(MODEL_DIR, "eng_vocab.json"), encoding="utf-8") as f:
    eng_vocab = json.load(f)

with open(os.path.join(MODEL_DIR, "es_vocab.json"), encoding="utf-8") as f:
    es_vocab = json.load(f)

# Recreate vectorizers
es_vectorization = TextVectorization(
    vocabulary=es_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

eng_vectorization = TextVectorization(
    vocabulary=eng_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

# END token id
end_token_id = eng_vocab.index("end")


# Translation function
def translate_spanish(
    spanish_text,
    max_len=30,
    *,
    _model=transformer,
    _source_vectorizer=es_vectorization,
    _target_vocab=eng_vocab,
    _end_id=end_token_id,
    _max_positions=sequence_length,
):
    """Greedily decode an English translation of ``spanish_text``.

    The underscore-prefixed keyword arguments are not meant to be passed by
    callers. Their defaults are evaluated when this cell runs, which pins the
    function to the Spanish model, vectorizer and vocabulary loaded above.
    Other cells in this notebook rebind the shared names ``transformer``,
    ``eng_vocab`` and ``end_token_id``; reading those globals at call time
    would silently switch this function to whichever model was loaded last.

    Args:
        spanish_text: Source sentence as a single string.
        max_len: Maximum number of decoding steps. Values above
            ``sequence_length`` are clamped to it.

    Returns:
        The decoded tokens joined by single spaces (possibly an empty string).
    """
    encoder_input = _source_vectorizer([spanish_text])
    # Decoding is seeded with a single token id 0.
    decoder_input = tf.zeros((1, 1), dtype=tf.int64)

    # The whole prefix is fed back in on every step, so it must not grow past
    # the sequence length. NOTE(review): assumes the model's position table
    # has `sequence_length` rows - confirm against the training notebook.
    for _ in range(min(max_len, _max_positions)):
        preds = _model(
            {
                "encoder_inputs": encoder_input,
                "decoder_inputs": decoder_input,
            },
            training=False
        )

        # Greedy choice: highest-scoring token at the last decoded position.
        next_token = tf.argmax(preds[:, -1, :], axis=-1)
        token_id = int(next_token[0])

        if token_id == _end_id:
            break

        decoder_input = tf.concat(
            [decoder_input, tf.expand_dims(next_token, axis=-1)],
            axis=-1
        )

    # Map ids back to words, dropping empty-string and end tokens.
    words = [
        _target_vocab[t]
        for t in decoder_input.numpy()[0]
        if t < len(_target_vocab) and _target_vocab[t] not in ("", "end")
    ]

    return " ".join(words).strip()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




In [37]:
# Smoke test: translate a short Spanish sentence and show the result.
spanish_sample = "ve."
print(translate_spanish(spanish_sample))

go


In [38]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import load_model
from google.colab import drive
import json, os

# Mount Drive
drive.mount("/content/drive", force_remount=False)

MODEL_DIR = "/content/drive/MyDrive/hindi_to_english"
sequence_length = 30  # MUST match training

# Load model
transformer = load_model(os.path.join(MODEL_DIR, "model.keras"))

# Load vocab. The Hindi vocabulary is non-ASCII text, so the encoding is
# explicit instead of depending on the platform default.
with open(os.path.join(MODEL_DIR, "eng_vocab.json"), encoding="utf-8") as f:
    eng_vocab = json.load(f)

with open(os.path.join(MODEL_DIR, "hi_vocab.json"), encoding="utf-8") as f:
    hi_vocab = json.load(f)

# Recreate vectorizers
hi_vectorization = TextVectorization(
    vocabulary=hi_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

eng_vectorization = TextVectorization(
    vocabulary=eng_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

# END token id
end_token_id = eng_vocab.index("end")


# Translation function
def translate_hindi(
    hindi_text,
    max_len=30,
    *,
    _model=transformer,
    _source_vectorizer=hi_vectorization,
    _target_vocab=eng_vocab,
    _end_id=end_token_id,
    _max_positions=sequence_length,
):
    """Greedily decode an English translation of ``hindi_text``.

    The underscore-prefixed keyword arguments are not meant to be passed by
    callers. Their defaults are evaluated when this cell runs, which pins the
    function to the Hindi model, vectorizer and vocabulary loaded above.
    Other cells in this notebook rebind the shared names ``transformer``,
    ``eng_vocab`` and ``end_token_id``; reading those globals at call time
    would silently switch this function to whichever model was loaded last.

    Args:
        hindi_text: Source sentence as a single string.
        max_len: Maximum number of decoding steps. Values above
            ``sequence_length`` are clamped to it.

    Returns:
        The decoded tokens joined by single spaces (possibly an empty string).
    """
    encoder_input = _source_vectorizer([hindi_text])
    # Decoding is seeded with a single token id 0.
    decoder_input = tf.zeros((1, 1), dtype=tf.int64)

    # The whole prefix is fed back in on every step, so it must not grow past
    # the sequence length. NOTE(review): assumes the model's position table
    # has `sequence_length` rows - confirm against the training notebook.
    for _ in range(min(max_len, _max_positions)):
        preds = _model(
            {
                "encoder_inputs": encoder_input,
                "decoder_inputs": decoder_input,
            },
            training=False
        )

        # Greedy choice: highest-scoring token at the last decoded position.
        next_token = tf.argmax(preds[:, -1, :], axis=-1)
        token_id = int(next_token[0])

        if token_id == _end_id:
            break

        decoder_input = tf.concat(
            [decoder_input, tf.expand_dims(next_token, axis=-1)],
            axis=-1
        )

    # Map ids back to words, dropping empty-string and end tokens.
    words = [
        _target_vocab[t]
        for t in decoder_input.numpy()[0]
        if t < len(_target_vocab) and _target_vocab[t] not in ("", "end")
    ]

    return " ".join(words).strip()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [39]:
# Smoke test: translate a short Hindi sentence and show the result.
hindi_sample = "आप कैसे हैं"
print(translate_hindi(hindi_sample))

how are you


In [48]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import load_model
from google.colab import drive
import json, os

# Mount Drive
drive.mount("/content/drive", force_remount=False)

MODEL_DIR = "/content/drive/MyDrive/model/malayalam_to_english"
sequence_length = 30  # MUST match training

# Load model
transformer = load_model(os.path.join(MODEL_DIR, "model.keras"))

# Load vocab. The Malayalam vocabulary is non-ASCII text, so the encoding is
# explicit instead of depending on the platform default.
with open(os.path.join(MODEL_DIR, "eng_vocab.json"), encoding="utf-8") as f:
    eng_vocab = json.load(f)

with open(os.path.join(MODEL_DIR, "ml_vocab.json"), encoding="utf-8") as f:
    ml_vocab = json.load(f)

# Recreate vectorizers
ml_vectorization = TextVectorization(
    vocabulary=ml_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

eng_vectorization = TextVectorization(
    vocabulary=eng_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

# END token id
end_token_id = eng_vocab.index("end")


# Translation function
def translate_malayalam(
    malayalam_text,
    max_len=30,
    *,
    _model=transformer,
    _source_vectorizer=ml_vectorization,
    _target_vocab=eng_vocab,
    _end_id=end_token_id,
    _max_positions=sequence_length,
):
    """Greedily decode an English translation of ``malayalam_text``.

    The underscore-prefixed keyword arguments are not meant to be passed by
    callers. Their defaults are evaluated when this cell runs, which pins the
    function to the Malayalam model, vectorizer and vocabulary loaded above.
    Other cells in this notebook rebind the shared names ``transformer``,
    ``eng_vocab`` and ``end_token_id``; reading those globals at call time
    would silently switch this function to whichever model was loaded last.

    Args:
        malayalam_text: Source sentence as a single string.
        max_len: Maximum number of decoding steps. Values above
            ``sequence_length`` are clamped to it.

    Returns:
        The decoded tokens joined by single spaces (possibly an empty string).
    """
    encoder_input = _source_vectorizer([malayalam_text])
    # Decoding is seeded with a single token id 0.
    decoder_input = tf.zeros((1, 1), dtype=tf.int64)

    # The whole prefix is fed back in on every step, so it must not grow past
    # the sequence length. NOTE(review): assumes the model's position table
    # has `sequence_length` rows - confirm against the training notebook.
    for _ in range(min(max_len, _max_positions)):
        preds = _model(
            {
                "encoder_inputs": encoder_input,
                "decoder_inputs": decoder_input,
            },
            training=False
        )

        # Greedy choice: highest-scoring token at the last decoded position.
        next_token = tf.argmax(preds[:, -1, :], axis=-1)
        token_id = int(next_token[0])

        if token_id == _end_id:
            break

        decoder_input = tf.concat(
            [decoder_input, tf.expand_dims(next_token, axis=-1)],
            axis=-1
        )

    # Map ids back to words, dropping empty-string and end tokens.
    words = [
        _target_vocab[t]
        for t in decoder_input.numpy()[0]
        if t < len(_target_vocab) and _target_vocab[t] not in ("", "end")
    ]

    return " ".join(words).strip()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [49]:
# Smoke test: translate a Malayalam sentence and show the result.
malayalam_sample = "മാധ്യമങ്ങള്‍ക്ക് നിയന്ത്രണം [UNK] ഹരജി ഹൈക്കോടതി തള്ളി"
print(translate_malayalam(malayalam_sample))

the rejected plea plea filed by hc order


In [45]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import load_model
from google.colab import drive
import json, os

# Mount Drive
drive.mount("/content/drive", force_remount=False)

MODEL_DIR = "/content/drive/MyDrive/model/eng_to_tamil"
sequence_length = 30  # MUST match training

# Load model
transformer = load_model(os.path.join(MODEL_DIR, "model.keras"))

# Load vocab. The Tamil vocabulary is non-ASCII text, so the encoding is
# explicit instead of depending on the platform default.
with open(os.path.join(MODEL_DIR, "en_vocab.json"), encoding="utf-8") as f:
    en_vocab = json.load(f)

with open(os.path.join(MODEL_DIR, "ta_vocab.json"), encoding="utf-8") as f:
    ta_vocab = json.load(f)

# Recreate vectorizers
en_vectorization = TextVectorization(
    vocabulary=en_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

ta_vectorization = TextVectorization(
    vocabulary=ta_vocab,
    output_mode="int",
    output_sequence_length=sequence_length
)

# END token id (from Tamil vocab)
end_token_id = ta_vocab.index("[end]")


# Translation function
def translate_tamil(
    english_text,
    max_len=30,
    *,
    _model=transformer,
    _source_vectorizer=en_vectorization,
    _target_vocab=ta_vocab,
    _end_id=end_token_id,
    _max_positions=sequence_length,
):
    """Greedily decode a Tamil translation of ``english_text``.

    The underscore-prefixed keyword arguments are not meant to be passed by
    callers. Their defaults are evaluated when this cell runs, which pins the
    function to the English-to-Tamil model, vectorizer and vocabulary loaded
    above. Other cells in this notebook rebind the shared names
    ``transformer`` and ``end_token_id``; reading those globals at call time
    would silently switch this function to whichever model was loaded last.

    Args:
        english_text: Source sentence as a single string.
        max_len: Maximum number of decoding steps. Values above
            ``sequence_length`` are clamped to it.

    Returns:
        The decoded tokens joined by single spaces (possibly an empty string).
    """
    encoder_input = _source_vectorizer([english_text])
    # Decoding is seeded with a single token id 0.
    decoder_input = tf.zeros((1, 1), dtype=tf.int64)

    # The whole prefix is fed back in on every step, so it must not grow past
    # the sequence length. NOTE(review): assumes the model's position table
    # has `sequence_length` rows - confirm against the training notebook.
    for _ in range(min(max_len, _max_positions)):
        preds = _model(
            {
                "encoder_inputs": encoder_input,
                "decoder_inputs": decoder_input,
            },
            training=False
        )

        # Greedy choice: highest-scoring token at the last decoded position.
        next_token = tf.argmax(preds[:, -1, :], axis=-1)
        token_id = int(next_token[0])

        # Stop when END token appears
        if token_id == _end_id:
            break

        decoder_input = tf.concat(
            [decoder_input, tf.expand_dims(next_token, axis=-1)],
            axis=-1
        )

    # Map ids back to words, dropping empty-string and end tokens. This cell's
    # end token is "[end]"; the filter previously named "end", which did not
    # match the token looked up for `end_token_id`.
    words = [
        _target_vocab[t]
        for t in decoder_input.numpy()[0]
        if t < len(_target_vocab) and _target_vocab[t] not in ("", "[end]")
    ]

    return " ".join(words).strip()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [46]:
# Smoke test: translate a short English sentence into Tamil and show the result.
english_sample = "how are you"
print(translate_tamil(english_sample))

நீங்கள் எப்படி இருக்கிறீர்கள்
