<a href="https://colab.research.google.com/github/lovnishverma/Python-Getting-Started/blob/main/Kangri_to_Hindi_Translation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-Tuning IndicTrans2-1B for Kangri-to-Hindi Translation (LoRA)

This notebook implements a complete pipeline to fine-tune the `IndicTrans2-1B` model on a low-resource dialect (Kangri) using Low-Rank Adaptation (LoRA).

### Key Features:
* **Memory Efficient:** Uses LoRA and FP16 to run on a single T4 GPU (free Colab).
* **Correct Tokenization:** Uses `doi_Deva` (Dogri) as a proxy source tag for Kangri.
* **Evaluation:** Includes a validation split and BLEU score calculation to track translation quality.
* **Bug Fixes:** Handles known compatibility issues with `use_cache` in the transformers library.



---



# Generate Dataset

**Robust Data Generation**: It starts with the Context-Free Grammar approach which will create ~5,000+ high-quality Kangri sentences with correct grammar (questions, imperatives, possessives).

In [2]:
import pandas as pd
import random

# 1. METADATA

subjects = [
    {"k": "मैं", "h": "मैं", "ne_k": "मैं",
        "ne_h": "मैंने", "per": "1", "num": "sg", "g": "m"},
    {"k": "मैं", "h": "मैं", "ne_k": "मैं",
        "ne_h": "मैंने", "per": "1", "num": "sg", "g": "f"},
    {"k": "तू", "h": "तुम", "ne_k": "तैं", "ne_h": "तुमने",
        "per": "2", "num": "sg", "g": "m"},
    {"k": "तुसां", "h": "आप", "ne_k": "तुसां",
        "ne_h": "आपने", "per": "2", "num": "pl", "g": "m"},
    {"k": "सैह", "h": "वह", "ne_k": "तिनी",
        "ne_h": "उसने", "per": "3", "num": "sg", "g": "m"},
    {"k": "सैह", "h": "वह", "ne_k": "तिनी",
        "ne_h": "उसने", "per": "3", "num": "sg", "g": "f"},
    {"k": "असां", "h": "हम", "ne_k": "असां",
        "ne_h": "हमने", "per": "1", "num": "pl", "g": "m"},
    {"k": "मुंडा", "h": "लड़का", "ne_k": "मुंडे",
        "ne_h": "लड़के ने", "per": "3", "num": "sg", "g": "m"},
    {"k": "कुड़ी", "h": "लड़की", "ne_k": "कुड़ी",
        "ne_h": "लड़की ने", "per": "3", "num": "sg", "g": "f"},
    {"k": "अम्मा", "h": "माँ", "ne_k": "अम्मा",
        "ne_h": "माँ ने", "per": "3", "num": "sg", "g": "f"},
]

locs = [("घरा", "घर", "m"), ("स्कूला", "स्कूल", "m"), ("बजारा", "बाजार", "m")]
foods = [("रोटी", "रोटी", "f"), ("धाम", "धाम", "f"), ("मदरा", "मदरा", "m"),
         ("चौळ", "चावल", "m"), ("खाणा", "खाना", "m"), ("दाल", "दाल", "f")]
liquids = [("पाणी", "पानी", "m"), ("चा", "चाय", "f"),
           ("दुद्ध", "दूध", "m"), ("लस्सी", "लस्सी", "f")]
items = [("किताब", "किताब", "f"), ("अखबार", "अखबार", "m"), ("चिट्ठी",
                                                            "चिट्ठी", "f"), ("कम्म", "काम", "m"), ("गाणा", "गाना", "m")]

verbs = [
    {"cat": "motion", "objs": locs, "trans": False, "root": {"k": "जा", "h": "जा"}, "cont": {
        "k": "जादा", "h": "जा"}, "past": {"k": "गया", "h": "गया"}, "fut": {"k": "जांगा", "h": "जा"}},
    {"cat": "motion", "objs": locs, "trans": False, "root": {"k": "औ", "h": "आ"}, "cont": {
        "k": "औंदा", "h": "आ"}, "past": {"k": "आया", "h": "आया"}, "fut": {"k": "औंगा", "h": "आ"}},
    {"cat": "eating", "objs": foods, "trans": True, "root": {"k": "खा", "h": "खा"}, "cont": {
        "k": "खान्दा", "h": "खा"}, "past": {"k": "खाधा", "h": "खाया"}, "fut": {"k": "खांगा", "h": "खा"}},
    {"cat": "drinking", "objs": liquids, "trans": True, "root": {"k": "पी", "h": "पी"}, "cont": {
        "k": "पींदा", "h": "पी"}, "past": {"k": "पीता", "h": "पिया"}, "fut": {"k": "पींगा", "h": "पी"}},
    {"cat": "reading", "objs": items, "trans": True, "root": {"k": "पढ़", "h": "पढ़"}, "cont": {
        "k": "पढ़दा", "h": "पढ़"}, "past": {"k": "पढ्या", "h": "पढ़ा"}, "fut": {"k": "पढ़ांगा", "h": "पढ़"}},
    {"cat": "writing", "objs": items, "trans": True, "root": {"k": "लिख", "h": "लिख"}, "cont": {
        "k": "लिखदा", "h": "लिख"}, "past": {"k": "लिख्या", "h": "लिखा"}, "fut": {"k": "लिखांगा", "h": "लिख"}},
]

# 2. LOGIC ENGINE

def get_auxiliary(lang, sub, tense, obj_gender=None):
    if lang == "hindi":
        if tense == "pres":
            if sub["per"] == "1" and sub["num"] == "sg":
                return "हूँ"
            if sub["per"] == "2" and sub["num"] == "sg":
                return "हो"
            if sub["num"] == "pl":
                return "हैं"
            return "है"

        if tense == "past":
            target_gender = obj_gender if obj_gender else sub["g"]
            if target_gender == "f":
                return "थी"
            return "था"

    elif lang == "kangri":
        if tense == "pres":
            if sub["per"] == "1" and sub["num"] == "sg":
                return "है"
            if sub["num"] == "pl":
                return "हन"
            return "है"
        if tense == "past":
            target_gender = obj_gender if obj_gender else sub["g"]
            if target_gender == "f":
                return "थी"
            return "था"
    return ""


def conjugate_hindi_future(root, sub):
    suffix = ""
    if sub["per"] == "1":
        if sub["num"] == "sg":
            suffix = "ऊँगा" if sub["g"] == "m" else "ऊँगी"
        else:
            suffix = "एंगे"
    elif sub["per"] == "2":
        if sub["num"] == "pl":
            suffix = "ओगे" if sub["g"] == "m" else "ओगी"
        else:
            suffix = "एगा"
    elif sub["per"] == "3":
        if sub["num"] == "pl":
            suffix = "एंगे"
        else:
            suffix = "एगा" if sub["g"] == "m" else "एगी"
    return f"{root}{suffix}".replace("ाए", "ाए")


def conjugate_hindi_past(base, target_gender):
    """Robust Past Tense Conjugator (Padha -> Padhi)"""
    if target_gender == "f":
        if base.endswith("या"):
            return base[:-2] + "यी"  # Gaya -> Gayi
        if base.endswith("ा"):
            return base[:-1] + "ी"   # Padha -> Padhi
        return base + "ी"  # Fallback
    return base  # Default Masculine


def generate_sentence_v8(sub, verb, tense):
    o_k, o_h, o_g = random.choice(verb["objs"])

    use_ne = (tense == "past" and verb["trans"])
    s_k = sub["ne_k"] if use_ne else sub["k"]
    s_h = sub["ne_h"] if use_ne else sub["h"]

    aux_k = get_auxiliary("kangri", sub, tense, o_g if use_ne else None)
    aux_h = get_auxiliary("hindi", sub, tense, o_g if use_ne else None)

    k_verb, h_verb = "", ""

    # --- CONTINUOUS ---
    if tense == "cont":
        base = verb["cont"]["k"]
        if sub["g"] == "f":
            k_verb = base.replace("दा", "दी").replace("ंदा", "न्दी")
        elif sub["num"] == "pl":
            k_verb = base.replace("दा", "दे").replace("ंदा", "न्दे")
        else:
            k_verb = base

        h_suffix = "रहा" if sub["g"] == "m" else "रही"
        if sub["num"] == "pl" and sub["g"] == "m":
            h_suffix = "रहे"
        h_verb = f"{verb['root']['h']} {h_suffix}"

    # --- FUTURE ---
    elif tense == "fut":
        base = verb["fut"]["k"]
        if sub["g"] == "f":
            k_verb = base.replace("ंगा", "ंगी")
        elif sub["num"] == "pl":
            k_verb = base.replace("ंगा", "ंगे")
        else:
            k_verb = base

        h_verb = conjugate_hindi_future(verb["root"]["h"], sub)

    # --- PAST ---
    elif tense == "past":
        target_g = o_g if use_ne else sub["g"]

        # Kangri
        base_k = verb["past"]["k"]
        if target_g == "f":
            if base_k.endswith("या"):
                k_verb = base_k[:-2] + "यी"
            elif base_k.endswith("आ"):
                k_verb = base_k[:-1] + "ई"
            else:
                k_verb = base_k
        else:
            k_verb = base_k

        # Hindi (Use new robust function)
        h_verb = conjugate_hindi_past(verb["past"]["h"], target_g)

    res_k = f"{s_k} {o_k} {k_verb} {aux_k}"
    res_h = f"{s_h} {o_h} {h_verb} {aux_h}"

    return {"kangri": res_k.strip(), "hindi": res_h.strip()}


# Execution
data = []
for _ in range(5):
    for verb in verbs:
        for sub in subjects:
            for tense in ["cont", "past", "fut"]:
                data.append(generate_sentence_v8(sub, verb, tense))

df_v8 = pd.DataFrame(data)
df_v8 = df_v8.drop_duplicates().sample(frac=1).reset_index(drop=True)

print(f"Generated {len(df_v8)} Golden Standard rows.")
print(df_v8.sample(10))
df_v8.to_csv("kangri_hindi.csv", index=False)


Generated 542 Golden Standard rows.
                   kangri                 hindi
480      सैह चिट्ठी पढ़दा     वह चिट्ठी पढ़ रहा
306        मैं घरा गयी थी         मैं घर गयी थी
98    मैं अखबार लिख्या था   मैंने अखबार लिखा था
211       सैह रोटी खान्दा        वह रोटी खा रहा
363       सैह स्कूला औंगा         वह स्कूल आएगा
426        मैं कम्म लिखदा       मैं काम लिख रहा
181           तू घरा औंगा           तुम घर आएगा
336  अम्मा अखबार पढ्या था  माँ ने अखबार पढ़ा था
291      मैं स्कूला जांगी      मैं स्कूल जाऊँगी
299         मैं चौळ खांगा       मैं चावल खाऊँगा


### 1. Installation and Setup

In [3]:
# Install necessary libraries
!pip install -q transformers datasets sentencepiece accelerate peft evaluate sacrebleu

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h

### 2. Imports and Memory Cleanup
We clear the GPU memory to ensure a fresh start.

In [4]:
import torch
import gc
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments
)
from peft import get_peft_model, LoraConfig, TaskType
import evaluate

# Clear GPU memory if previous runs exist
if 'trainer' in globals(): del trainer
if 'model' in globals(): del model
if 'tokenizer' in globals(): del tokenizer
torch.cuda.empty_cache()
gc.collect()

print("Memory cleared and libraries imported.")

Memory cleared and libraries imported.


https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B

Gated model
You must have been granted access to this model

In [5]:
from huggingface_hub import login

# Run this and paste your HF_TOKEN when prompted
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### 3. Load Model & Configure LoRA
We load the model in half-precision (`float16`) and attach LoRA adapters. This makes the trainable parameters <1% of the total model size.

In [6]:
# 1. Load Model and Tokenizer
checkpoint = "ai4bharat/indictrans2-indic-indic-1B"

tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # Load in FP16 to save RAM
)

# CRITICAL FIX: Disable caching to prevent "AttributeError: 'NoneType' object has no attribute 'shape'"
# This is a known issue when fine-tuning this specific model architecture.
model.config.use_cache = False

# 2. Apply LoRA (Low-Rank Adaptation)
peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=32,            # Rank: Higher = more learning capacity (16 is default, 32 is better for new languages)
    lora_alpha=64,   # Alpha: Scaling factor (usually 2x rank)
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj", "k_proj", "out_proj", "fc1", "fc2"]
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

tokenizer_config.json:   0%|          | 0.00/1.10k [00:00<?, ?B/s]

tokenization_indictrans.py:   0%|          | 0.00/8.04k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B:
- tokenization_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


dict.SRC.json:   0%|          | 0.00/3.39M [00:00<?, ?B/s]

dict.TGT.json:   0%|          | 0.00/3.39M [00:00<?, ?B/s]

model.SRC:   0%|          | 0.00/3.26M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

configuration_indictrans.py:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B:
- configuration_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`torch_dtype` is deprecated! Use `dtype` instead!


modeling_indictrans.py:   0%|          | 0.00/79.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B:
- modeling_indictrans.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/168 [00:00<?, ?B/s]

trainable params: 35,389,440 || all params: 1,243,486,208 || trainable%: 2.8460


### 4. Data Preparation & Splitting
We load `mandiyali_hindi.csv` and split it into Training (90%) and Validation (10%) sets. We use `doi_Deva` (Dogri) as the source tag.

In [7]:
# 1. Load and Split Dataset
# Ensure you have uploaded 'kangri_hindi.csv' to the Colab runtime
dataset = load_dataset("csv", data_files="kangri_hindi.csv")
dataset = dataset["train"].train_test_split(test_size=0.1) # 10% for validation

print(f"Training examples: {len(dataset['train'])}")
print(f"Validation examples: {len(dataset['test'])}")

# 2. Preprocessing Function
def preprocess(example):
    # Format: "source_tag target_tag source_text"
    # We use 'doi_Deva' (Dogri) as a proxy for kangri because it is linguistically close.
    src = f"doi_Deva hin_Deva {example['kangri']}"
    tgt = example["hindi"]

    # Tokenize inputs
    model_inputs = tokenizer(src, truncation=True, padding="max_length", max_length=128)

    # Tokenize targets (using text_target=... to avoid language tag errors)
    labels = tokenizer(text_target=tgt, truncation=True, padding="max_length", max_length=128)

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# 3. Apply Mapping & Remove Text Columns
# Removing original columns prevents the "ValueError: too many dimensions" error
tokenized_datasets = dataset.map(
    preprocess,
    batched=False,
    remove_columns=dataset["train"].column_names
)

Generating train split: 0 examples [00:00, ? examples/s]

Training examples: 487
Validation examples: 55


Map:   0%|          | 0/487 [00:00<?, ? examples/s]

Map:   0%|          | 0/55 [00:00<?, ? examples/s]

### 5. Metrics (BLEU Score)
We define a function to compute the BLEU score, which evaluates the quality of translation against the reference.

In [8]:
metric = evaluate.load("sacrebleu")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    # Decode predictions
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)

    # Replace -100 in labels as we can't decode them (used for padding in training)
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Simple post-processing
    decoded_preds = [pred.strip() for pred in decoded_preds]
    decoded_labels = [[label.strip()] for label in decoded_labels]

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {"bleu": result["score"]}

Downloading builder script: 0.00B [00:00, ?B/s]

### 6. Training
We configure the trainer. `per_device_train_batch_size` is set to 8 (thanks to LoRA). `gradient_accumulation_steps=2` gives us an effective batch size of 16.

In [9]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

# 1. CRITICAL FIX: Force disable cache in generation config to fix evaluation crash
model.generation_config.use_cache = False
model.config.use_cache = False

training_args = Seq2SeqTrainingArguments(
    output_dir="./mandiyali_lora_model",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=2,
    learning_rate=1e-3,
    num_train_epochs=15,
    logging_steps=10,
    save_strategy="epoch",
    eval_strategy="epoch",

    # TPU Specifics (Keep these if you are on TPU)
    fp16=False,
    bf16=True,
    optim="adafactor",
    dataloader_pin_memory=False,

    predict_with_generate=True,
    report_to="none",
    remove_unused_columns=False,
    generation_max_length=128
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
    compute_metrics=compute_metrics
)

print("Starting training on TPU (with evaluation fix)...")
trainer.train()

Starting training on TPU (with evaluation fix)...


Epoch,Training Loss,Validation Loss,Bleu
1,10.1554,10.138124,92.668809
2,10.1349,10.133813,97.349747
3,10.1373,10.13337,97.349747
4,10.1329,10.133307,97.349747
5,10.1283,10.133285,94.430055
6,10.1299,10.13328,96.229101
7,10.1242,10.135407,96.229101
8,10.1205,10.133277,94.430055
9,10.1319,10.133277,92.544343
10,10.1422,10.133276,92.544343


TrainOutput(global_step=465, training_loss=10.15574218585927, metrics={'train_runtime': 2496.6895, 'train_samples_per_second': 2.926, 'train_steps_per_second': 0.186, 'total_flos': 5566587485552640.0, 'train_loss': 10.15574218585927, 'epoch': 15.0})

### 7. Inference (Testing)
We perform inference using the trained adapters. Note that we must set `use_cache=False` in `model.generate`.

In [16]:
# Switch model to evaluation mode
model.eval()

def translate_kangri(text):
    # Prepare input with tags
    input_text = f"doi_Deva hin_Deva {text}"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(model.device)

    # Generate translation
    with torch.no_grad():
        # CRITICAL: use_cache=False prevents the "NoneType shape" error
        generated_tokens = model.generate(
            **inputs,
            max_length=128,
            use_cache=False
        )

    return tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

# Test with examples
print("-" * 30)
print(f"Input: कुड़ी धाम खांगी")
print(f"Translation: {translate_kangri('कुड़ी धाम खांगी')}")
print("-" * 30)

------------------------------
Input: कुड़ी धाम खांगी
Translation: लड़की धाम खाएगी
------------------------------


In [11]:
# import shutil
# import os
# from google.colab import drive

# # 1. Mount Google Drive
# drive.mount('/content/drive')

# # 2. Define path in Drive
# # This will create a folder named 'kangri_Translator' in your Drive
# drive_path = "/content/drive/My Drive/kangri_Translator"

# # 3. Save the Model (Adapters) and Tokenizer locally first
# save_path = "./final_model"
# model.save_pretrained(save_path)
# tokenizer.save_pretrained(save_path)

# # 4. Copy to Google Drive
# if os.path.exists(drive_path):
#     shutil.rmtree(drive_path) # Clear old version if exists
# shutil.copytree(save_path, drive_path)

# print(f"Model successfully saved to Google Drive at: {drive_path}")

In [12]:
from huggingface_hub import login

# 1. Login (if not already logged in)
# You need a "Write" token from https://huggingface.co/settings/tokens
login()

# 2. Push to Hub
repo_name = "LovnishVerma/kangri-hindi-translator"

model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

print(f"Model published to: https://huggingface.co/{repo_name}")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...adapter_model.safetensors:   0%|          |  558kB /  142MB            

README.md: 0.00B [00:00, ?B/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  /tmp/tmp42le5ivo/model.TGT  :  99%|#########8| 3.22MB / 3.26MB            

  /tmp/tmp42le5ivo/model.SRC  :  99%|#########8| 3.22MB / 3.26MB            

Model published to: https://huggingface.co/LovnishVerma/kangri-hindi-translator


# Save model in hugging face

In [17]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# 1. Load the Base Model
base_model_id = "ai4bharat/indictrans2-indic-indic-1B"
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    base_model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    device_map="auto"
)

# 2. FIX: Load Tokenizer from the BASE repository
# This avoids the "multiple values for keyword argument" error
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id, # <--- Changed from adapter_path to base_model_id
    trust_remote_code=True
)

# 3. Load and Attach your LoRA Adapters
# This pulls the fine-tuned weights from your hub
adapter_path = "LovnishVerma/kangri-hindi-translator"
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()

# 4. Run Inference
def translate(text):
    # Ensure correct tags for Kangri (using Dogri proxy tag)
    input_text = f"doi_Deva hin_Deva {text}"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True).to(model.device)

    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=128, use_cache=False)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test
print(translate("त्वां कुथु चलयो"))



तुम कहाँ जा रहा
