# English → Hindi MT — Colab demo
This notebook demonstrates loading a pretrained EN→HI model, running quick inference, preparing data for a tiny (toy) fine-tune, and saving the model and tokenizer to disk. It is Colab-ready.

In [None]:
# Install dependencies (run in Colab once)
!pip install -q transformers datasets evaluate sentencepiece sacremoses accelerate torch --quiet
!pip install -q sacrebleu streamlit


In [None]:
# Load the pretrained EN→HI checkpoint and wrap it in a translation pipeline.
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Single source of truth for the checkpoint id so tokenizer and model cannot drift apart.
MODEL_NAME = "Helsinki-NLP/opus-mt-en-hi"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

# Use the first GPU (index 0) when CUDA is available, otherwise -1 for CPU.
pipeline_device = 0 if torch.cuda.is_available() else -1
translator = pipeline("translation_en_to_hi", model=model, tokenizer=tokenizer, device=pipeline_device)
print("Loaded model.")

In [None]:
# Inference demo: translate a few English sentences in one call and print
# each source sentence next to its Hindi translation.
sentences = [
    "How are you?",
    "I love machine learning.",
    "Where is the nearest station?",
]
res = translator(sentences, max_length=120, num_beams=4)

for english, output in zip(sentences, res):
    hindi = output['translation_text']
    print("EN:", english)
    print("HI:", hindi)
    print("---")

In [None]:
# Tiny fine-tune demo (toy): builds a two-example parallel dataset, tokenizes it,
# and splits it. This will NOT produce a useful model but shows the steps.
from datasets import Dataset
import pandas as pd

MAX_LENGTH = 32   # fixed token length that both source and target are padded/truncated to
SPLIT_SEED = 42   # fixed seed so the train/test split is the same on every run

data = {'src': ['Hello', 'Good night'], 'tgt': ['नमस्ते', 'शुभ रात्रि']}
raw_ds = Dataset.from_pandas(pd.DataFrame(data))


def preprocess(batch):
    """Tokenize a batch of source/target sentence pairs.

    Args:
        batch: mapping with a 'src' and a 'tgt' entry, each a list of strings
            (the form `Dataset.map(..., batched=True)` passes in).

    Returns:
        The tokenized source encoding with an added 'labels' entry holding the
        target token ids, where every padding id is replaced by -100.
    """
    inputs = tokenizer(batch['src'], truncation=True, padding='max_length', max_length=MAX_LENGTH)
    # `text_target=` replaces the deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(text_target=batch['tgt'], truncation=True, padding='max_length', max_length=MAX_LENGTH)
    pad_id = tokenizer.pad_token_id
    # Swap padding ids for -100 so padded positions are excluded from the training loss.
    inputs['labels'] = [
        [(token_id if token_id != pad_id else -100) for token_id in label_ids]
        for label_ids in labels['input_ids']
    ]
    return inputs


# Distinct names per stage; `ds` remains the final train/test DatasetDict.
tokenized_ds = raw_ds.map(preprocess, batched=True, remove_columns=['src', 'tgt'])
ds = tokenized_ds.train_test_split(test_size=0.5, seed=SPLIT_SEED)
print(ds)

In [None]:
# Save the tokenizer and model to disk (useful for Streamlit demo).
# The output directory is defined once so the two save calls and the message stay in sync.
SAVE_DIR = '/content/mt-en-hi-demo'
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)
print(f'Saved to {SAVE_DIR}')

## Streamlit
You can download the saved model folder and use it with the Streamlit demo included in the repo (`app.py` or `app_improved.py`). In Colab you can also run Streamlit via `ngrok` or `localtunnel`, but that's outside the scope of this quick demo.