In [None]:
# Install the Python packages this notebook relies on.
# NOTE(review): versions are unpinned, and `huggingface` may not be the intended
# package name (possibly `huggingface_hub`) — confirm before relying on it.
!pip install huggingface transformers datasets torch torchaudio
# Fetch the IndicTransTokenizer sources and install them in editable mode from
# inside the checkout.
!git clone https://github.com/VarunGumma/IndicTransTokenizer
# NOTE(review): %cd changes the kernel's working directory for the rest of the
# session; re-running this cell would presumably clone a second copy inside the
# first checkout — confirm and guard if the cell must be re-runnable.
%cd IndicTransTokenizer
!pip install --editable ./

from IPython.display import clear_output
import torch
from transformers import AutoModelForSeq2SeqLM
from IndicTransTokenizer import IndicProcessor, IndicTransTokenizer

# Language tags that can be requested as the translation target
# (the first entry is the one used by the English -> Indic cell below).
target_languages = [
    "asm_Beng", "ben_Beng", "brx_Deva", "doi_Deva", "gom_Deva", "guj_Gujr",
    "hin_Deva", "kan_Knda", "kas_Deva", "mai_Deva", "mal_Mlym", "mar_Deva",
    "mni_Beng", "npi_Deva", "ory_Orya", "pan_Guru", "san_Deva", "sat_Olck",
    "snd_Deva", "tam_Taml", "tel_Telu", "urd_Arab",
]

# English -> Indic pipeline pieces: tokenizer, pre/post-processor and model.
tokenizer_en_indic = IndicTransTokenizer(direction="en-indic")
ip_en_indic = IndicProcessor(inference=True)
model_en_indic = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-en-indic-dist-200M",
    trust_remote_code=True,
)

# Wipe the install/download logs from the cell output.
clear_output()

# Converting Sentences from English to Indic Languages

In [None]:
# Sample English sentences used as the source batch for the
# English -> Indic translation below.
english_sentences = [
    "When I was young, I used to go to the park every day.",
    "He has many old books, which he inherited from his ancestors.",
    "I can't figure out how to solve my problem.",
    "She is very hardworking and intelligent, which is why she got all the good marks.",
    "We watched a new movie last week, which was very inspiring.",
    "If you had met me at that time, we would have gone out to eat.",
    "She went to the market with her sister to buy a new sari.",
    "Raj told me that he is going to his grandmother's house next month.",
    "All the kids were having fun at the party and were eating lots of sweets.",
    "My friend has invited me to his birthday party, and I will give him a gift.",
]

# Choose the target language once so that preprocessing and postprocessing
# always agree. Previously the batch was prepared for target_languages[0]
# but postprocessed with a hard-coded "hin_Deva".
tgt_lang = target_languages[0]

# Prepare the English batch for the chosen target language, then tokenize
# it as source-side text.
batch = ip_en_indic.preprocess_batch(english_sentences, src_lang="eng_Latn", tgt_lang=tgt_lang)
batch = tokenizer_en_indic(batch, src=True, return_tensors="pt")

# Beam-search decoding; inference_mode disables autograd tracking.
with torch.inference_mode():
    outputs = model_en_indic.generate(**batch, num_beams=5, num_return_sequences=1, max_length=256)

# Decode the generated ids as target-side text, then postprocess for the
# same language that was requested from the model.
outputs = tokenizer_en_indic.batch_decode(outputs, src=False)
outputs = ip_en_indic.postprocess_batch(outputs, lang=tgt_lang)

In [None]:
# Show each English source sentence next to its translation.
print("Converting English to Indic Language:\n")
for idx, translation in enumerate(outputs):
    source_sentence = english_sentences[idx]
    print(f"English: {source_sentence}\nIndic Translation: {translation}")
    print("=" * 100)

Converting English to Indic Language:

English: When I was young, I used to go to the park every day.
Indic Translation: येतिय़ा मइ सৰु आछिलो, मइ प्ৰतिदिने उद्यानलै गैछिलो।
English: He has many old books, which he inherited from his ancestors.
Indic Translation: तेओँৰ बहुतो पुৰणि ग्ৰन्थ आछे, यिबोৰ तेओँ निजৰ पूৰ्बपुৰुषसकलৰ पৰा उत्तৰाधिकाৰी हिचापे लाभ कৰिछिल।
English: I can't figure out how to solve my problem.
Indic Translation: मइ मोৰ समस्याटो केनेकै समाधान कৰिब पाৰिम बुजि पोৱा नाइ।
English: She is very hardworking and intelligent, which is why she got all the good marks.
Indic Translation: तेओँ अति कठोৰ पৰिश्ৰमी आৰु बुद्धिमान, सेय़ेहे तेओँ सकलो भाल नम्बৰ लाभ कৰिछिल।
English: We watched a new movie last week, which was very inspiring.
Indic Translation: आमि योৱा सप्ताहत एखन नतुन चलच्चित्ৰ चाइछिलोँ, यिटो अति अनुप्ৰेৰणामूलक आछिल।
English: If you had met me at that time, we would have gone out to eat.
Indic Translation: यदि आपुनि सेइ समय़त मोक लग पाइछिलहेँतेन, आमि खाबलै बाहिৰलै गैछिलोँहेत

# Converting Sentences from Indic Languages to English

In [None]:
# Indic -> English pipeline pieces: tokenizer, pre/post-processor and model.
tokenizer_indic_en = IndicTransTokenizer(direction="indic-en")
ip_indic_en = IndicProcessor(inference=True)
model_indic_en = AutoModelForSeq2SeqLM.from_pretrained(
    "ai4bharat/indictrans2-indic-en-dist-200M",
    trust_remote_code=True,
)

# Wipe the download logs from the cell output.
clear_output()

In [None]:
# Sample Hindi sentences used as the source batch for the
# Indic -> English translation (passed with src_lang="hin_Deva" below).
hindi_sentences = [
    "जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।",
    "उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।",
    "मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।",
    "वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।",
    "हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।",
    "अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।",
    "वह अपनी दीदी के साथ बाजार गयी थी ताकि वह नई साड़ी खरीद सके।",
    "राज ने मुझसे कहा कि वह अगले महीने अपनी नानी के घर जा रहा है।",
    "सभी बच्चे पार्टी में मज़ा कर रहे थे और खूब सारी मिठाइयाँ खा रहे थे।",
    "मेरे मित्र ने मुझे उसके जन्मदिन की पार्टी में बुलाया है, और मैं उसे एक तोहफा दूंगा।",
]

In [None]:
# Preprocess the Hindi batch for an English target and tokenize it as
# source-side text in one step.
batch = tokenizer_indic_en(
    ip_indic_en.preprocess_batch(hindi_sentences, src_lang="hin_Deva", tgt_lang="eng_Latn"),
    src=True,
    return_tensors="pt",
)

# Beam-search decoding; inference_mode disables autograd tracking.
with torch.inference_mode():
    outputs = model_indic_en.generate(
        **batch,
        num_beams=5,
        num_return_sequences=1,
        max_length=256,
    )

# Decode the generated ids as target-side text and postprocess for English.
outputs = ip_indic_en.postprocess_batch(
    tokenizer_indic_en.batch_decode(outputs, src=False),
    lang="eng_Latn",
)

In [None]:
# Show each Hindi source sentence next to its English translation.
print("Converting Indic to English Language:\n")
for idx, translation in enumerate(outputs):
    source_sentence = hindi_sentences[idx]
    print(f"Indic: {source_sentence}\nEnglish Translation: {translation}")
    print("=" * 100)

Converting Indic to English Language:

Indic: जब मैं छोटा था, मैं हर रोज़ पार्क जाता था।
English Translation: When I was young, I used to go to the park every day.
Indic: उसके पास बहुत सारी पुरानी किताबें हैं, जिन्हें उसने अपने दादा-परदादा से विरासत में पाया।
English Translation: He has a lot of old books, which he inherited from his grandparents.
Indic: मुझे समझ में नहीं आ रहा कि मैं अपनी समस्या का समाधान कैसे ढूंढूं।
English Translation: I can't figure out how to find a solution to my problem.
Indic: वह बहुत मेहनती और समझदार है, इसलिए उसे सभी अच्छे मार्क्स मिले।
English Translation: He is very hardworking and understanding, so he got all the good marks.
Indic: हमने पिछले सप्ताह एक नई फिल्म देखी जो कि बहुत प्रेरणादायक थी।
English Translation: We saw a new film last week which was very inspiring.
Indic: अगर तुम मुझे उस समय पास मिलते, तो हम बाहर खाना खाने चलते।
English Translation: If you found me nearby at that time, we would go out to eat.
Indic: वह अपनी दीदी के साथ बाजार गयी थी ताकि 