In [1]:
import numpy as np
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.gleu_score import sentence_gleu
from nltk.translate.meteor_score import meteor_score
from google.transliteration import transliterate_text
from indictrans import Transliterator
import re

In [2]:
roman_scripts_file_path = "../datasets/dakshina_dataset_v1.0/hi/romanized/hi.romanized.rejoined.dev.roman.txt"
native_scripts_file_path = "../datasets//dakshina_dataset_v1.0/hi/romanized/hi.romanized.rejoined.dev.native.txt"

In [3]:
native_lines = []
with open(native_scripts_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        native_lines.append(line)
        # cleaned_line = line.replace('-', '')
        #cleaned_line = re.sub(r'["\'#,]', '', line)
        #native_lines.append(cleaned_line)

romanized_lines = []
with open(roman_scripts_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        romanized_lines.append(line)
        # cleaned_line = line.replace('-', '')
        #cleaned_line = re.sub(r'["\',]', '', line)
        #romanized_lines.append(cleaned_line)

In [4]:
len(native_lines)

5000

In [5]:
len(romanized_lines)

5000

In [6]:
df = pd.DataFrame({'Hinglish': romanized_lines, 'Hindi': native_lines})
print(df.head())

                                            Hinglish  \
0  iske aane se purva hi log gharon ki safai ka k...   
1            vilupti ki kagaar par gunkaari tikhur\n   
2  Michael kamen (the wall ke vadyamaya hisso ke ...   
3  shahnama noroj ke tyohaar ko mahaan Jamshed ke...   
4                                 Mehrotra, Dr॰ N.\n   

                                               Hindi  
0  इसके आने से पूर्व ही लोग घरों की सफाई का कार्य...  
1                विलुप्ति की कगार पर गुणकारी तीखुर\n  
2  माइकल कामेन (द वॉल के वाद्यमय हिस्सों के लिए ए...  
3  शाहनामा नौरोज़ के त्यौहार को महान जमशेद के शास...  
4                               मेहरोत्रा, डॉ॰ एन.\n  


In [7]:
len(df)

5000

In [8]:
sampled_df = df.sample(n = 100, replace = False, random_state = np.random.randint(low=1, high=1234))

In [9]:
hinglish_sentences = sampled_df['Hinglish']
print(hinglish_sentences)

hinglish_sentences = hinglish_sentences.values
print(type(hinglish_sentences))
print(hinglish_sentences.shape)

hindi_true = list(sampled_df['Hindi'])

631        iski parikalpana javaharlal nehru ne ki thi.\n
4228    strabo kehte hai ki kakeshas me ek sing vale g...
4350    Boeing C-17 Globemaster yah vishwa ke bade mal...
2803    sanyogavash shri kilha aur agradas ji usi van ...
4213    Kuch manasik rogon ki chikitsa prakrutik dhang...
                              ...                        
3783    bloger (upyokta): blog yani chittha upyog karn...
1292                      Aur purv disha ke dyotak hai.\n
261     wah agnipind sarakkar pichhe chala gaya aur pr...
290     ek aur vichar hai ki bhagavan ke hetu karyakar...
3115    is abhutapurva nidaan/parikshan hetu shodhakar...
Name: Hinglish, Length: 100, dtype: object
<class 'numpy.ndarray'>
(100,)


In [10]:
# Testing using LIBINC transliterator
# trn = Transliterator(source='eng', target='hin', build_lookup=True, decode='beamsearch')
trn = Transliterator(source='eng', target='hin', build_lookup=True)
libinc_bleu_scores = []
libinc_gleu_scores = []
libinc_meteor_scores = []
for i in range(hinglish_sentences.size):

    transformed_sentence = trn.transform(hinglish_sentences[i])
    reference = [hindi_true[i].split()]
    transformed_tokens = transformed_sentence.split()
    
    bleu_score = sentence_bleu(reference, transformed_tokens)
    gleu_score = sentence_gleu(reference, transformed_tokens)
    met_score = meteor_score(reference, transformed_tokens)
    libinc_bleu_scores.append(bleu_score)
    libinc_gleu_scores.append(gleu_score)
    libinc_meteor_scores.append(met_score)

    if i % 10 == 0:
        print("Transliterating sentence ", i+1, "\n")
        print('Original Code mixed text :', hinglish_sentences[i])
        print('Original Native Hindi convertion :', hindi_true[i])
        print('Transliterated to Hindi :', transformed_sentence)
        print('Average BLEU score :', np.mean(libinc_bleu_scores), '\n')
        print('Average GLEU score :', np.mean(libinc_gleu_scores), '\n')
        print('Average METEOR score :', np.mean(libinc_meteor_scores), '\n')
        print()

Transliterating sentence  1 

Original Code mixed text : iski parikalpana javaharlal nehru ne ki thi.

Original Native Hindi convertion : इसकी परिकल्पना जवाहरलाल नेहरू ने की थी।

Transliterated to Hindi : इसकी परिकल्पना जवाहरलाल नेहरू ने की थी.

Average BLEU score : 0.8091067115702212 

Average GLEU score : 0.8181818181818182 

Average METEOR score : 0.8551587301587302 


Transliterating sentence  11 

Original Code mixed text : Harris Matrix

Original Native Hindi convertion : हैरिस मैट्रिक्स

Transliterated to Hindi : हरिस माट्रिक्स

Average BLEU score : 0.397229715616154 

Average GLEU score : 0.51558435014233 

Average METEOR score : 0.6285881495253921 




The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Transliterating sentence  21 

Original Code mixed text : is kii vajah se is misile ko kahn bhi badi asani se tansport kiya ja satka hai, jisase ham apne dushman ke kareeb pahunch sakte hain.

Original Native Hindi convertion : इस की वजह से इस मिसाइल को कहीं भी बड़ी आसानी से ट्रांसपोर्ट किया जा सकता है, जिससे हम अपने दुश्मन के करीब पहुंच सकते हैं।

Transliterated to Hindi : इस की वजह से इस मिसिले को कहन भी बड़ी असनी से तांस्पोर्ट किया जा सत्का हैं, जिससे हम अपने दुश्मन के करीब पहुँच सकते हैं.

Average BLEU score : 0.3662928829278959 

Average GLEU score : 0.4541178358401468 

Average METEOR score : 0.5995048910648769 


Transliterating sentence  31 

Original Code mixed text : lokakathaon ke anusar is gaanv ke nivaasiyon ko yah ajgar bahut pareshan kiya karta tha.

Original Native Hindi convertion : लोककथाओं के अनुसार इस गाँव के निवासियों को यह अजगर बहुत परेशान किया करता था।

Transliterated to Hindi : लोकाकाताओं के अनुसर इस गांव के निवासियों को यह अजगर बहुत परेशान किया करता था.

Averag

In [11]:
average_bleu_score = np.mean(libinc_bleu_scores)
average_gleu_score = np.mean(libinc_gleu_scores)
average_meteor_score = np.mean(libinc_meteor_scores)
print('Average BLEU Score for Dev Set using LIBINC transliterator: ', average_bleu_score)
print('Average GLEU Score for Dev Set using LIBINC transliterator: ', average_gleu_score)
print('Average METEOR Score for Dev Set using LIBINC transliterator: ', average_meteor_score)

Average BLEU Score for Dev Set using LIBINC transliterator:  0.33237996400108316
Average GLEU Score for Dev Set using LIBINC transliterator:  0.44467842128030904
Average METEOR Score for Dev Set using LIBINC transliterator:  0.6191066729237032


In [12]:
# # The path to the local git repo for Indic NLP library
# INDIC_NLP_LIB_HOME=r"C:\Johns Hopkins\sem1\Machine Translation\MT Final Project\IndicTrans2\indic_nlp_library"

# # The path to the local git repo for Indic NLP Resources
# INDIC_NLP_RESOURCES=r"C:\Johns Hopkins\sem1\Machine Translation\MT Final Project\IndicTrans2\indic_nlp_resources"

# import sys
# sys.path.append(r'{}'.format(INDIC_NLP_LIB_HOME))

# from indicnlp import common
# common.set_resources_path(INDIC_NLP_RESOURCES)

# from indicnlp import loader
# loader.load()

# # Testing using AI4Bharath transliterator
# ai4b_bleu_scores = []
# lang = 'hi'
# for i in range(hinglish_sentences.size):

#     transformed_sentence = ItransTransliterator.from_itrans(hinglish_sentences[i],lang)
#     reference = [hindi_true[i].split()]
#     # print(transformed_sentence)
#     transformed_tokens = transformed_sentence.split()
#     bleu_score = sentence_bleu(reference, transformed_tokens)
#     ai4b_bleu_scores.append(bleu_score)

#     if i % 10 == 0:
#         print("Transliterating sentence ", i+1)
#         print('Original Code mized text :', hinglish_sentences[i])
#         print('Original Native Hindi convertion :', hindi_true[i])
#         print('Transliterated to Hindi :', transformed_sentence)
#         print('Average BLEU score :', np.mean(ai4b_bleu_scores), '\n')

# average_bleu_score = np.mean(ai4b_bleu_scores)
# print('Average Bleu Score for Train Set : ', average_bleu_score)

In [13]:
# Testing out Google Transliterate package
from google.transliteration import transliterate_word
def transliterate_all_alphanumeric_parts(sentence, lang_code='hi'):
    transliterated_sentence = []
    words = sentence.split()

    for word in words:
        # Use regex to find all alphanumeric parts in the word
        parts = re.findall(r'[a-zA-Z0-9]+|[^a-zA-Z0-9]+', word)
        
        transliterated_word = ""
        
        for part in parts:
            if part.isalnum():  # Transliterate only alphanumeric parts
                try:
                    # Transliterate the alphanumeric part
                    suggestions = transliterate_word(part, lang_code=lang_code)
                    transliterated_part = suggestions[0] if suggestions else part
                except IndexError:
                    # If transliteration fails, keep the alphanumeric part as is
                    transliterated_part = part
            else:
                # Keep non-alphanumeric parts (like "-" or spaces) as is
                transliterated_part = part
            
            transliterated_word += transliterated_part
        
        transliterated_sentence.append(transliterated_word)

    return ' '.join(transliterated_sentence)

google_bleu_scores = []
google_gleu_scores = []
google_meteor_scores = []
lang = 'hi'
for i in range(hinglish_sentences.size):

    # print(i)
    # transformed_sentence = transliterate_text(hinglish_sentences[i],lang_code = lang)
    transformed_sentence = transliterate_all_alphanumeric_parts(hinglish_sentences[i], lang_code=lang)
    
    reference = [hindi_true[i].split()]
    transformed_tokens = transformed_sentence.split()
    
    bleu_score = sentence_bleu(reference, transformed_tokens)
    gleu_score = sentence_gleu(reference, transformed_tokens)
    met_score = meteor_score(reference, transformed_tokens)
    google_bleu_scores.append(bleu_score)
    google_gleu_scores.append(gleu_score)
    google_meteor_scores.append(met_score)

    if i % 10 == 0:
        print("Transliterating sentence ", i+1)
        print('Original Code mized text :', hinglish_sentences[i])
        print('Original Native Hindi convertion :', hindi_true[i])
        print('Transliterated to Hindi :', transformed_sentence)
        print('Average BLEU score :', np.mean(google_bleu_scores), '\n')
        print('Average GLEU score :', np.mean(google_gleu_scores), '\n')
        print('Average METEOR score :', np.mean(google_meteor_scores), '\n')
        print()

Transliterating sentence  1
Original Code mized text : iski parikalpana javaharlal nehru ne ki thi.

Original Native Hindi convertion : इसकी परिकल्पना जवाहरलाल नेहरू ने की थी।

Transliterated to Hindi : इसकी परिकल्पना जवाहरलाल नेहरू ने की थी.
Average BLEU score : 0.8091067115702212 

Average GLEU score : 0.8181818181818182 

Average METEOR score : 0.8551587301587302 


Transliterating sentence  11
Original Code mized text : Harris Matrix

Original Native Hindi convertion : हैरिस मैट्रिक्स

Transliterated to Hindi : हर्रिस मैट्रिक्स
Average BLEU score : 0.4608508328011913 

Average GLEU score : 0.6347694820567931 

Average METEOR score : 0.7183484881289876 


Transliterating sentence  21
Original Code mized text : is kii vajah se is misile ko kahn bhi badi asani se tansport kiya ja satka hai, jisase ham apne dushman ke kareeb pahunch sakte hain.

Original Native Hindi convertion : इस की वजह से इस मिसाइल को कहीं भी बड़ी आसानी से ट्रांसपोर्ट किया जा सकता है, जिससे हम अपने दुश्मन के करीब प

In [14]:
average_bleu_score = np.mean(google_bleu_scores)
average_gleu_score = np.mean(google_gleu_scores)
average_meteor_score = np.mean(google_meteor_scores)
print('Average BLEU Score for Dev Set using Google transliterator: ', average_bleu_score)
print('Average GLEU Score for Dev Set using Google transliterator: ', average_gleu_score)
print('Average METEOR Score for Dev Set using Google transliterator: ', average_meteor_score)
print('Google Transliterate model is very accurate but takes time to transliterate the sentence')

Average BLEU Score for Dev Set using Google transliterator:  0.4871145831537531
Average GLEU Score for Dev Set using Google transliterator:  0.595736198199297
Average METEOR Score for Dev Set using Google transliterator:  0.7319968393264286
Google Transliterate model is very accurate but takes time to transliterate the sentence


In [15]:
# e = XlitEngine("hi")
# out = e.translit_word("computer", topk=5, beam_width=10)
# print(out)
# # output:{'hi': ['कम्प्यूटर', 'कंप्यूटर', 'कम्पूटर', 'कम्पुटर', 'कम्प्युटर']}

In [24]:
# Testing using AI4Bharath transliterator

from ai4bharat.transliteration import XlitEngine

ai4b_bleu_scores = []
ai4b_gleu_scores = []
ai4b_meteor_scores = []
lang = 'hi'
e = XlitEngine('hi')
for i in range(hinglish_sentences.size):

    transformed_sentence = e.translit_sentence(hinglish_sentences[i])[lang]
    reference = [hindi_true[i].split()]
    # print(transformed_sentence)
    transformed_tokens = transformed_sentence.split()
    bleu_score = sentence_bleu(reference, transformed_tokens)
    gleu_score = sentence_gleu(reference, transformed_tokens)
    met_score = meteor_score(reference, transformed_tokens)
    ai4b_bleu_scores.append(bleu_score)
    ai4b_gleu_scores.append(gleu_score)
    ai4b_meteor_scores.append(met_score)

    if i % 10 == 0:
        print("Transliterating sentence ", i+1)
        print('Original Code mized text :', hinglish_sentences[i])
        print('Original Native Hindi convertion :', hindi_true[i])
        print('Transliterated to Hindi :', transformed_sentence)
        print('Average BLEU score :', np.mean(ai4b_bleu_scores), '\n')
        print('Average GLEU score :', np.mean(ai4b_gleu_scores), '\n')
        print('Average METEOR score :', np.mean(ai4b_meteor_scores), '\n')
        print()

Loading hi...
Transliterating sentence  1
Original Code mized text : iski parikalpana javaharlal nehru ne ki thi.

Original Native Hindi convertion : इसकी परिकल्पना जवाहरलाल नेहरू ने की थी।

Transliterated to Hindi : इसकी परिकल्पना जवाहरलाल नहरू ने कि थि.
Average BLEU score : 5.395774370246974e-78 

Average GLEU score : 0.3181818181818182 

Average METEOR score : 0.5357142857142857 


Transliterating sentence  11
Original Code mized text : Harris Matrix

Original Native Hindi convertion : हैरिस मैट्रिक्स

Transliterated to Hindi : हैरिस मेट्रिक्स
Average BLEU score : 0.20217858878068967 

Average GLEU score : 0.4488290340866687 

Average METEOR score : 0.5710439666190354 


Transliterating sentence  21
Original Code mized text : is kii vajah se is misile ko kahn bhi badi asani se tansport kiya ja satka hai, jisase ham apne dushman ke kareeb pahunch sakte hain.

Original Native Hindi convertion : इस की वजह से इस मिसाइल को कहीं भी बड़ी आसानी से ट्रांसपोर्ट किया जा सकता है, जिससे हम अपने 

In [25]:
average_bleu_score = np.mean(ai4b_bleu_scores)
average_gleu_score = np.mean(ai4b_gleu_scores)
average_meteor_score = np.mean(ai4b_meteor_scores)
print('Average BLEU Score for Dev Set using AI4Bharat Transliterator: ', average_bleu_score)
print('Average GLEU Score for Dev Set using AI4Bharat Transliterator: ', average_gleu_score)
print('Average METEOR Score for Dev Set using AI4Bharat Transliterator: ', average_meteor_score)

Average BLEU Score for Dev Set using AI4Bharat Transliterator:  0.25145052369892545
Average GLEU Score for Dev Set using AI4Bharat Transliterator:  0.37644423121765164
Average METEOR Score for Dev Set using AI4Bharat Transliterator:  0.5504389614394115


In [26]:
print("For Hinglish to Hindi, Google Transliterate package works the best")

For Hinglish to Hindi, Google Transliterate package works the best


In [29]:
test_set_roman_scripts_file_path = "../datasets/dakshina_dataset_v1.0/hi/romanized/hi.romanized.rejoined.test.roman.txt"
test_set_native_scripts_file_path = "../datasets//dakshina_dataset_v1.0/hi/romanized/hi.romanized.rejoined.test.native.txt"

test_set_native_lines = []
with open(test_set_native_scripts_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        test_set_native_lines.append(line)
        # cleaned_line = line.replace('-', '')
        #cleaned_line = re.sub(r'["\'#,]', '', line)
        #native_lines.append(cleaned_line)

test_set_romanized_lines = []
with open(test_set_roman_scripts_file_path, 'r', encoding='utf-8') as file:
    for line in file:
        test_set_romanized_lines.append(line)
        # cleaned_line = line.replace('-', '')
        #cleaned_line = re.sub(r'["\',]', '', line)
        #romanized_lines.append(cleaned_line)

In [30]:
df_test_set = pd.DataFrame({'Hinglish': test_set_romanized_lines, 'Hindi': test_set_native_lines})
print(df_test_set.head())

                                            Hinglish  \
0  Kumbh rashi men janme log carbanion se bhari e...   
1                         Iska ulat bhi satya hai.\n   
2  kuch devta jo mukhyatah nagar devta the, apne ...   
3  Tel ke utpadan men sansar men Romania ka chhat...   
4  Banarasi Lal se milkar police ne sara bhed pra...   

                                               Hindi  
0  कुंभ राशि में जन्मे लोग संभावनाओं से भरी एक जग...  
1                             इसका उलट भी सत्य है।\n  
2  कुछ देवता जो मुख्यत: नगर देवता थे, अपने संप्रद...  
3  तेल के उत्पादन में संसार में रोमानिया का छठा स...  
4  बनारसी लाल से मिलकर पुलिस ने सारा भेद प्राप्त ...  


In [32]:
sampled_df_test_set = df_test_set.sample(n = 100, replace = False, random_state = np.random.randint(low=1, high=1234))

hinglish_sentences_test = sampled_df_test_set['Hinglish']
print(hinglish_sentences_test)

hinglish_sentences_test = hinglish_sentences_test.values
print(type(hinglish_sentences_test))
print(hinglish_sentences_test.shape)

hindi_true_test = list(sampled_df_test_set['Hindi'])

3991    bank, check ko sansaadhit karne mein lagane wa...
20      jab Vasudev ki aathvi santaan kanya hoti hai, ...
4871    ghaav ke chaar alag-alag roopon kaa varnan kiy...
61      Kant ne is yukti ko mana nahi, parantu kaha ki...
377     England ko unche sthan par pahunchne ka shrey ...
                              ...                        
1608                   Ismen Protein ki matra adhik ho.\n
1963    2010 tak Russia ne Gorny (Saratov Oblast) aur ...
1200    men Rudravarman dwitiyak ki mrutyu ke saath is...
1689                                          mahavrutt\n
159     inke adhar par vah ek aur kuch bhashaon se sam...
Name: Hinglish, Length: 100, dtype: object
<class 'numpy.ndarray'>
(100,)


In [35]:
# Translitearting sampled sentences from the test set using Google Translitearte package
google_bleu_scores_test_set = []
google_gleu_scores_test_set = []
google_meteor_scores_test_set = []
lang = 'hi'
for i in range(hinglish_sentences_test.size):

    # print(i)
    # transformed_sentence = transliterate_text(hinglish_sentences[i],lang_code = lang)
    transformed_sentence = transliterate_all_alphanumeric_parts(hinglish_sentences_test[i], lang_code=lang)
    reference = [hindi_true_test[i].split()]
    transformed_tokens = transformed_sentence.split()
    
    bleu_score = sentence_bleu(reference, transformed_tokens)
    gleu_score = sentence_gleu(reference, transformed_tokens)
    met_score = meteor_score(reference, transformed_tokens)
    google_bleu_scores_test_set.append(bleu_score)
    google_gleu_scores_test_set.append(gleu_score)
    google_meteor_scores_test_set.append(met_score)

    if i % 10 == 0:
        print("Transliterating sentence ", i+1)
        print('Original Code mixed text :', hinglish_sentences_test[i])
        print('Original Native Hindi convertion :', hindi_true_test[i])
        print('Transliterated to Hindi :', transformed_sentence)
        print('Average BLEU score :', np.mean(google_bleu_scores_test_set), '\n')
        print('Average GLEU score :', np.mean(google_gleu_scores_test_set), '\n')
        print('Average METEOR score :', np.mean(google_meteor_scores_test_set), '\n')
        print()

Transliterating sentence  1
Original Code mixed text : bank, check ko sansaadhit karne mein lagane wale samay ko bachaane ke liye unhein bankon ke bich electronic tarike se bhejate hain.

Original Native Hindi convertion : बैंक, चॅक को संसाधित करने में लगने वाले समय को बचाने के लिए उन्हें बैंकों के बीच इलेक्ट्रॉनिक तरीके से भेजते हैं।

Transliterated to Hindi : बैंक, चेक को संसाधित करने में लगाने वाले समय को बचाने के लिए उन्हें बैंकों के बिच इलेक्ट्रॉनिक तरीके से भेजते हैं.
Average BLEU score : 0.5961621647299126 

Average GLEU score : 0.6219512195121951 

Average METEOR score : 0.813692480359147 


Transliterating sentence  11
Original Code mixed text : Jyadatar Musabbar prajatiyn ke patte bade, mote, gaddedar gulab ki pankhudiyon ki tarah sajawat wale hote hain.

Original Native Hindi convertion : ज्यादातर मुसब्बर प्रजातियों के पत्ते बड़े, मोटे, गुद्देदार गुलाब की पंखुडि़यों की तरह सजावट वाले होते हैं।

Transliterated to Hindi : ज्यादातर मुसब्बर प्रजातियाँ के पत्ते बड़े, मोठे, गद्देदा

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Transliterating sentence  21
Original Code mixed text : agle din dubara yuddh hua, donon hi pakshon ne kai Hathiyaar aur Ratho ka upyog kiya is Yuddh men par donon ko hee kafi hani hui aur koi bhi paksh nahin jeeta.

Original Native Hindi convertion : अगले दिन दुबारा युद्ध हुआ, दोनों ही पक्षों ने कई हथियार और रथो का उपयोग किया इस युद्ध में पर दोनों को ही काफी हानि हुई और कोई भी पक्ष नहीं जीता।

Transliterated to Hindi : अगले दिन दुबारा युद्ध हुआ, दोनों ही पक्षों ने कई हथियार और राठो का उपयोग किया इस युद्ध में पर दोनों को ही काफी हानि हुई और कोई भी पक्ष नहीं जीता.
Average BLEU score : 0.6142594342260188 

Average GLEU score : 0.687653402650219 

Average METEOR score : 0.8126864964850665 


Transliterating sentence  31
Original Code mixed text : yah madhyam varg ka kantedar vruksh Balui jameen men nadi ke kinare adhik ugta hai.

Original Native Hindi convertion : यह मध्यम वर्ग का काँटेदार वृक्ष बलुई जमीन में नदी के किनारे अधिक उगता है।

Transliterated to Hindi : यह माध्यम वर्ग का कांटेदा

The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


Transliterating sentence  41
Original Code mixed text : Antarrashtriy vyvasayik school badi tezi se is vishay ko apne pathyakram ki ruprekha men shamil kar rahe hain aur kuch schoolon be visheshagya snatak ki upadhiyon ke rup men 'nivesh prabandhan' ya 'parisampatti prabandhan' ke shirshak ka nirman bhi kar diya hai (jaise Caas Business School, London)

Original Native Hindi convertion : अंतर्राष्ट्रीय व्यावसायिक स्कूल बड़ी तेजी से इस विषय को अपने पाठ्यक्रम की रूपरेखा में शामिल कर रहे हैं और कुछ स्कूलों ने विशेषज्ञ स्नातक की उपाधियों के रूप में 'निवेश प्रबंधन' या 'परिसंपत्ति प्रबंधन' के शीर्षक का निर्माण भी कर दिया है (जैसे कास बिजनेस स्कूल, लन्दन).

Transliterated to Hindi : अंतर्राष्ट्रीय व्यवसायिक स्कूल बड़ी तेज़ी से इस विषय को अपने पाठ्यक्रम की रुपरेखा में शामिल कर रहे हैं और कुछ स्कूलों बे विशेषज्ञ स्नातक की उपाधियों के रूप में 'निवेश प्रबंधन' या 'परिसंपत्ति प्रबंधन' के शीर्षक का निर्माण भी कर दिया है (जैसे सास बिज़नेस स्कूल, लंदन)
Average BLEU score : 0.5669298538501804 

Average GL

In [36]:
average_bleu_score = np.mean(google_bleu_scores_test_set)
average_gleu_score = np.mean(google_gleu_scores_test_set)
average_meteor_score = np.mean(google_meteor_scores_test_set)
print('Average BLEU Score for Dev Set using Google transliterator: ', average_bleu_score)
print('Average GLEU Score for Dev Set using Google transliterator: ', average_gleu_score)
print('Average METEOR Score for Dev Set using Google transliterator: ', average_meteor_score)
print('Google Transliterate model is very accurate but takes time to transliterate the sentence')

Average BLEU Score for Dev Set using Google transliterator:  0.5083387804725698
Average GLEU Score for Dev Set using Google transliterator:  0.6059268724686462
Average METEOR Score for Dev Set using Google transliterator:  0.7250058931981057
Google Transliterate model is very accurate but takes time to transliterate the sentence


In [18]:
# print(hinglish_sentences[12])
# print(hindi_true[12])

In [19]:
# def transliterate_all_alphanumeric_parts(sentence, lang_code='hi'):
#     transliterated_sentence = []
#     words = sentence.split()

#     for word in words:
#         # Use regex to find all alphanumeric parts in the word
#         parts = re.findall(r'[a-zA-Z0-9]+|[^a-zA-Z0-9]+', word)
        
#         transliterated_word = ""
        
#         for part in parts:
#             if part.isalnum():  # Transliterate only alphanumeric parts
#                 try:
#                     # Transliterate the alphanumeric part
#                     suggestions = transliterate_word(part, lang_code=lang_code)
#                     transliterated_part = suggestions[0] if suggestions else part
#                 except IndexError:
#                     # If transliteration fails, keep the alphanumeric part as is
#                     transliterated_part = part
#             else:
#                 # Keep non-alphanumeric parts (like "-" or spaces) as is
#                 transliterated_part = part
            
#             transliterated_word += transliterated_part
        
#         transliterated_sentence.append(transliterated_word)

#     return ' '.join(transliterated_sentence)

# # Example usage
# sentence = "This is an example with uprichar-Wasu and another-word."
# print(transliterate_all_alphanumeric_parts(sentence))
# print(transliterate_all_alphanumeric_parts(hinglish_sentences[12]))
# print(transliterate_all_alphanumeric_parts(hinglish_sentences[24]))
# print(hindi_true[24])

In [20]:
# import nltk
# nltk.download('wordnet')