In [1]:
!pip install nltk  # Install the Natural Language Toolkit (NLTK) library.
!pip install googletrans==4.0.0-rc1  # Install version 4.0.0-rc1 of the googletrans library.

Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl (55 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2023.1.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
[2K     [90m━

In [2]:
# Import the 're' module for regular expressions.
import re

# Import the 'nltk' library for natural language processing.
import nltk

# Import specific functions 'pos_tag' and 'word_tokenize' from nltk.
from nltk import pos_tag, word_tokenize

# Import the 'Translator' class from the 'googletrans' library.
from googletrans import Translator

# Download the NLTK resources used by this notebook:
#   - 'punkt'                      : tokenizer data
#   - 'averaged_perceptron_tagger' : part-of-speech tagger data
# Presumably these back word_tokenize and pos_tag respectively — TODO confirm.
nltk.download('punkt')

nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [3]:
# Function to find nouns in a sentence
def find_nouns(sentence):
    """Return the noun tokens of ``sentence`` in order of appearance.

    The sentence is split with ``word_tokenize`` and tagged with ``pos_tag``;
    a token is kept when its tag is one of NN, NNS, NNP or NNPS.
    Repeated nouns are returned once per occurrence.

    Args:
        sentence: The text to scan.

    Returns:
        A list of the tokens tagged as nouns (possibly empty).
    """
    noun_tags = ('NN', 'NNS', 'NNP', 'NNPS')
    tagged_tokens = pos_tag(word_tokenize(sentence))

    found = []
    for token, pos in tagged_tokens:
        if pos in noun_tags:
            found.append(token)
    return found

# Function to translate text to Hindi
def translate_to_hindi(text):
    """Translate English ``text`` into Hindi.

    A fresh ``Translator`` is created on every call and asked to translate
    from source language 'en' to destination language 'hi'.

    Args:
        text: The English text to translate.

    Returns:
        The ``text`` attribute of the translation result.
    """
    return Translator().translate(text, src='en', dest='hi').text

# Function to replace nouns in Hindi sentence with their English counterparts
def replace_nouns_with_english(hindi_sentence, english_sentence):
    """Swap translated nouns in a Hindi sentence back to their English originals.

    The nouns of ``english_sentence`` are found with ``find_nouns``, each
    distinct noun is translated on its own with ``translate_to_hindi``, and
    every occurrence of such a translation inside ``hindi_sentence`` is
    replaced by the English noun it came from.

    Args:
        hindi_sentence: The Hindi translation of ``english_sentence``.
        english_sentence: The original English sentence.

    Returns:
        ``hindi_sentence`` with the matched Hindi nouns replaced by English
        nouns. The input is returned unchanged when no usable noun
        translation is available.
    """
    # Translate each distinct noun only once; dict.fromkeys keeps the
    # first-seen order while dropping repeats.
    distinct_nouns = list(dict.fromkeys(find_nouns(english_sentence)))

    # Map translated (Hindi) noun -> original English noun. If two English
    # nouns share a translation, the later one wins.
    noun_mapping = {}
    for english_noun in distinct_nouns:
        hindi_noun = translate_to_hindi(english_noun)
        # A missing or blank translation must never become a search key:
        # replacing '' would insert the English noun between every character.
        if not hindi_noun or not hindi_noun.strip():
            continue
        noun_mapping[hindi_noun] = english_noun

    if not noun_mapping:
        return hindi_sentence

    # Single pass with the longest alternatives first, so a translation that
    # is contained in a longer one cannot clobber part of it, and text that
    # was just inserted is never rescanned.
    alternatives = sorted(noun_mapping, key=len, reverse=True)
    pattern = re.compile('|'.join(re.escape(key) for key in alternatives))
    return pattern.sub(lambda match: noun_mapping[match.group(0)], hindi_sentence)

In [5]:
# Interactive demo: read one English sentence from the user, translate the
# whole sentence to Hindi, then swap the translated nouns back to English.
sentence = input("Enter a sentence: ")
hindi_translation = translate_to_hindi(sentence)

# The original English sentence is passed as well so its nouns can be found.
result_sentence = replace_nouns_with_english(hindi_translation, sentence)
print("Hinglish Sentence:", result_sentence)

Enter a sentence: I had about a 30 minute demo just using this new headset
Hinglish Sentence: मेरे पास इस नए headset का उपयोग करके लगभग 30 minute का demo था


In [6]:
# Sample English sentences for the batch demo
sentences = [
    "Definitely share your feedback in the comment section.",
    "So even if it's a big video, I will clearly mention all the products.",
    "I was waiting for my bag.",
]

# Run each sample through the same two steps as the interactive demo:
# full Hindi translation first, then nouns swapped back to English.
for sentence in sentences:
    hindi_translation = translate_to_hindi(sentence)
    result_sentence = replace_nouns_with_english(hindi_translation, sentence)
    print("Hinglish Sentence:", result_sentence)


Hinglish Sentence: निश्चित रूप से comment section में अपनी feedback साझा करें।
Hinglish Sentence: तो भले ही यह एक बड़ा video है, मैं स्पष्ट रूप से सभी products का उल्लेख करूंगा।
Hinglish Sentence: मैं अपने बैग का इंतजार कर रहा था।


In [7]:
# Expected Hinglish output for each entry of `sentences`, in the same order.
# NOTE(review): these strings are identical to what the pipeline itself
# printed for the same sentences, so a 100% score only shows that the pipeline
# reproduces its own earlier output — it is not an independent quality measure.
reference_translations = [
    "निश्चित रूप से comment section में अपनी feedback साझा करें।",
    "तो भले ही यह एक बड़ा video है, मैं स्पष्ट रूप से सभी products का उल्लेख करूंगा।",
    "मैं अपने बैग का इंतजार कर रहा था।"
]

# zip() stops at the shorter input, while the accuracy denominator below uses
# len(sentences); a length mismatch would therefore skew the score silently.
if len(reference_translations) != len(sentences):
    raise ValueError(
        f"Expected one reference per sentence, got {len(reference_translations)} "
        f"references for {len(sentences)} sentences"
    )

# Initialize a counter for correct translations
correct_translations = 0

# Translate each sentence and compare to the reference translation
for sentence, reference in zip(sentences, reference_translations):
    hindi_translation = translate_to_hindi(sentence)
    result_sentence = replace_nouns_with_english(hindi_translation, sentence)

    # Only an exact, character-for-character match counts as correct
    if result_sentence == reference:
        correct_translations += 1

# Calculate accuracy as the percentage of exact matches
accuracy = (correct_translations / len(sentences)) * 100

# Print the accuracy
print(f"Accuracy: {accuracy:.2f}%")


Accuracy: 100.00%
