# Translating fun!

This notebook has two initial goals:
- first, get a word (or words) to translate
- second, use Hugging Face to translate the sentences

In [54]:
import pandas as pd 
import requests
from bs4 import BeautifulSoup
import time

## Obtain words and sentences

In [None]:
def parse_wotd_url(
    url: str,
    timeout: float = 10.0,
) -> tuple[str, str, list[str], str]:
    """Fetch a word-of-the-day page and parse out the word, its definition,
        its example sentences and the stand-alone word entry link.

        The page is expected to hold the word in its first ``<h2>`` and to
        follow its second ``<h2>`` with sibling ``<p>`` tags: the definition
        first, then any examples, then a paragraph containing "See the entry"
        that carries the entry link.

        Parameters
        ----------
        url : str
            The url pointing to the word of the day location.
        timeout : float, optional
            Seconds to wait for the server before giving up (default 10).

        Returns
        -------
        wotd : str
            Word of the day (on the given url).
        definition : str
            Definition of the wotd.
        examples : list
            Example or examples of the wotd, with '//' markers removed.
        entry_link : str
            Url for the wotd entry in the dictionary, or 'Not found' when the
            "See the entry" paragraph has no link.

        Raises
        ------
        ValueError
            If the response status is not 200, or if the page does not have
            the expected layout (fewer than two ``<h2>`` tags, or no
            "See the entry" paragraph after the definition).
    """

    # Without a timeout a stalled connection would hang the notebook forever.
    result = requests.get(url, timeout=timeout)

    if result.status_code != 200:
        raise ValueError(f'Website did not respond as expected. Status code: {result.status_code}')

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed in the environment.
    soup = BeautifulSoup(result.text, 'html.parser')

    # First <h2> holds the word; the second one precedes the definition.
    headings = soup.find_all('h2')
    if len(headings) < 2:
        raise ValueError(
            f'Unexpected page layout: expected at least two <h2> tags, found {len(headings)}.'
        )
    wotd = headings[0].text
    meaning_tag = headings[1]

    # Walk the <p> siblings: definition first, then examples, until the
    # "See the entry" paragraph is reached.
    definition = ''
    examples = []
    entry_link = ''
    sibling = meaning_tag.find_next_sibling('p')
    while sibling is not None:
        if definition == '':
            definition = sibling.text
        elif "See the entry" in sibling.text:
            link_tag = sibling.find(href=True)
            entry_link = link_tag['href'] if link_tag is not None else 'Not found'
            break
        else:
            examples.append(sibling.text.replace('//', '').strip())
        sibling = sibling.find_next_sibling('p')
    else:
        # Ran out of paragraphs without meeting the terminator: fail loudly
        # instead of crashing on None or returning unrelated paragraphs.
        raise ValueError('Unexpected page layout: no "See the entry" paragraph found.')

    return wotd, definition, examples, entry_link

In [None]:
# Scrape today's page and echo every parsed field as a quick sanity check.
wotd, definition, examples, entry_link = parse_wotd_url(
    'https://www.merriam-webster.com/word-of-the-day/'
)

print('word of the day', wotd)
print('definition:', definition)
if examples:
    # one example per line
    print('\n'.join(examples))
print('entry link:', entry_link)

litmus test
definition: A litmus test is something (such as an opinion about a political or moral issue) that is used to make a judgment about whether someone or something is acceptable.
At our family’s Thanksgiving dinner, the litmus test for good mac and cheese is whether or not it is baked.
entry link: https://bit.ly/4kbDEsg


In [65]:
# Flatten the parsed fields into a single record; each example sentence gets
# its own numbered column (Example_1, Example_2, ...).
row_data = {
    'Language': 'Eng',
    'Wotd': wotd,
    'Definition': definition,
    'EntryLink': entry_link,
    **{f'Example_{position}': sentence
       for position, sentence in enumerate(examples, start=1)},
}

test_df = pd.DataFrame([row_data])
test_df

Unnamed: 0,Language,Wotd,Definition,EntryLink,Example_1
0,Eng,litmus test,A litmus test is something (such as an opinion...,https://bit.ly/4kbDEsg,"At our family’s Thanksgiving dinner, the litmu..."


## Translation

In [16]:
# Use a pipeline as a high-level helper
from transformers import pipeline

In [17]:
# Both translators use the same multilingual checkpoint and differ only in
# the target language code: "it" for Italian, "sl" for Slovenian.
# NOTE(review): each pipeline() call names the checkpoint separately, so the
# model is presumably loaded twice — confirm whether sharing one is feasible.
_M2M_CHECKPOINT = "facebook/m2m100_418M"

eng_to_ita, eng_to_slo = (
    pipeline(
        "translation",
        model=_M2M_CHECKPOINT,
        tokenizer=_M2M_CHECKPOINT,
        src_lang="en",
        tgt_lang=target_code,
    )
    for target_code in ("it", "sl")
)


Device set to use cpu
Device set to use cpu


In [66]:
def _show_with_translations(text):
    """Print ``text`` followed by its Italian and Slovenian translations."""
    print(text)
    print('ita:', eng_to_ita(text)[0]['translation_text'])
    print('slo:', eng_to_slo(text)[0]['translation_text'])


print('===== word =====')
_show_with_translations(wotd)
print('')

print('===== definition =====')
_show_with_translations(definition)
print('')

print('===== examples =====')
for example in examples:
    _show_with_translations(example)

===== word =====
litmus test
ita: Test di LITMUS
slo: LITMUS test

===== definition =====
A litmus test is something (such as an opinion about a political or moral issue) that is used to make a judgment about whether someone or something is acceptable.
ita: Un test di litmo è qualcosa (come un'opinione su una questione politica o morale) che viene usato per giudicare se qualcuno o qualcosa è accettabile.
slo: Litmus test je nekaj (kot je mnenje o političnem ali moralnem vprašanju), ki se uporablja za odločanje o tem, ali je nekdo ali kaj sprejemljivo.

===== examples =====
At our family’s Thanksgiving dinner, the litmus test for good mac and cheese is whether or not it is baked.
ita: Nella cena di Thanksgiving della nostra famiglia, il test di litmus per un buon mac e formaggio è se è bagnato o meno.
slo: Na naši družini zahvalni večerji, test litmus za dober mac in sir je, ali je pečen ali ne.


In [None]:
# examples[1].replace('// ', '')

'A convivial atmosphere filled the gallery, with good food in abundance, and wine and conversation both flowing freely.'

## Random 10 wotd and translation, putting it all together

In [56]:
# The archive could be walked back to 2022-01-01 (if not further), judging by
# the site structure on June 20, 2025. It is more fun to take a 1001-day
# window instead: both endpoints are included, so today and the day 1000 days
# ago are both in the range.
end_date = pd.Timestamp.today().normalize()
start_date = end_date - pd.Timedelta(days=1000)
print(end_date)
print(start_date)

dates_1001 = pd.date_range(start=start_date, end=end_date, freq='D')
print(len(dates_1001))

2025-06-20 00:00:00
2022-09-24 00:00:00
1001


In [None]:
# Spot-check the parser on ten randomly chosen days from the 1001-day window.
# NOTE(review): the URL keeps a fixed '2022-01-01/' segment ahead of the
# sampled date — confirm this is the path the site actually expects.
for date in pd.Series(dates_1001).sample(10):
    print(date.date())
    # parse_wotd_url returns four values (word, definition, examples, entry
    # link); unpacking only two raised ValueError. Only the word and its
    # examples are shown here, so the other two are discarded.
    wotd, _definition, examples, _entry_link = parse_wotd_url(
        f'https://www.merriam-webster.com/word-of-the-day/2022-01-01/{date.date()}'
    )

    print(wotd)
    for example in examples:
        print(example)
    print()
    time.sleep(2)  # pause between consecutive requests