# Translating fun!

This notebook has two initial goals:
- first, get a word (or words), plus example sentences, to translate
- second, use Hugging Face to translate the word and the sentences

In [39]:
import pandas as pd 
import requests
from bs4 import BeautifulSoup

## Obtain words and sentences

In [40]:
# Merriam-Webster "Word of the Day" page that the rest of the notebook scrapes.
url = 'https://www.merriam-webster.com/word-of-the-day/'

# Always pass a timeout: without one, requests can wait indefinitely on an
# unresponsive server and the notebook would appear to hang.
result = requests.get(url, timeout=10)

# Anything other than 200 means we did not receive the page we intend to parse.
if result.status_code != 200:
    raise ValueError(f'Website did not respond as expected. Status code: {result.status_code}')

In [41]:
# Name the parser explicitly: otherwise BeautifulSoup picks whichever parser
# happens to be installed (and warns about it), so results can differ between
# environments. 'html.parser' ships with Python, so no extra dependency is needed.
soup = BeautifulSoup(result.text, 'html.parser')

In [42]:
# The first <h2> on the page holds today's word; the other <h2> tags can be
# used to see previous words of the day.
h2_tags = soup.find_all('h2')
wotd = h2_tags[0].text
wotd

'jeopardize'

In [43]:
# The second <h2> on the page is the heading that precedes the definition.
h2_tags = soup.find_all('h2')
if len(h2_tags) < 2:
    raise ValueError('Page layout not as expected: could not find the definition heading.')
meaning_tag = h2_tags[1]

# Collect the definition and example paragraphs that follow the heading, until
# we reach the "See the entry" paragraph, whose link is stored as the last item.
examples = []
entry_link = 'Not found'  # stays at this value if the page has no entry link
sibling = meaning_tag.find_next_sibling('p')

# find_next_sibling returns None once there are no more <p> siblings, so the
# loop also ends cleanly if the "See the entry" paragraph is missing.
while sibling is not None:
    if "See the entry" in sibling.text:
        link_tag = sibling.find(href=True)
        # Only record a link that actually exists; the paragraph may lack an <a href>.
        if link_tag is not None:
            entry_link = link_tag['href']
            examples.append(entry_link)
        break

    examples.append(sibling.text)
    sibling = sibling.find_next_sibling('p')



In [44]:
# Inspect what was scraped: the collected paragraph texts, followed by the entry link.
examples

['To jeopardize something or someone is to put them at risk or in danger.',
 '// The wrong decision could seriously jeopardize the success of the project.',
 'https://www.merriam-webster.com/dictionary/jeopardize']

## Translation

In [65]:
# Use a pipeline as a high-level helper
from transformers import pipeline

In [None]:
# Both translators use the same multilingual checkpoint; only the target language differs.
M2M100_CHECKPOINT = "facebook/m2m100_418M"


def build_english_translator(target_code):
    """Return a translation pipeline from English into the language `target_code`."""
    return pipeline(
        "translation",
        model=M2M100_CHECKPOINT,
        tokenizer=M2M100_CHECKPOINT,
        src_lang="en",
        tgt_lang=target_code,
    )


eng_to_ita = build_english_translator("it")  # Italian code
eng_to_slo = build_english_translator("sl")  # Slovenian code


Device set to use cpu
Device set to use cpu


In [None]:
def _print_with_translations(text):
    """Print `text`, then its Italian and Slovenian translations on labelled lines."""
    print(text)
    for label, translator in (('ita:', eng_to_ita), ('slo:', eng_to_slo)):
        print(label, translator(text)[0]['translation_text'])


print('===== word =====')
_print_with_translations(wotd)
print('')

print('===== definition =====')
# The first scraped paragraph is the definition.
definition = examples[0]
_print_with_translations(definition)
print('')

print('===== examples =====')
for sentence in examples[1:]:
    # The scraped list ends with the dictionary entry URL; stop once we reach it.
    if sentence.startswith('http'):
        break
    _print_with_translations(sentence)

===== word =====
jeopardize
ita: Il pericolo
slo: grožnja

===== definition =====
To jeopardize something or someone is to put them at risk or in danger.
ita: Per mettere in pericolo qualcosa o qualcuno è metterli a rischio o in pericolo.
slo: Da bi nekaj ali nekoga ogrozili, je, da bi jih postavili v nevarnost ali v nevarnost.

===== example =====
// The wrong decision could seriously jeopardize the success of the project.
ita: La decisione sbagliata potrebbe seriamente minacciare il successo del progetto.
slo: Napačna odločitev bi lahko resno ogrozila uspeh projekta.
