## Text-to-Sound

### Google Text-to-Speech (gTTS)
Install with `pip install gtts`.

In [12]:
from gtts import gTTS

GREETING = 'Hello, my name is Hung!'

# Synthesize the greeting in English, then write the MP3 into the working directory.
tts = gTTS(text=GREETING, lang='en')
tts.save("hello.mp3")


In [None]:
# Show the audio file
from IPython.display import Audio

# Pass the path explicitly as `filename=`. A positional string is interpreted as
# raw audio `data` unless it names an existing file, so a missing "hello.mp3"
# would otherwise surface as a misleading data/rate error instead of a
# file-not-found error.
Audio(filename="hello.mp3")

In [None]:
# Play audio directly from memory
import io

# Render the speech into an in-memory buffer rather than a file on disk.
audio_fp = io.BytesIO()
tts.write_to_fp(audio_fp)

# getvalue() returns the full buffer contents regardless of the stream position.
Audio(audio_fp.getvalue())

## IPA

In [17]:
# !pip install eng-to-ipa
import eng_to_ipa as ipa

text = "The record was expensive."

# convert() returns a single transcription string for the whole sentence.
ipa_text = ipa.convert(text)
print(ipa_text)  # e.g. ðə ˈrɛkərd wɑz ɪkˈspɛnsɪv.

# ipa_list() returns, for each word, a list of candidate pronunciations
# (e.g. both the noun and verb readings of "record"). It is stored under its
# own name so `ipa_text` keeps referring to the plain string above.
ipa_candidates = ipa.ipa_list(text)
print(ipa_candidates)


ðə ˈrɛkərd wɑz ɪkˈspɛnsɪv.
[['ði', 'ðə'], ['rəˈkɔrd', 'rɪˈkɔrd', 'ˈrɛkərd'], ['wɑz'], ['ɪkˈspɛnsɪv.']]


In [7]:
# !pip install english-ipa

from english_ipa.cambridge import CambridgeDictScraper

# Look up pronunciations through the Cambridge dictionary scraper and show the
# three output formats it offers: dict, JSON string, and a compact text summary.
scraper = CambridgeDictScraper()
ipa_in_dict = scraper.get_ipa_in_dict("hello")
print(ipa_in_dict)
# returned value
# {'word': 'hello', 'ipas': [{'region': 'uk', 'ipas': ['/heˈləʊ/']}, {'region': 'us', 'ipas': ['/heˈloʊ/']}]}

ipa_in_json = scraper.get_ipa_in_json("hello")
print(ipa_in_json)
# returned value
# {"word":"hello","ipas":[{"region":"uk","ipas":["/heˈləʊ/"]},{"region":"us","ipas":["/heˈloʊ/"]}]}

ipa_in_str = scraper.get_ipa_in_str("vegetable")
print(ipa_in_str)
# returned value (note: this lookup is for "vegetable", not "hello")
# uk: ['/ˈvedʒ.tə.bəl/']; us: ['/ˈvedʒ.tə.bəl/']

{'word': 'hello', 'ipas': [{'region': 'uk', 'ipas': ['/heˈləʊ/']}, {'region': 'us', 'ipas': ['/heˈloʊ/']}]}
{"word":"hello","ipas":[{"region":"uk","ipas":["/heˈləʊ/"]},{"region":"us","ipas":["/heˈloʊ/"]}]}
uk: ['/ˈvedʒ.tə.bəl/']; us: ['/ˈvedʒ.tə.bəl/']


## Multiple inputs

In [3]:
import pandas as pd
import os
import google.generativeai as genai
import io
from dotenv import load_dotenv

# Load variables from a local .env file so os.getenv() below can see them.
load_dotenv()

# Input file holding one English word or sentence per row, without a header row.
# Supported formats: tab-separated .txt, .csv, and Excel (.xls / .xlsx).
filepath = r"D:\repos\streamlit_IPA\input.xlsx"

genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
model = genai.GenerativeModel('gemini-2.0-flash')

filename, ext = os.path.splitext(filepath)
print(filename, ext)

# Compare the extension case-insensitively so e.g. "INPUT.XLSX" is accepted too.
ext_normalized = ext.lower()
if ext_normalized == '.txt':
    df = pd.read_csv(filepath, encoding='utf-8-sig', sep='\t', header=None)
elif ext_normalized == '.csv':
    df = pd.read_csv(filepath, encoding='utf-8-sig', header=None)
elif ext_normalized in ('.xls', '.xlsx'):
    df = pd.read_excel(filepath, header=None)
else:
    # Fail loudly rather than hitting a NameError or silently reusing a `df`
    # left over from an earlier run of this cell.
    raise ValueError(
        f"Unsupported input file type {ext!r}; expected .txt, .csv, .xls or .xlsx"
    )

df.columns = ['text']

# Empty cells come back as NaN and numeric cells as numbers; calling .strip()
# on those would raise. Drop the empty rows, rebuild a 0..n-1 index (the
# request loop addresses rows by position), and coerce the rest to str.
df = df.dropna(subset=['text']).reset_index(drop=True)
df['text'] = df['text'].astype(str).str.strip()

print(df.head())
df.info()


D:\repos\streamlit_IPA\input .xlsx
                                                text
0  This is especially true for countries that lac...
1  Ecotourism, for example, is designed to take a...
2  but ironically the environment is often sacrif...
3                                          hordes of
4                                              flora
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   text    7 non-null      object
dtypes: object(1)
memory usage: 188.0+ bytes


In [17]:
# Please give the IPA (BrE and NAmE), synonyms, antonyms, and translation of the following input:

# Instruction template sent to the model. The text to analyse is appended after
# the trailing "Input:" line by the request loop. The template is runtime text:
# editing it changes what the model is asked to return.
prompts = ["""
    ## Prompt for IPA, translation, synonyms, and antonyms of English words or sentences

**Objective:**:
- **Input:** English word or sentence.
- **Output:**
    - BrE IPA of input word or sentence.
    - NAmE IPA of input word or sentence.
    - Synonyms if input is a word.
    - Antonyms if input is a word.
    - Vietnamese translation of input word or sentence.

---

### Input Example:
Broken rice is a traditional Vietnamese dish.
          
### Output Example:
- **BrE IPA**: ˈbrəʊkən raɪs ɪz ə trəˈdɪʃənl ˌvjetnəˈmiːz dɪʃ.
- **NAmE IPA**: ˈbroʊkən raɪs ɪz ə trəˈdɪʃənl ˌviˌɛtnəˈmiz dɪʃ.
- **Synonyms**: none.
- **Antonyms**: none.
- **Translation**: Cơm tấm là một món ăn truyền thống của Việt Nam.

---

Input:
"""]
# Echo the list to inspect the exact prompt text.
prompts

['\n    ## Prompt for IPA, translation, synonyms, and antonyms of English words or sentences\n\n**Objective:**:\n- **Input:** English word or sentence.\n- **Output:**\n    - BrE IPA of input word or sentence.\n    - NAmE IPA of input word or sentence.\n    - Synonyms if input is a word.\n    - Antonyms if input is a word.\n    - Vietnamese translation of input word or sentence.\n\n---\n\n### Input Example:\nBroken rice is a traditional Vietnamese dish.\n\n### Output Example:\n- **BrE IPA**: ˈbrəʊkən raɪs ɪz ə trəˈdɪʃənl ˌvjetnəˈmiːz dɪʃ.\n- **NAmE IPA**: ˈbroʊkən raɪs ɪz ə trəˈdɪʃənl ˌviˌɛtnəˈmiz dɪʃ.\n- **Synonyms**: none.\n- **Antonyms**: none.\n- **Translation**: Cơm tấm là một món ăn truyền thống của Việt Nam.\n\n---\n\nInput:\n']

In [None]:
import time

# Rate limit noted for this model: 15 requests per minute, i.e. one every 4 s.
REQUESTS_PER_MINUTE = 15
SECONDS_BETWEEN_REQUESTS = 60 / REQUESTS_PER_MINUTE

# Model reply text keyed by the DataFrame row label it belongs to.
responses = {}

# Iterate over the real index labels instead of range(len(df)), so the loop
# also works when the frame's index is not a plain 0..n-1 range.
for position, (row_label, row_text) in enumerate(df['text'].items()):
    # Pause *between* requests only: no wait before the first request and no
    # pointless wait after the last one.
    if position:
        time.sleep(SECONDS_BETWEEN_REQUESTS)
    prompt = prompts[0] + f"\n\n{row_text}"
    responses[row_label] = model.generate_content(contents=prompt).text

print(responses)

{0: '- **BrE IPA**: ðɪs ɪz ɪˈspeʃəli truː fɔː(r) ˈkʌntriːz ðæt læk eksˈpɔːtəbl ˈnætʃrəl rɪˈsɔːsɪz, bʌt pəˈzes ˈplenti ɒv ˈnætʃrəl əˈtrækʃənz, sʌtʃ æz biːtʃɪz, ˈmaʊntɪnz, lʌʃ ˈfɒrɪsts, ænd ˈdʒʌŋɡlz.\n- **NAmE IPA**: ðɪs ɪz ɪˈspɛʃəli truː fɔr ˈkʌntriz ðæt læk ɛkˈspɔrtəbl ˈnætʃərəl rɪˈsɔrsɪz, bʌt pəˈzɛs ˈplɛnti ʌv ˈnætʃərəl əˈtrækʃənz, sʌtʃ æz ˈbitʃɪz, ˈmaʊntənz, lʌʃ ˈfɔrəsts, ænd ˈdʒʌŋɡəlz.\n- **Synonyms**: none.\n- **Antonyms**: none.\n- **Translation**: Điều này đặc biệt đúng đối với các quốc gia thiếu tài nguyên thiên nhiên có thể xuất khẩu, nhưng sở hữu nhiều điểm thu hút tự nhiên, chẳng hạn như bãi biển, núi, rừng tươi tốt và rừng rậm.\n', 1: '- **BrE IPA**: ˌiːkəʊˈtʊərɪzəm, fər ɪɡˈzɑːmpl, ɪz dɪˈzaɪnd tə teɪk ədˈvɑːntɪdʒ əv ə ləʊˈkɑːlz ˈnætʃrəl ˈbjuːtiː tə əˈtrækt ˈɡriːn ˈtrævələz.\n- **NAmE IPA**: ˌikoʊˈtʊrɪzəm, fər ɪɡˈzæmpl, ɪz dɪˈzaɪnd tə teɪk ædˈvæntɪdʒ əv ə loʊˈkælz ˈnætʃərəl ˈbjutiː tə əˈtrækt ˈɡrin ˈtrævəlrz.\n- **Synonyms**: None.\n- **Antonyms**: None.\n- **Translation**: V

In [43]:
# Display the collected model replies (a dict keyed by input row).
responses

{0: '- **BrE IPA**: ðɪs ɪz ɪˈspeʃəli truː fɔː(r) ˈkʌntriːz ðæt læk eksˈpɔːtəbl ˈnætʃrəl rɪˈsɔːsɪz, bʌt pəˈzes ˈplenti ɒv ˈnætʃrəl əˈtrækʃənz, sʌtʃ æz biːtʃɪz, ˈmaʊntɪnz, lʌʃ ˈfɒrɪsts, ænd ˈdʒʌŋɡlz.\n- **NAmE IPA**: ðɪs ɪz ɪˈspɛʃəli truː fɔr ˈkʌntriz ðæt læk ɛkˈspɔrtəbl ˈnætʃərəl rɪˈsɔrsɪz, bʌt pəˈzɛs ˈplɛnti ʌv ˈnætʃərəl əˈtrækʃənz, sʌtʃ æz ˈbitʃɪz, ˈmaʊntənz, lʌʃ ˈfɔrəsts, ænd ˈdʒʌŋɡəlz.\n- **Synonyms**: none.\n- **Antonyms**: none.\n- **Translation**: Điều này đặc biệt đúng đối với các quốc gia thiếu tài nguyên thiên nhiên có thể xuất khẩu, nhưng sở hữu nhiều điểm thu hút tự nhiên, chẳng hạn như bãi biển, núi, rừng tươi tốt và rừng rậm.\n',
 1: '- **BrE IPA**: ˌiːkəʊˈtʊərɪzəm, fər ɪɡˈzɑːmpl, ɪz dɪˈzaɪnd tə teɪk ədˈvɑːntɪdʒ əv ə ləʊˈkɑːlz ˈnætʃrəl ˈbjuːtiː tə əˈtrækt ˈɡriːn ˈtrævələz.\n- **NAmE IPA**: ˌikoʊˈtʊrɪzəm, fər ɪɡˈzæmpl, ɪz dɪˈzaɪnd tə teɪk ædˈvæntɪdʒ əv ə loʊˈkælz ˈnætʃərəl ˈbjutiː tə əˈtrækt ˈɡrin ˈtrævəlrz.\n- **Synonyms**: None.\n- **Antonyms**: None.\n- **Translation**: 

In [44]:
import re
# Take the reply stored under key 1 as a sample and display it.
t = responses[1]
t

'- **BrE IPA**: ˌiːkəʊˈtʊərɪzəm, fər ɪɡˈzɑːmpl, ɪz dɪˈzaɪnd tə teɪk ədˈvɑːntɪdʒ əv ə ləʊˈkɑːlz ˈnætʃrəl ˈbjuːtiː tə əˈtrækt ˈɡriːn ˈtrævələz.\n- **NAmE IPA**: ˌikoʊˈtʊrɪzəm, fər ɪɡˈzæmpl, ɪz dɪˈzaɪnd tə teɪk ædˈvæntɪdʒ əv ə loʊˈkælz ˈnætʃərəl ˈbjutiː tə əˈtrækt ˈɡrin ˈtrævəlrz.\n- **Synonyms**: None.\n- **Antonyms**: None.\n- **Translation**: Ví dụ, du lịch sinh thái được thiết kế để tận dụng vẻ đẹp tự nhiên của một địa phương để thu hút khách du lịch "xanh".\n'

In [45]:
# One entry per line of the reply, with every whitespace run collapsed to a single space.
parts = [re.sub(r'\s+', ' ', raw_line) for raw_line in t.split('\n')]
parts

['- **BrE IPA**: ˌiːkəʊˈtʊərɪzəm, fər ɪɡˈzɑːmpl, ɪz dɪˈzaɪnd tə teɪk ədˈvɑːntɪdʒ əv ə ləʊˈkɑːlz ˈnætʃrəl ˈbjuːtiː tə əˈtrækt ˈɡriːn ˈtrævələz.',
 '- **NAmE IPA**: ˌikoʊˈtʊrɪzəm, fər ɪɡˈzæmpl, ɪz dɪˈzaɪnd tə teɪk ædˈvæntɪdʒ əv ə loʊˈkælz ˈnætʃərəl ˈbjutiː tə əˈtrækt ˈɡrin ˈtrævəlrz.',
 '- **Synonyms**: None.',
 '- **Antonyms**: None.',
 '- **Translation**: Ví dụ, du lịch sinh thái được thiết kế để tận dụng vẻ đẹp tự nhiên của một địa phương để thu hút khách du lịch "xanh".',
 '']

In [46]:
def _first_line_with(label):
    """Return the first entry of `parts` that contains `label`."""
    return [line for line in parts if label in line][0]


# Pick out the line belonging to each labelled field of the reply.
bre_ipa = _first_line_with('BrE IPA')
name_ipa = _first_line_with('NAmE IPA')
symnoyms = _first_line_with('Synonyms')
antonyms = _first_line_with('Antonyms')
translation = _first_line_with('Translation')

for field_line in (bre_ipa, name_ipa, symnoyms, antonyms, translation):
    print(field_line)

- **BrE IPA**: ˌiːkəʊˈtʊərɪzəm, fər ɪɡˈzɑːmpl, ɪz dɪˈzaɪnd tə teɪk ədˈvɑːntɪdʒ əv ə ləʊˈkɑːlz ˈnætʃrəl ˈbjuːtiː tə əˈtrækt ˈɡriːn ˈtrævələz.
- **NAmE IPA**: ˌikoʊˈtʊrɪzəm, fər ɪɡˈzæmpl, ɪz dɪˈzaɪnd tə teɪk ædˈvæntɪdʒ əv ə loʊˈkælz ˈnætʃərəl ˈbjutiː tə əˈtrækt ˈɡrin ˈtrævəlrz.
- **Synonyms**: None.
- **Antonyms**: None.
- **Translation**: Ví dụ, du lịch sinh thái được thiết kế để tận dụng vẻ đẹp tự nhiên của một địa phương để thu hút khách du lịch "xanh".


In [52]:
# Leading "label" part of a reply line: an optional list bullet, optional
# Markdown bold markers, one of the known field names, and the colon, which may
# sit outside (`**Label**:`) or inside (`**Label:**`) the bold markers.
# Group 1 captures the opening asterisks so the closing ones must match them;
# this keeps asterisks that belong to the value itself untouched.
_FIELD_LABEL_PREFIX = re.compile(
    r'^\s*(?:[-*]\s+)?(\**)\s*'
    r'(?:BrE IPA|NAmE IPA|Synonyms|Antonyms|Translation)'
    r'\s*(?:\1\s*:?|:\s*\1)\s*'
)


def processing_text(text):
    """Strip the leading field label from one line of the model reply.

    Only a label at the *start* of the line is removed, so a value that happens
    to contain one of the label words (e.g. a synonym list mentioning
    "Translation") is returned intact.

    Args:
        text: A single reply line such as ``'- **BrE IPA**: ˈflɔːrə'``.

    Returns:
        The value part of the line with surrounding whitespace removed. A line
        without a recognised leading label is returned stripped but otherwise
        unchanged.
    """
    return _FIELD_LABEL_PREFIX.sub('', text, count=1).strip()

def processing_response(response):
    """Split one model reply into its five labelled fields.

    The reply is cut into lines, whitespace runs inside each line are collapsed
    to a single space, and for every field the first line mentioning its label
    is cleaned with ``processing_text``.

    Args:
        response: Raw reply text, expected to contain one line each for
            "BrE IPA", "NAmE IPA", "Synonyms", "Antonyms" and "Translation".

    Returns:
        A 5-tuple ``(bre_ipa, name_ipa, synonyms, antonyms, translation)`` of
        strings. A field whose label does not appear in the reply comes back as
        an empty string instead of raising, so one malformed reply cannot abort
        a whole batch.
    """
    lines = [re.sub(r'\s+', ' ', line) for line in response.split('\n')]

    def extract(label):
        # First line mentioning the label, or '' when the reply omitted it.
        matching_line = next((line for line in lines if label in line), '')
        return processing_text(matching_line)

    return (
        extract('BrE IPA'),
        extract('NAmE IPA'),
        extract('Synonyms'),
        extract('Antonyms'),
        extract('Translation'),
    )

# Parse every stored reply and write its fields into the matching row of `df`.
for index, response in responses.items():
    bre_ipa, name_ipa, symnoyms, antonyms, translation = processing_response(response)

    # Target column -> parsed value, in the order the columns should be created.
    parsed_fields = {
        'bre_ipa': bre_ipa,
        'name_ipa': name_ipa,
        'synonyms': symnoyms,
        'antonyms': antonyms,
        'translation': translation,
    }
    for column, value in parsed_fields.items():
        df.loc[index, column] = value

# Show the enriched table, then export it without the index column.
display(df)
df.to_excel("output.xlsx", index=False)
    

Unnamed: 0,text,bre_ipa,name_ipa,synonyms,antonyms,translation
0,This is especially true for countries that lac...,ðɪs ɪz ɪˈspeʃəli truː fɔː(r) ˈkʌntriːz ðæt læk...,ðɪs ɪz ɪˈspɛʃəli truː fɔr ˈkʌntriz ðæt læk ɛkˈ...,none.,none.,Điều này đặc biệt đúng đối với các quốc gia th...
1,"Ecotourism, for example, is designed to take a...","ˌiːkəʊˈtʊərɪzəm, fər ɪɡˈzɑːmpl, ɪz dɪˈzaɪnd tə...","ˌikoʊˈtʊrɪzəm, fər ɪɡˈzæmpl, ɪz dɪˈzaɪnd tə te...",None.,None.,"Ví dụ, du lịch sinh thái được thiết kế để tận ..."
2,but ironically the environment is often sacrif...,bʌt aɪˈrɒnɪkli ðɪ ɪnˈvaɪərənmənt ɪz ˈɒfn ˈsækr...,bʌt aɪˈrɑːnɪkli ði ɪnˈvaɪrənmənt ɪz ˈɔːfn ˈsæk...,none.,none.,"Nhưng trớ trêu thay, môi trường thường bị hy s..."
3,hordes of,/hɔːdz ɒv/,/hɔrdz əv/,"crowds of, masses of, swarms of, droves of, pa...","few, scarcity of, handful of.","đám đông, lũ lượt."
4,flora,ˈflɔːrə,ˈflɔrə,"vegetation, plants, greenery, herbage.",fauna.,hệ thực vật
5,erosion,/ɪˈrəʊʒən/,/ɪˈroʊʒən/,"wearing away, abrasion, corrosion, deteriorati...","accretion, building, construction, growth.","sự xói mòn, sự ăn mòn, sự bào mòn."
6,stronghold,ˈstrɒŋˌhəʊld,ˈstrɔːŋˌhoʊld,"fortress, fort, citadel, bastion, bulwark, def...","weakness, vulnerability","Thành trì, pháo đài, cứ điểm."
