In [None]:
"""
conda env remove -n spk
conda create --name spk python=3.8  ipykernel -y
conda activate spk
pip install keras==2.6
pip install TensorFlowTTS
pip install git+https://github.com/repodiac/german_transliterate.git#egg=german_transliterate
pip install -U pyttsx3
pip install -U bs4 nltk
pip install -U langdetect playsound
"""

"""
sudo apt install espeak
"""

In [None]:

import requests
import pyttsx3
#import torch
import nltk
#import pyaudio  
import wave  
import IPython.display as ipd
import tensorflow      as tf
import soundfile       as sf

from tensorflow_tts.inference import AutoProcessor
from tensorflow_tts.inference import TFAutoModel

from langdetect       import detect
from bs4              import BeautifulSoup, __version__
from scipy.io.wavfile import write
from nltk.tokenize    import sent_tokenize
from playsound        import playsound

# Report the versions of the main libraries so a run can be reproduced later.
print(f"requests     : {  requests.__version__}")
print(f"nltk         : {      nltk.__version__}")
print(f"tensorflow   : {        tf.__version__}")
print(f"BeautifulSoup: {           __version__}")

# pyttsx3 engine: used for every language that has no neural model below.
engine = pyttsx3.init()
engine.setProperty('rate', 200)


def _voice_language(voice):
    """Return the first language tag advertised by a pyttsx3 voice, or None.

    Handles the shapes `voice.languages` may take:
      * empty / missing list  -> None (voice is skipped)
      * bytes entry           -> decoded, first character dropped, exactly as
                                 the original `decode()[1:]` did
                                 (presumably espeak's leading priority byte)
      * str entry             -> used as-is
    """
    languages = getattr(voice, 'languages', None)
    if not languages:
        return None
    tag = languages[0]
    if isinstance(tag, bytes):
        tag = tag.decode()[1:]
    return tag or None


# Map language tag -> pyttsx3 voice id. When several voices share a tag the
# last one wins, as with the original dict comprehension.
langs = {}
for voice in engine.getProperty('voices'):
    voice_lang = _voice_language(voice)
    if voice_lang is not None:
        langs[voice_lang] = voice.id

# Sentence-tokenizer data for `sent_tokenize`. Older NLTK releases read
# 'punkt', newer ones read 'punkt_tab'; request both so either works.
# A resource unknown to the installed NLTK is reported by the downloader
# but does not raise with the default arguments.
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

# French models: text processor, Tacotron2 (text -> mel) and MB-MelGAN vocoder.
processor_fr = AutoProcessor.from_pretrained("tensorspeech/tts-tacotron2-synpaflex-fr")
tacotron2_fr =   TFAutoModel.from_pretrained("tensorspeech/tts-tacotron2-synpaflex-fr")
melgan_fr    =   TFAutoModel.from_pretrained("tensorspeech/tts-mb_melgan-synpaflex-fr")

# English models: text processor, Tacotron2 (text -> mel) and MelGAN vocoder.
processor_en = AutoProcessor.from_pretrained("tensorspeech/tts-tacotron2-ljspeech-en")
tacotron2_en =   TFAutoModel.from_pretrained("tensorspeech/tts-tacotron2-ljspeech-en")
melgan_en    =   TFAutoModel.from_pretrained("tensorspeech/tts-melgan-ljspeech-en"   )

out_file = "tts_out.wav"   # base name of the generated wav files
sr       = 22050           # sample rate (Hz) used when writing the wav files




In [None]:
url = "https://fr.wikipedia.org/wiki/Saison_cyclonique_2022-2023_dans_l%27oc%C3%A9an_Indien_sud-ouest"
#url = "https://en.wikipedia.org/wiki/Pronunciation"
#url  = "https://artilect.fr"

MAX_SENTENCES   = 5      # only the first few sentences of the page are read
REQUEST_TIMEOUT = 30     # seconds; without it requests.get can block forever
DEFAULT_LANG    = 'en'   # last-resort language

# Languages served by the neural models: (processor, tacotron2, vocoder).
neural_voices = {
    'en': (processor_en, tacotron2_en, melgan_en),
    'fr': (processor_fr, tacotron2_fr, melgan_fr),
}


def _synthesize_and_play(text, processor, tacotron2, vocoder, wav_path):
    """Run text -> mel -> waveform, write the result to `wav_path` and play it.

    `processor`, `tacotron2` and `vocoder` are one entry of `neural_voices`.
    The wav file is written as 16-bit PCM at the notebook-level rate `sr`.
    """
    input_ids = processor.text_to_sequence(text)
    _, mel_outputs, _, _ = tacotron2.inference(
        input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        input_lengths=tf.convert_to_tensor([len(input_ids)], tf.int32),
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
    )
    # vocoder inference (mel-to-wav); keep the single waveform of the batch
    audio = vocoder.inference(mel_outputs)[0, :, 0]
    sf.write(wav_path, audio, sr, "PCM_16")
    playsound(wav_path)


def _find_voice(lang_code, voices):
    """Return the voice id in `voices` matching `lang_code`, or None.

    An exact key match is preferred; otherwise the first key sharing the
    same primary subtag is used (e.g. 'de' matches 'de-de').
    """
    if lang_code in voices:
        return voices[lang_code]
    base = lang_code.split('-')[0]
    for key, voice_id in voices.items():
        if key.split('-')[0] == base:
            return voice_id
    return None


try:
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
except requests.RequestException as e:
    print(f"Exception raised: {e}")
    response = None

# `response` is None when the request itself failed; the error was printed above.
if response is not None and response.status_code == 200:
    print("Successfully opened the web page")

    soup = BeautifulSoup(response.text, 'html.parser')

    # Language declared by the page (<html lang="...">), reduced to its
    # primary subtag; used when per-sentence detection fails.
    html_tag  = soup.find('html')
    page_lang = html_tag.get('lang') if html_tag is not None else None
    page_lang = page_lang.split('-')[0].lower() if page_lang else DEFAULT_LANG

    # Prefer the element with id "firstHeading"; fall back to <title>, which may be absent.
    heading = soup.select("#firstHeading")
    if heading:
        title = heading[0].text
    else:
        title_tag = soup.find("title")
        title     = title_tag.get_text() if title_tag is not None else ""

    paragraphs = soup.select("p")
    sentenses  = [s for para in paragraphs for s in sent_tokenize(para.text)]

    for n, s in enumerate(sentenses[:MAX_SENTENCES]):
        file = f"{n}_{out_file}"
        try:
            lang = detect(s)
        except Exception:
            # Deliberate best-effort: detection can fail on text with no
            # usable features (e.g. only digits or punctuation).
            lang = page_lang

        print(f"[{lang}]: {s}")
        if lang in neural_voices:
            _synthesize_and_play(s, *neural_voices[lang], file)
        else:
            voice_id = _find_voice(lang, langs)
            if voice_id is None:
                print(f"WARNING! language {lang} not supported defaulting to english")
                voice_id = _find_voice(DEFAULT_LANG, langs)
            # With no English voice either, keep whatever voice the engine has.
            if voice_id is not None:
                engine.setProperty("voice", voice_id)
            engine.say(s)
            engine.runAndWait()

elif response is not None:
    print(f"Error: {response.status_code}")