In [2]:
!pip install google-cloud-speech google-cloud google-cloud-texttospeech

Collecting google-cloud-texttospeech
  Downloading google_cloud_texttospeech-2.16.3-py2.py3-none-any.whl (151 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.0/152.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[0mInstalling collected packages: google-cloud-texttospeech
Successfully installed google-cloud-texttospeech-2.16.3


In [3]:
BUCKET_NAME = "mnlee-stt" # @param {type:"string"}

In [5]:
def synthesize_text(lang_code, text):
    """Synthesizes speech from the input string of text."""
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code=lang_code,
        #name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    response = client.synthesize_speech(
        request={"input": input_text, "voice": voice, "audio_config": audio_config}
    )

    # The response's audio_content is binary.
    with open(f"{lang_code}.mp3", "wb") as out:
        out.write(response.audio_content)
        print(f"Audio content written to file {lang_code}.mp3")

In [6]:
synthesize_text("en-US","Hello. This is a sentence to test multilingual language detection.")

Audio content written to file en-US.mp3


In [7]:
synthesize_text("es-ES","Hola. Esta es una oración para probar la detección de lenguaje multilingüe.")

Audio content written to file es-ES.mp3


In [8]:
synthesize_text("fr-FR","Bonjour. Ceci est une phrase pour tester la détection de langue multilingue.")

Audio content written to file fr-FR.mp3


In [9]:
synthesize_text("de-DE","Hallo. Dies ist ein Satz, um die mehrsprachige Spracherkennung zu testen.")

Audio content written to file de-DE.mp3


In [10]:
synthesize_text("pt-PT","Olá. Esta é uma frase para testar a detecção de idioma multilíngue.")

Audio content written to file pt-PT.mp3


In [16]:
synthesize_text("cmn-CN","你好。这是一句用于测试多语言语言检测的句子。")

Audio content written to file cmn-CN.mp3


In [17]:
synthesize_text("ja-JP","こんにちは。これは多言語言語検出をテストするための文です。")

Audio content written to file ja-JP.mp3


In [18]:
synthesize_text("ko-KR","안녕하세요. 다국어 언어 감지를 테스트하기 위한 문장입니다.")

Audio content written to file ko-KR.mp3


In [19]:
synthesize_text("ar-AE","مرحبًا. هذه جملة لاختبار اكتشاف اللغة متعددة اللغات.")

Audio content written to file ar-AE.mp3


In [20]:
synthesize_text("hi-IN","नमस्ते। यह बहुभाषी भाषा पहचान का परीक्षण करने के लिए एक वाक्य है.")

Audio content written to file hi-IN.mp3


In [None]:
!gsutil cp *.mp3 gs://mnlee-stt/robot/mp3/

In [24]:
from google.cloud import speech_v1p1beta1 as speech
#from google.cloud import speech_v2 as speech

#def stt(wav_uri) -> speech.RecognizeResponse:
def stt(lang_code):
    # Instantiates a client
    client = speech.SpeechClient()
    audio_uri = f"gs://{BUCKET_NAME}/robot/mp3/{lang_code}.mp3"
    audio = speech.RecognitionAudio(uri=audio_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US",
        sample_rate_hertz=44100,
        alternative_language_codes=["es-ES","fr-FR","de-DE","pt-PT","cmn-CN","ja-JP","ko-KR", "ar-AE", "hi-IN"]
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)
    #print(response)
    for result in response.results:
        print("Transcript: {}".format(result.alternatives[0].transcript))
        print("lang code: {}".format(result.language_code))
          
    #return response

In [25]:
stt("en-US")

InvalidArgument: 400 Invalid recognition 'config': bad sample rate hertz.

In [56]:
stt("es-ES")

Transcript: hello this is a multilingual support test
lang code: en-us


In [44]:
stt("fr-FR")

Transcript: hallo dies ist ein mehrsprachiger Support Test
lang code: de-de


In [45]:
stt("de-DE")

Transcript: hola esta es una prueba de soporte multilingüe
lang code: es-es


In [48]:
stt("pt-PT")

Transcript: bonjour Il s'agit d'un test de support multilingue
lang code: fr-fr


In [49]:
stt("cmn-CN")

Transcript: こんにちは多言語サポートテストです
lang code: ja-jp


In [50]:
stt("ja-JP")

Transcript: Helo ini ialah ujian sokongan berbilang bahasa
lang code: ms-my


In [52]:
stt("ko-KR")

Transcript: Sing Tao Daily by Kim Chattahoochee
lang code: en-us


In [53]:
stt("ar-AE")

Transcript: Helvete Facebook IT support Test
lang code: de-de


In [None]:
stt("hi-IN")