In [2]:
!pip install google-cloud-speech google-cloud google-cloud-texttospeech

Collecting google-cloud-texttospeech
  Downloading google_cloud_texttospeech-2.16.3-py2.py3-none-any.whl (151 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.0/152.0 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[0mInstalling collected packages: google-cloud-texttospeech
Successfully installed google-cloud-texttospeech-2.16.3


In [3]:
# Cloud Storage bucket name; stt() reads gs://<BUCKET_NAME>/robot/wav/<lang_code>.wav from it.
BUCKET_NAME = "mnlee-stt" # @param {type:"string"}

In [50]:
def synthesize_text(lang_code, text):
    """Turn ``text`` into speech and save it as ``<lang_code>.wav``.

    Args:
        lang_code: Language code handed to the voice selection (for example
            ``"en-US"``). It is also used as the stem of the output file name.
        text: The plain text to synthesize.

    Returns:
        None. The audio bytes returned by the Text-to-Speech service are
        written to ``<lang_code>.wav`` in the current working directory and
        a confirmation line is printed.
    """
    from google.cloud import texttospeech

    tts_client = texttospeech.TextToSpeechClient()

    # The voice is picked by language and gender only. A specific voice could
    # be requested by name instead; client.list_voices() lists what exists.
    synthesis_request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code=lang_code,
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16
        ),
    }
    tts_response = tts_client.synthesize_speech(request=synthesis_request)

    # audio_content is raw bytes, so the file is opened in binary mode.
    with open(f"{lang_code}.wav", "wb") as wav_file:
        wav_file.write(tts_response.audio_content)
        print(f"Audio content written to file {lang_code}.wav")

In [51]:
synthesize_text("en-US","Hello. This is a sentence to test multilingual language detection.")

Audio content written to file en-US.wav


In [52]:
synthesize_text("es-ES","Hola. Esta es una oración para probar la detección de lenguaje multilingüe.")

Audio content written to file es-ES.wav


In [53]:
synthesize_text("fr-FR","Bonjour. Ceci est une phrase pour tester la détection de langue multilingue.")

Audio content written to file fr-FR.wav


In [54]:
synthesize_text("de-DE","Hallo. Dies ist ein Satz, um die mehrsprachige Spracherkennung zu testen.")

Audio content written to file de-DE.wav


In [55]:
synthesize_text("pt-PT","Olá. Esta é uma frase para testar a detecção de idioma multilíngue.")

Audio content written to file pt-PT.wav


In [56]:
synthesize_text("cmn-CN","你好。这是一句用于测试多语言语言检测的句子。")

Audio content written to file cmn-CN.wav


In [57]:
synthesize_text("ja-JP","こんにちは。これは多言語言語検出をテストするための文です。")

Audio content written to file ja-JP.wav


In [58]:
synthesize_text("ko-KR","안녕하세요. 다국어 언어 감지를 테스트하기 위한 문장입니다.")

Audio content written to file ko-KR.wav


In [59]:
synthesize_text("ar-AE","مرحبًا. هذه جملة لاختبار اكتشاف اللغة متعددة اللغات.")

Audio content written to file ar-AE.wav


In [60]:
synthesize_text("hi-IN","नमस्ते। यह बहुभाषी भाषा पहचान का परीक्षण करने के लिए एक वाक्य है.")

Audio content written to file hi-IN.wav


In [62]:
!gsutil cp *.wav gs://mnlee-stt/robot/wav/

Copying file://ar-AE.wav [Content-Type=audio/x-wav]...
Copying file://cmn-CN.wav [Content-Type=audio/x-wav]...                         
Copying file://de-DE.wav [Content-Type=audio/x-wav]...                          
Copying file://en-US.wav [Content-Type=audio/x-wav]...                          
/ [4 files][932.8 KiB/932.8 KiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying file://es-ES.wav [Content-Type=audio/x-wav]...
Copying file://fr-FR.wav [Content-Type=audio/x-wav]...                          
Copying file://hi-IN.wav [Content-Type=audio/x-wav]...                          
Copying file://ja-JP.wav [Content-Type=audio/x-wav]...                          
Copying file://ko-KR.wav [Content-Type=audio/x-wav]...                 

In [76]:
from google.cloud import speech_v1p1beta1 as speech
#from google.cloud import speech_v2 as speech

#def stt(wav_uri) -> speech.RecognizeResponse:
def stt(
    lang_code,
    primary_language_code="en-US",
    alternative_language_codes=None,
    sample_rate_hertz=24000,
):
    """Transcribe ``<lang_code>.wav`` from the bucket and print the detected language.

    The audio is read from ``gs://<BUCKET_NAME>/robot/wav/<lang_code>.wav``.
    ``lang_code`` only selects the file; it is not passed to the recognizer as
    the language. The recognizer is given ``primary_language_code`` plus the
    ``alternative_language_codes`` candidates, and the language code reported
    for each result is printed next to its transcript.

    Args:
        lang_code: File stem of the WAV object to transcribe (for example
            ``"es-ES"`` for ``es-ES.wav``).
        primary_language_code: Value for ``RecognitionConfig.language_code``.
        alternative_language_codes: Additional candidate languages. ``None``
            selects the list this notebook has been using
            (es-ES, fr-FR, de-DE, pt-PT, cmn-CN, ko-KR).
        sample_rate_hertz: Sample rate declared to the recognizer.
            NOTE(review): 24000 is assumed to match the synthesized WAV
            files; confirm if the audio source changes.

    Returns:
        None. Transcripts and language codes are printed.
    """
    if alternative_language_codes is None:
        alternative_language_codes = [
            "es-ES", "fr-FR", "de-DE", "pt-PT", "cmn-CN", "ko-KR",
        ]

    # Instantiates a client
    client = speech.SpeechClient()
    audio_uri = f"gs://{BUCKET_NAME}/robot/wav/{lang_code}.wav"
    audio = speech.RecognitionAudio(uri=audio_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code=primary_language_code,
        sample_rate_hertz=sample_rate_hertz,
        # Copy into a list so any iterable (e.g. a tuple) is accepted.
        alternative_language_codes=list(alternative_language_codes),
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)

    # Make an empty response visible instead of silently printing nothing.
    if not response.results:
        print(f"No transcription results for {audio_uri}")

    for result in response.results:
        # Only the first alternative of each result is reported.
        print("Transcript: {}".format(result.alternatives[0].transcript))
        print("lang code: {}".format(result.language_code))

In [78]:
stt("en-US")

Transcript: hello this is a descendants 2 test multilingual language detection
lang code: en-us


In [79]:
stt("es-ES")

Transcript: hola esta es una oración para aprobar la detección de lenguaje multilingüe
lang code: es-es


In [80]:
stt("fr-FR")

Transcript: bonjour ceci est une phrase pour tester la détection de langue multilingue
lang code: fr-fr


In [81]:
stt("de-DE")

Transcript: hallo dies ist ein Satz um die mehrsprachige Spracherkennung zu testen
lang code: de-de


In [82]:
stt("pt-PT")

Transcript: Olá esta é uma frase para testar a detecção de idioma multilingue
lang code: pt-pt


In [83]:
stt("cmn-CN")

Transcript: 你好这是一句用于测试多语言语言检测的句子
lang code: cmn-hans-cn


In [84]:
stt("ko-KR")

Transcript: 안녕하세요 다국어 언어 감지를 테스트 하기 위한 문장입니다
lang code: ko-kr


In [None]:
stt("ja-JP")

In [None]:
stt("ar-AE")

In [None]:
stt("hi-IN")