In [None]:
!pip3 install google-cloud-speech google-cloud google-cloud-texttospeech langdetect 
!pip3 install google-cloud-translate

In [21]:
BUCKET_NAME = "mnlee-stt" # @param {type:"string"}

In [31]:
def synthesize_text(lang_code, text, audio_name):
    """Synthesizes speech from the input string of text."""
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.SynthesisInput(text=text)

    # Note: the voice can also be specified by name.
    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code=lang_code,
        #name="en-US-Standard-C",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16
    )

    response = client.synthesize_speech(
        request={"input": input_text, "voice": voice, "audio_config": audio_config}
    )

    # The response's audio_content is binary.
    with open(f"{audio_name}.wav", "wb") as out:
        out.write(response.audio_content)
        print(f"Audio content written to file {audio_name}.wav")

In [138]:
synthesize_text("en-US","Hello. This is a sentence to test multilingual language detection.", "en-US")

Audio content written to file en-US.wav


In [5]:
synthesize_text("es-ES","Hola. Esta es una oración para probar la detección de lenguaje multilingüe.", "es-ES")

Audio content written to file es-ES.wav


In [6]:
synthesize_text("fr-FR","Bonjour. Ceci est une phrase pour tester la détection de langue multilingue.", "fr-FR")

Audio content written to file fr-FR.wav


In [7]:
synthesize_text("de-DE","Hallo. Dies ist ein Satz, um die mehrsprachige Spracherkennung zu testen.", "de-DE")

Audio content written to file de-DE.wav


In [8]:
synthesize_text("pt-PT","Olá. Esta é uma frase para testar a detecção de idioma multilíngue.", "pt-PT")

Audio content written to file pt-PT.wav


In [9]:
synthesize_text("cmn-CN","你好。这是一句用于测试多语言语言检测的句子。", "cmn-CN")

Audio content written to file cmn-CN.wav


In [10]:
synthesize_text("ja-JP","こんにちは。これは多言語言語検出をテストするための文です。", "ja-JP")

Audio content written to file ja-JP.wav


In [11]:
synthesize_text("ko-KR","안녕하세요. 다국어 언어 감지를 테스트하기 위한 문장입니다.", "ko-KR")

Audio content written to file ko-KR.wav


In [41]:
synthesize_text("ko-KR","hello good morning. 영한 혼합 문장입니다", "ko-en-word")

Audio content written to file ko-en-word.wav


In [12]:
synthesize_text("ar-AE","مرحبًا. هذه جملة لاختبار اكتشاف اللغة متعددة اللغات.", "ar-AE")

Audio content written to file ar-AE.wav


In [60]:
synthesize_text("hi-IN","नमस्ते। यह बहुभाषी भाषा पहचान का परीक्षण करने के लिए एक वाक्य है.", "hi-IN")

Audio content written to file hi-IN.wav


In [140]:
!gsutil cp en-es-word.wav gs://mnlee-stt/robot/wav/

43583.55s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Copying file://en-es-word.wav [Content-Type=audio/x-wav]...
- [1 files][ 96.1 KiB/ 96.1 KiB]                                                
Operation completed over 1 objects/96.1 KiB.                                     


In [151]:
def detect_language(text: str) -> dict:
    """Detects the text's language."""
    from google.cloud import translate_v2 as translate

    translate_client = translate.Client()

    # Text can also be a sequence of strings, in which case this method
    # will return a sequence of results for each text.
    result = translate_client.detect_language(text)

    #print(f"Text: {text}")
    #print("Confidence: {}".format(result["confidence"]))
    #print("Language: {}".format(result["language"]))

    return result["language"]

In [159]:
from google.cloud import speech_v1p1beta1 as speech
from langdetect import detect
#from google.cloud import speech_v2 as speech

#def stt(wav_uri) -> speech.RecognizeResponse:
def stt(lang_code, rate, model=None):
    # Instantiates a client
    client = speech.SpeechClient()
    audio_uri = f"gs://{BUCKET_NAME}/robot/wav/{lang_code}.wav"
    audio = speech.RecognitionAudio(uri=audio_uri)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US",
        #sample_rate_hertz=24000,
        sample_rate_hertz=rate,
        #alternative_language_codes=["es-ES","fr-FR","de-DE","pt-PT","cmn-CN","ja-JP","ko-KR", "ar-AE", "hi-IN"]
        alternative_language_codes=["es-ES","fr-FR","de-DE","pt-PT","cmn-CN","ko-KR"],
        max_alternatives=1,
        enable_spoken_punctuation=True,
        model=model
    )

    # Detects speech in the audio file
    response = client.recognize(config=config, audio=audio)
    #print(response)
    for result in response.results:
        for alternative in result.alternatives:
            print("Transcript: {}".format(alternative.transcript))    
            print("Detected [lang code]]: {}".format(detect(alternative.transcript)))
            print("Translation API [lang code]: {}".format(detect_language(alternative.transcript)))
        print("STT [lang code]: {}".format(result.language_code))
        
          
    #return response

In [139]:
stt("en-US", 24000, "latest_short")

Transcript: hello this is a sentence to test multilingual language detection
Detected [lang code]]: en
STT [lang code]: en-us


In [79]:
stt("es-ES",24000, "latest_short")

Transcript: hola esta es una oración para aprobar la detección de lenguaje multilingüe
lang code: es-es


In [80]:
stt("fr-FR",24000, "latest_short")

Transcript: bonjour ceci est une phrase pour tester la détection de langue multilingue
lang code: fr-fr


In [81]:
stt("de-DE" 24000, "latest_short")

Transcript: hallo dies ist ein Satz um die mehrsprachige Spracherkennung zu testen
lang code: de-de


In [82]:
stt("pt-PT",24000, "latest_short")

Transcript: Olá esta é uma frase para testar a detecção de idioma multilingue
lang code: pt-pt


In [83]:
stt("cmn-CN",24000, "latest_short")

Transcript: 你好这是一句用于测试多语言语言检测的句子
lang code: cmn-hans-cn


In [84]:
stt("ko-KR",24000, "latest_short")

Transcript: 안녕하세요 다국어 언어 감지를 테스트 하기 위한 문장입니다
lang code: ko-kr


In [None]:
stt("ja-JP",24000, "latest_short")

In [None]:
stt("ar-AE",24000, "latest_short")

In [None]:
stt("hi-IN",24000, "latest_short")

In [160]:
# 한국인 음섬 - 영어 + 한국어 발화
stt("en-ko-word1",48000)

Transcript: my favorite Korean food is 김치찌개
Detected [lang code]]: en
Translation API [lang code]: ko
STT [lang code]: ko-kr


In [161]:
# 한국인 음섬 - 영어 + 한국어 발화
stt("ko-en-word1", 48000)

Transcript: 김치찌개 It's my favorite food
Detected [lang code]]: en
Translation API [lang code]: ko
STT [lang code]: ko-kr


In [162]:
# 한국인 음섬 - 영어 발화
stt("ko-en-word3", 48000)

Transcript: my favorite food is pizza
Detected [lang code]]: en
Translation API [lang code]: en
STT [lang code]: ko-kr


In [168]:
# 일본인 음섬 - 영어 발화
stt("ja-en", 24000)

InvalidArgument: 400 sample_rate_hertz (24000) in RecognitionConfig must either be omitted or match the value in the WAV header (48000).

In [163]:
# 원어민 음섬 - 영어 발화
stt("en-pizza", 48000)

Transcript: I love pizza it's my favorite food
Detected [lang code]]: it
Translation API [lang code]: en
STT [lang code]: en-us


In [165]:
# 스페인 음섬 - 영어 발화
stt("en-es-word", 24000)

Transcript: my favorite Korean food is pizza
Detected [lang code]]: en
Translation API [lang code]: en
STT [lang code]: en-us


In [166]:
# Python detect 언어감지 오류 case
detect("I love pizza it's my favorite food")

'it'

In [167]:
# Translation API 언어 감지 
detect_language("I love pizza it's my favorite food")

'en'