In [4]:
from IPython.display import Audio

# Give your code a voice with the Text to Speech API

This notebook shows how you can use the Google Cloud Text to Speech API to synthesize speech from text.

We'll use the Google Cloud Client Libraries to do this.

## Hello, world!

In [1]:
from google.cloud import texttospeech

In [2]:
def synthesize(text, voice=None, language_code=None):
    """Synthesize `text` as MP3 speech and return it as a playable notebook object.

    Args:
        text: The plain text to speak.
        voice: Optional name of a specific voice, e.g. 'en-US-Wavenet-B'.
            When omitted, a female voice is requested by gender instead.
        language_code: Optional language code such as 'en-US' or 'es-ES'.
            When omitted it is taken from the leading '<language>-<region>'
            part of `voice`, and falls back to 'en-US' if there is no voice
            name or the name does not have that shape.

    Returns:
        An IPython.display.Audio object wrapping the synthesized audio bytes.
    """
    if language_code is None:
        # Voice names reported by list_voices() start with their language
        # code ('es-ES-Standard-A' -> 'es-ES'), so reuse that prefix rather
        # than always asking for 'en-US'.
        # NOTE(review): presumably the service expects the language code to
        # agree with the named voice -- confirm against the API reference.
        name_parts = voice.split('-') if voice is not None else []
        if len(name_parts) >= 3:
            language_code = '-'.join(name_parts[:2])
        else:
            language_code = 'en-US'

    client = texttospeech.TextToSpeechClient()

    input_text = texttospeech.types.SynthesisInput(text=text)

    # Build the selection under its own name so the `voice` argument keeps
    # meaning "voice name" throughout the function.
    if voice is not None:
        voice_params = texttospeech.types.VoiceSelectionParams(
            language_code=language_code,
            name=voice)
    else:
        voice_params = texttospeech.types.VoiceSelectionParams(
            language_code=language_code,
            ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    response = client.synthesize_speech(input_text, voice_params, audio_config)

    return Audio(response.audio_content)

In [5]:
# No voice is named here, so synthesize() uses its default: a female en-US voice.
synthesize("Hello, PyCon! It is great to see you today.")

## Not just one voice, many!

The API offers many voices across a variety of languages.

In [19]:
# Ask the API for its available voices through a Text-to-Speech client.
client = texttospeech.TextToSpeechClient()
voices = client.list_voices()
# A bare final expression makes the notebook display the whole response, which is long.
voices

voices {
  language_codes: "es-ES"
  name: "es-ES-Standard-A"
  ssml_gender: FEMALE
  natural_sample_rate_hertz: 24000
}
voices {
  language_codes: "ja-JP"
  name: "ja-JP-Standard-A"
  ssml_gender: FEMALE
  natural_sample_rate_hertz: 22050
}
voices {
  language_codes: "pt-BR"
  name: "pt-BR-Standard-A"
  ssml_gender: FEMALE
  natural_sample_rate_hertz: 24000
}
voices {
  language_codes: "tr-TR"
  name: "tr-TR-Standard-A"
  ssml_gender: FEMALE
  natural_sample_rate_hertz: 22050
}
voices {
  language_codes: "sv-SE"
  name: "sv-SE-Standard-A"
  ssml_gender: FEMALE
  natural_sample_rate_hertz: 22050
}
voices {
  language_codes: "nl-NL"
  name: "nl-NL-Standard-A"
  ssml_gender: FEMALE
  natural_sample_rate_hertz: 24000
}
voices {
  language_codes: "en-US"
  name: "en-US-Wavenet-A"
  ssml_gender: MALE
  natural_sample_rate_hertz: 24000
}
voices {
  language_codes: "en-US"
  name: "en-US-Wavenet-B"
  ssml_gender: MALE
  natural_sample_rate_hertz: 24000
}
voices {
  language_codes: "en-US"
  n

## More natural speech using Wavenet voices

The Text to Speech API offers several voices that use Google's WaveNet technology to synthesize more natural-sounding speech.

In [10]:
# Naming a voice makes synthesize() request it by name instead of selecting by gender.
synthesize('Hello again, this voice uses wavenet.', voice='en-US-Wavenet-B')

In [11]:
# Same call with a different named voice; the text itself contains double quotes, hence the single-quoted literal.
synthesize('"and what is the use of a book," thought Alice, "without pictures or conversations?"', voice='en-US-Wavenet-C')