In [1]:
!pip install azure-cognitiveservices-speech

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import os
from dotenv import load_dotenv
import azure.cognitiveservices.speech as speechsdk

In [3]:
load_dotenv()

True

In [4]:
endpoint = os.environ.get('AI_SPEECH_ENDPOINT')
key = os.environ.get('AI_SPEECH_KEY')

In [None]:
speech_config = speechsdk.SpeechConfig(subscription=key, endpoint=endpoint)
audio_config = speechsdk.audio.AudioOutputConfig(filename="output_from_tts.wav")

In [5]:
speech_config.speech_synthesis_voice_name='ar-EG-SalmaNeural'

In [17]:
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

In [18]:
text = "Hello, this is a test of the Azure Cognitive Services Speech SDK."

In [19]:
#results = speech_synthesizer.speak_text_async(text).get()
speech_synthesis_result = speech_synthesizer.speak_text(text)

In [20]:
speech_synthesis_result.reason

<ResultReason.SynthesizingAudioCompleted: 9>

In [23]:
if speech_synthesis_result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized for text [{}]".format(text))
elif speech_synthesis_result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = speech_synthesis_result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        if cancellation_details.error_details:
            print("Error details: {}".format(cancellation_details.error_details))
            print("Did you set the speech resource key and endpoint values?")

Speech synthesized for text [Hello, this is a test of the Azure Cognitive Services Speech SDK.]


### SSML

In [5]:
speech_config = speechsdk.SpeechConfig(subscription=key, endpoint=endpoint)
audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)

In [29]:
speech_config.speech_synthesis_voice_name='ar-EG-SalmaNeural'

In [6]:
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

In [11]:
with open("./Data/ssml.xml", "r") as file:
    ssml_text = file.read()

In [12]:
speech_synthesizer.speak_ssml_async(ssml_text).get()

<azure.cognitiveservices.speech.SpeechSynthesisResult at 0x17fb6bc4910>

#### Other SSML examples

1) Change speaking rate and pitch
```xml
<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AvaMultilingualNeural">
    <prosody rate="80%" pitch="+2st">
      When you're on the freeway, it's a good idea to use a GPS.
    </prosody>
  </voice>
</speak>

```

2) Add a pause (break)
```xml
<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AvaMultilingualNeural">
    When you're on the freeway, <break time="500ms"/> it's a good idea to use a GPS.
  </voice>
</speak>

```

3) Emphasize a word
```xml
<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AvaMultilingualNeural">
    When you're on the freeway, it's a good idea to <emphasis level="strong">always</emphasis> use a GPS.
  </voice>
</speak>

```

4) Spell out an acronym
```xml
<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AvaMultilingualNeural">
    When driving on the freeway, make sure your <say-as interpret-as="characters">GPS</say-as> is working properly.
  </voice>
</speak>

```

5) Insert audio (audio tag)
```xml
<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US">
  <voice name="en-US-AvaMultilingualNeural">
    When you're on the freeway, it's a good idea to use a GPS.
    <audio src="https://www.example.com/sounds/alert.mp3"/>
    Please proceed to the highlighted route.
  </voice>
</speak>

```
