In [64]:
from dotenv import load_dotenv
import os
from pathlib import Path

import azure.cognitiveservices.speech as speechsdk
import urllib
from IPython.display import Audio, display

In [26]:
dotenv_path = Path(os.getcwd(), 'speech.env')
load_dotenv(dotenv_path)
CNV_KEY = os.environ.get("cnv_key")
CNV_API = os.environ.get("cnv_api")
CNV_REGION = os.environ.get("cnv_region", "southeastasia")



In [29]:
# get deployement id from url query string in the form of ?deploymentId=<deployement_id>
def get_deployment_id(url):
    query = urllib.parse.urlparse(url).query
    query_dict = urllib.parse.parse_qs(query)
    return query_dict.get('deploymentId', [None])[0]        




In [62]:
# Creates an instance of a speech config with specified subscription key and service region.
speech_config = speechsdk.SpeechConfig(subscription=CNV_KEY, region=CNV_REGION)
speech_config.endpoint_id = get_deployment_id(CNV_API)
speech_config.speech_synthesis_voice_name = "MeralNeural"
speech_config.set_speech_synthesis_output_format(speechsdk.SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3)

text = "Hi, this is my custom voice. My name is Meral!"
file_name = "sample.wav"

# using the default speaker as audio output.
# audio_output_config = speechsdk.audio.AudioOutputConfig(filename=file_name)
audio_output_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output_config)

result = speech_synthesizer.speak_text_async(text).get()
# Check result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized for text [{}], and the audio was saved to [{}]".format(text, file_name))
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))


Audio(result.audio_data)

Speech synthesized for text [Hi, this is my custom voice. My name is Meral!], and the audio was saved to [sample.wav]


In [63]:
# use the following text for input
'''
Here's some Python code that uses the IPython.display module to display a music 
play control in a Jupyter notebook: Make sure to replace 'music_file.mp3' 
with the path to your own music file.
'''


# read user input
print("Enter text to synthesize: ")
text = input("Enter text to synthesize: ")
Audio(speech_synthesizer.speak_text_async(text).get().audio_data)

Enter text to synthesize: 
