In [1]:
import os, requests, time, sys
from xml.etree import ElementTree
from datetime import datetime
from random import randint
import json

In [2]:
class TextToSpeech(object):
    
    def __init__(self, subscription_key):
        self.subscription_key = subscription_key
        self.text = None   ##  the text to be converted to audio -- defaulting the value to None
        self.timestr = time.strftime("%Y%m%d-%H%M%S")
        self.access_token = None
        self.access_token_startTime = None
    
    #The TTS endpoint requires an access token. This method exchanges your
    #subscription key for an access token that is valid for ten minutes.
    def get_token(self):
        #fetch_token_url = "https://westus.api.cognitive.microsoft.com/sts/v1.0/issueToken"
        fetch_token_url = "https://westeurope.api.cognitive.microsoft.com/sts/v1.0/issueToken"
        headers = {
            'Ocp-Apim-Subscription-Key': self.subscription_key
        }
        response = requests.post(fetch_token_url, headers=headers)
        self.access_token = str(response.text)
        self.access_token_startTime = time.strftime("%Y%m%d-%H%M%S")
    
    def save_audio(self, inVoiceTypeShortName = r'', _outWavFile = r''):
        #### Inputs:
        ####       the ShortName of the voice type to be used.
        ####       the file number to include in the filename.
        ####       the absolute path where to save the audio file.
        #### Returns:
        ####       the status code (should be 200 if all good).
        ####       the audio file name created.
        base_url = 'https://westeurope.tts.speech.microsoft.com/'
        path = 'cognitiveservices/v1'
        constructed_url = base_url + path
        headers = {
            'Authorization': 'Bearer ' + self.access_token,
            'Content-Type': 'application/ssml+xml',
            'X-Microsoft-OutputFormat': 'riff-16khz-16bit-mono-pcm', # Deepspeech needs 16kHz, mono, PCM encoded wav files
            'User-Agent': 'ThesisWorkSpeech1'
        }
        xml_body = ElementTree.Element('speak', version='1.0')
        xml_body.set('{http://www.w3.org/XML/1998/namespace}lang', 'en-us')
        voice = ElementTree.SubElement(xml_body, 'voice')
        voice.set('{http://www.w3.org/XML/1998/namespace}lang', 'en-US')
        #voice.set('name', 'en-US-Guy24kRUS') # Short name for 'Microsoft Server Speech Text to Speech Voice (en-US, Guy24KRUS)'
        voice.set('name', inVoiceTypeShortName) # Short name for 'Microsoft Server Speech Text to Speech Voice
        voice.text = self.text
        body = ElementTree.tostring(xml_body)
        ## sometimes azure response = 503
        ##    https://docs.microsoft.com/en-us/rest/api/searchservice/http-status-codes
        ##    Solution: make API call up to 3 times with random delay
        for _ in range(3):
            response = requests.post(constructed_url, headers=headers, data=body)
            if response.status_code != 503:
                break
            sleep503Time = randint(3, 10)
            print(f"\nAure response = 503, so sleeping for {sleep503Time} seconds.\n")
            time.sleep(sleep503Time)
        
        #If a success response is returned, then the binary audio is written to the file.
        outWavFile = '' # if conversion failed then this will remain empty string
        # if the conversion is successful then status code will be 200.
        if response.status_code == 200:
            ## the wav file is created at the location sent during function call
            ##     example of how filename will look: azureSTT_7_en-CA-HeatherRUS_20191119-121233.wav
            outWavFile = _outWavFile
            with open(_outWavFile, 'wb') as audio:
                audio.write(response.content)
                print(f"\nSUCCESS file created here: {outWavFile}")
        else:
            print(f"\nFAILED : response.status_code: {str(response.status_code)}")
        
        return response.status_code, outWavFile
    
    def get_voices_list(self):
        #base_url = 'https://westus.tts.speech.microsoft.com/'
        base_url = 'https://westeurope.tts.speech.microsoft.com/'
        path = 'cognitiveservices/voices/list'
        constructed_url = base_url + path
        headers = {
            'Authorization': 'Bearer ' + self.access_token,
        }
        response = requests.get(constructed_url, headers=headers)
        if response.status_code == 200:
            print(f"\nAvailable voices:\n{response.text}")
        else:
            print(f"\nStatus code: {str(response.status_code)}\nSomething went wrong. Check your subscription key and headers.\n")
        return response.text
    
    def set_text_to_convert(self, inText):
        if isinstance(inText, str):
            self.tts = inText
            return True
        else:
            return False
    #

In [3]:
## create the object for the TTS processing
app = TextToSpeech('xxxxxxxxxxxxxxx')  ## key 1, RG=RGMasterThesis1, Resource=RGMasterThesis1
app.get_token()
print(f"\nFirst time Access Token = \n{app.access_token}")
print(f"\nFirst time Access Token Start Time = \n{app.access_token_startTime}\n")


First time Access Token = 
eyJhbGciOiJodHRwOi8vd3d3LnczLm9yZy8yMDAxLzA0L3htbGRzaWctbW9yZSNobWFjLXNoYTI1NiIsInR5cCI6IkpXVCJ9.eyJyZWdpb24iOiJ3ZXN0ZXVyb3BlIiwic3Vic2NyaXB0aW9uLWlkIjoiNGUyMzBhMzVmMjVmNDQ3YTlmOGYxODZmNmE3YThiYjIiLCJwcm9kdWN0LWlkIjoiU3BlZWNoU2VydmljZXMuRjAiLCJjb2duaXRpdmUtc2VydmljZXMtZW5kcG9pbnQiOiJodHRwczovL2FwaS5jb2duaXRpdmUubWljcm9zb2Z0LmNvbS9pbnRlcm5hbC92MS4wLyIsImF6dXJlLXJlc291cmNlLWlkIjoiL3N1YnNjcmlwdGlvbnMvYTBhOGI0MDQtMWZhOS00ZmI4LTljMjgtZWEwMTRlNDk0OTk5L3Jlc291cmNlR3JvdXBzL1JHTWFzdGVyVGhlc2lzMS9wcm92aWRlcnMvTWljcm9zb2Z0LkNvZ25pdGl2ZVNlcnZpY2VzL2FjY291bnRzL1JHTWFzdGVyVGhlc2lzMSIsInNjb3BlIjoic3BlZWNoc2VydmljZXMiLCJhdWQiOiJ1cm46bXMuc3BlZWNoc2VydmljZXMud2VzdGV1cm9wZSIsImV4cCI6MTU5NjY3MDY2NSwiaXNzIjoidXJuOm1zLmNvZ25pdGl2ZXNlcnZpY2VzIn0.WonI6aYxZqFzIQoMqd9Tn5RkvgFullBEnuqRn9ovDG0

First time Access Token Start Time = 
20200806-012745



In [17]:
## CHECKED THE TYPES OF VOICES THAT I CAN USE
## view the list of short names available
#response_info = app.get_voices_list()
#print(f"{response_info}")

#voice_types_list_ALL = json.loads(response_info)

#english_short_names_list = []
#for voice_info in voice_types_list_ALL:
#    if voice_info["Locale"].startswith(r"en-") and voice_info["VoiceType"] == r"Standard":
#        english_short_names_list.append(voice_info["ShortName"])
#print(f"{english_short_names_list}")


## short names of voice types - English i.e. Locale starts with "en-", AND
##                              Voicetype=Standard
#voiceTypeShortNameList = [
#    'en-AU-Catherine', 'en-AU-HayleyRUS', 'en-CA-HeatherRUS',
#    'en-CA-Linda', 'en-GB-George', 'en-GB-HazelRUS',
#    'en-GB-Susan', 'en-IE-Sean', 'en-IN-Heera',
#    'en-IN-PriyaRUS', 'en-IN-Ravi', 'en-US-AriaRUS',
#    'en-US-BenjaminRUS', 'en-US-GuyRUS', 'en-US-ZiraRUS']

['en-AU-Catherine', 'en-AU-HayleyRUS', 'en-CA-HeatherRUS', 'en-CA-Linda', 'en-GB-George', 'en-GB-HazelRUS', 'en-GB-Susan', 'en-IE-Sean', 'en-IN-Heera', 'en-IN-PriyaRUS', 'en-IN-Ravi', 'en-US-AriaRUS', 'en-US-BenjaminRUS', 'en-US-GuyRUS', 'en-US-ZiraRUS']


In [None]:
## short names of voice types - English i.e. Locale starts with "en-", AND
##                              Voicetype=Standard
voiceTypeShortNameList = [
    'en-AU-Catherine', 'en-AU-HayleyRUS', 'en-CA-HeatherRUS',
    'en-CA-Linda', 'en-GB-George', 'en-GB-HazelRUS',
    'en-GB-Susan', 'en-IE-Sean', 'en-IN-Heera',
    'en-IN-PriyaRUS', 'en-IN-Ravi', 'en-US-AriaRUS',
    'en-US-BenjaminRUS', 'en-US-GuyRUS', 'en-US-ZiraRUS']

In [None]:
textToConvert = r'blab blah
if not app.set_text_to_convert(textToConvert):
    print(f"FATAL ERROR")

In [None]:
appSaveAudioRespCode, savedWavFilename = app.save_audio( voiceTypeShortName, fileNumber, outWavFilesPath )
if appSaveAudioRespCode != 200:
     print(f"\n\nERROR audio conversion. Azure response Status Code = {appSaveAudioRespCode}\n\n")