In [2]:
!pip install openpyxl --q

In [1]:
!pip install azure-cognitiveservices-speech

Collecting azure-cognitiveservices-speech
  Downloading azure_cognitiveservices_speech-1.29.0-py3-none-win_amd64.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 9.3 MB/s eta 0:00:00
Installing collected packages: azure-cognitiveservices-speech
Successfully installed azure-cognitiveservices-speech-1.29.0


## Speech Synthesis Using the Speech SDK

First, set up some general items. Import the Speech SDK Python:

In [34]:
import azure.cognitiveservices.speech as speechsdk

Set up the subscription info for the Speech Service:

In [35]:
import os

subscription_key = os.getenv('SUBSCRIPTION_KEY')
service_region = os.getenv('SERVICE_REGION')
translator_endpoint = "https://api.cognitive.microsofttranslator.com"
speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)

In [43]:
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config)

## Reading voice parameters from SSML file

In [101]:
# ssml_string = open("ssml_test.xml", "r").read()
result = speech_synthesizer.speak_ssml_async(ssml_string).get()

stream = speechsdk.AudioDataStream(result)
# stream.save_to_wav_file("ssml_test_result.mp3")

## Language detection

In [46]:
!pip install azure.ai.textanalytics



In [50]:
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential

ta_credential = AzureKeyCredential(subscription_key)

In [51]:
text_analytics_client = TextAnalyticsClient(
            endpoint="https://cogn-cc-cds-nonprod.cognitiveservices.azure.com/", 
            credential=ta_credential)

In [70]:
documents = ["jestem Roman"]
response = text_analytics_client.detect_language(documents = documents)[0]
response.primary_language.name

'Polish'

In [72]:
response.primary_language.iso6391_name

'pl'

## Process SSML files

In [1]:
import re
from pathlib import Path


def get_voice_name(file_path):
    with open(file_path, 'r') as file:
        file_content = file.read()
        pattern = r"\"ShortName\":\"([^\"]+)\""
        match = re.search(pattern, file_content)
        if match:
            voice_name = match.group(1)
            voice_name = voice_name.split("-")[-1]
            return voice_name
        else:
            raise Exception
        
        
def generate_voice_configurations(base_folder):
    voice_configurations = {}

    base_path = Path(base_folder)
    subfolders = [subfolder for subfolder in base_path.iterdir() if subfolder.is_dir()]

    for subfolder in subfolders:
        voice_configurations[subfolder.name] = {}

        files = [file for file in subfolder.glob("*.txt") if file.is_file()]

        for file in files:
            voice_name = get_voice_name(str(file))
            voice_configurations[subfolder.name][voice_name] = file

    return voice_configurations


In [130]:

import os
from dotenv import load_dotenv

import azure.cognitiveservices.speech as speechsdk
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
load_dotenv()

base_folder = "ssml_files"
voice_configurations = generate_voice_configurations(base_folder)
# print(voice_configurations)

# Configure Azure Speech Service
subscription_key = os.getenv('SUBSCRIPTION_KEY')
service_region = os.getenv('SERVICE_REGION')
translator_endpoint = os.getenv('TRANSLATOR_ENDPOINT')
text_analytics_endpoint = os.getenv('TEXT_ANALYTICS_ENDPOINT')

speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)
# speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=None)

# Specify the output audio configuration
output_file = "temp_generated_result.mp3"
audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)

# Create the speech synthesizer with the speech config and audio config
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

In [66]:
target_lang_code = "fr"
selected_voice = "DavisNeural"

# test_text = "Où puis-je trouver un bon restaurant/café/la plage/le centre-ville?"
test_text = "forget it"

In [135]:
def modify_ssml(ssml_string, new_text):
    pattern1 = r"<prosody([^>]*)>.*?<\/prosody>"
    pattern2 = r"<mstts:express-as([^>]*)>.*?<\/mstts:express-as>"
    pattern3 = r"<s />.*?<s />"
    pattern4 = r"<voice([^>]*)>.*?</voice>"

    match1 = re.search(pattern1, ssml_string)
    match2 = re.search(pattern2, ssml_string)
    match3 = re.search(pattern3, ssml_string)
    match4 = re.search(pattern4, ssml_string)
    if match1: 
        return re.sub(pattern1, f"<prosody\\1>{new_text}</prosody>", ssml_string)
    elif match2:
        return re.sub(pattern2, f"<mstts:express-as\\1>{new_text}</mstts:express-as>", ssml_string)
    elif match3:
        return re.sub(pattern3, f"<s />{new_text}<s />", ssml_string)
    elif match4:
        return re.sub(pattern4, f"<voice\\1>{new_text}</voice>", ssml_string)


In [136]:
ssml_string = """

<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="fr-FR"><voice name="fr-FR-AlainNeural"></voice></speak>
"""
ssml_string_modified = modify_ssml(ssml_string, "what color is it?!")

In [137]:
ssml_string_modified

'\n\n<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="fr-FR"><voice name="fr-FR-AlainNeural">what color is it?!</voice></speak>\n'

In [134]:
result = speech_synthesizer.speak_ssml(ssml_string_modified)

# Check the synthesis result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized and saved to [{}]".format(output_file))

Speech synthesized and saved to [temp_generated_result.mp3]


In [92]:
from pathlib import Path


def generate_voice_configurations(base_folder):
    voice_configurations = {}

    base_path = Path(base_folder)
    subfolders = [subfolder for subfolder in base_path.iterdir() if subfolder.is_dir()]

    for subfolder in subfolders:
        voice_configurations[subfolder.name] = {}

        files = [file for file in subfolder.glob("*.txt") if file.is_file()]

        for file in files:
            voice_name = get_voice_name(str(file))
            voice_configurations[subfolder.name][voice_name] = file

    return voice_configurations

base_folder = "ssml_files"
voice_configurations = generate_voice_configurations(base_folder)
print(voice_configurations)

{'en': {'DavisNeural': WindowsPath('ssml_files/en/davis.txt'), 'GuyNeural': WindowsPath('ssml_files/en/guy.txt')}, 'es': {'IreneNeural': WindowsPath('ssml_files/es/irene.txt'), 'JorgeNeural': WindowsPath('ssml_files/es/jorge.txt')}, 'fr': {}, 'hi': {}}


In [183]:
x = voice_configurations["en"]["DavisNeural"].read_text()

In [184]:
x

'<!--ID=B7267351-473F-409D-9765-754A8EBCDE05;Version=1|{"VoiceNameToIdMapItems":[{"Id":"b5f86142-ce84-4483-8142-45db0d778add","Name":"Microsoft Server Speech Text to Speech Voice (en-US, DavisNeural)","ShortName":"en-US-DavisNeural","Locale":"en-US","VoiceType":"StandardVoice"}]}-->\n<!--ID=5B95B1CC-2C7B-494F-B746-CF22A0E779B7;Version=1|{"Locales":{"en-US":{"AutoApplyCustomLexiconFiles":[{}]},"de-DE":{"AutoApplyCustomLexiconFiles":[{}]},"fr-FR":{"AutoApplyCustomLexiconFiles":[{}]}}}-->\n<!--ID=FCB40C2B-1F9F-4C26-B1A1-CF8E67BE07D1;Version=1|{"Files":{}}-->\n<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="en-US-DavisNeural"><s /><mstts:express-as style="whispering" styledegree="1.1"></mstts:express-as><s /></voice></speak>'

In [108]:
# with open(r"ssml_files/es/irene.txt", 'r') as file:
#     x = file.read()

In [178]:
with open(r"ssml_files/en/davis.txt", 'r') as file:
    ssml_string_1 = file.read()

with open(r"ssml_files/en/guy.txt", 'r') as file:
    ssml_string_2 = file.read()

In [168]:
def modify_ssml_with_new_text(ssml_string, new_text):
    pattern = r"<prosody([^>]*)>.*?<\/prosody>"
    match = re.search(pattern, ssml_string)
    if match:
        modified_ssml_string = re.sub(pattern, f"<prosody\\1>{new_text}</prosody>", ssml_string)
        return modified_ssml_string
    else:
        pattern = r"<mstts:express-as([^>]*)>.*?<\/mstts:express-as>"
        modified_ssml_string = re.sub(pattern, f"<mstts:express-as\\1>{new_text}</mstts:express-as>", ssml_string)
        return modified_ssml_string

In [179]:
ssml_string_1

'<!--ID=B7267351-473F-409D-9765-754A8EBCDE05;Version=1|{"VoiceNameToIdMapItems":[{"Id":"b5f86142-ce84-4483-8142-45db0d778add","Name":"Microsoft Server Speech Text to Speech Voice (en-US, DavisNeural)","ShortName":"en-US-DavisNeural","Locale":"en-US","VoiceType":"StandardVoice"}]}-->\n<!--ID=5B95B1CC-2C7B-494F-B746-CF22A0E779B7;Version=1|{"Locales":{"en-US":{"AutoApplyCustomLexiconFiles":[{}]},"de-DE":{"AutoApplyCustomLexiconFiles":[{}]},"fr-FR":{"AutoApplyCustomLexiconFiles":[{}]}}}-->\n<!--ID=FCB40C2B-1F9F-4C26-B1A1-CF8E67BE07D1;Version=1|{"Files":{}}-->\n<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="en-US-DavisNeural"><s /><mstts:express-as style="whispering" styledegree="1.1"></mstts:express-as><s /></voice></speak>'

In [180]:
y = modify_ssml(ssml_string_1, "hello sweetie...")

In [182]:
result = speech_synthesizer.speak_ssml_async(y).get()

stream = speechsdk.AudioDataStream(result)

In [12]:
# # Request the list of available voices
# voices_result = speech_synthesizer.get_voices_async().get()

In [13]:
# selected_lang_code = "de"

In [14]:
voices_for_selected_lang = dict()
for voice in voices_result.voices:
    if voice.locale[:2] == selected_lang_code:
        voices_for_selected_lang[voice.local_name] = voice.short_name

In [16]:
# print(voices_result.voices[200].locale)
# print(voices_result.voices[200].local_name)

# print("-")
# print(voices_result.voices[200].short_name)
# print(voices_result.voices[200].name)

es-MX
Renata
-
es-MX-RenataNeural
Microsoft Server Speech Text to Speech Voice (es-MX, RenataNeural)


Receives a text from user input.

In [None]:
print("Type some text that you want to speak...")
text = ("Type some text that you want to speak...")

Type some text that you want to speak...


Synthesizes the received text to speech. The synthesized speech is expected to be heard on the speaker with below line executed.

In [30]:
text = "Hello Bohdanko"

In [31]:
result = speech_synthesizer.speak_text_async(text).get()

Checks the synthesis result.

In [None]:
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized to speaker for text [{}]".format(text))
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        if cancellation_details.error_details:
            print("Error details: {}".format(cancellation_details.error_details))
    print("Did you update the subscription info?")

Speech synthesized to speaker for text [Type some text that you want to speak...]


In [126]:
import os


speech_key, service_region = ####

speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

# Specify the output audio configuration
output_file = "temp_generated_result.mp3"
audio_config = speechsdk.audio.AudioOutputConfig(filename=output_file)

# Create the speech synthesizer with the speech config and audio config
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

text = "hello hello!" # TEXT FROM INPUT FIELD

# Synthesize the received text to speech and save it to the output file
result = speech_synthesizer.speak_text_async(text).get()

# Check the synthesis result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized and saved to [{}]".format(output_file))
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        if cancellation_details.error_details:
            print("Error details: {}".format(cancellation_details.error_details))
    print("Did you update the subscription info?")


Speech synthesized and saved to [temp_generated_result.mp3]


In [None]:
from azure.identity import DefaultAzureCredential
from azure.ai.translation.document import DocumentTranslationClient
credential = DefaultAzureCredential()

document_translation_client = DocumentTranslationClient(
    endpoint=endpoint,
    credential=credential
)

In [None]:
import requests, os, uuid, json
from dotenv import load_dotenv
load_dotenv()

True

In [95]:
# from azure.core.exceptions import HttpResponseError
# from azure.ai.translation.text import TextTranslationClient


# try:
#     text_translator = TextTranslationClient(endpoint=endpoint, credential=subscription_key)
#     supported_languages = text_translator.get_languages()

#     print("Connection to Azure Cognitive Services Translation successful!")
#     print(f"Supported languages: {', '.join(supported_languages)}")
# except HttpResponseError as e:
#     if e.error.code == "InvalidRequest":
#         print("Azure Cognitive Services Translation resource does not exist.")
#     else:
#         print("An error occurred while connecting to Azure Cognitive Services Translation.")
#     print(f"Error details: {e}")


In [4]:
import requests, uuid, json

# Add your key and endpoint
key = ####
endpoint = "https://api.cognitive.microsofttranslator.com"

# location, also known as region.
# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.
location = "canadacentral"

path = '/translate'
constructed_url = endpoint + path

params = {
    'api-version': '3.0',
    'from': 'en',
    'to': ['fr', 'zu']
}

headers = {
    'Ocp-Apim-Subscription-Key': key,
    # location required if you're using a multi-service or regional (not global) resource.
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
    'X-ClientTraceId': str(uuid.uuid4())
}

# You can pass more than one object in body.
body = [{
    'text': 'I would really like to drive your car around the block a few times!'
}]

request = requests.post(constructed_url, params=params, headers=headers, json=body)
response = request.json()

print(json.dumps(response, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': ')))

[
    {
        "translations": [
            {
                "text": "J’aimerais vraiment conduire votre voiture autour du pâté de maisons plusieurs fois!",
                "to": "fr"
            },
            {
                "text": "Ngingathanda ngempela ukushayela imoto yakho emhlabeni block izikhathi ezimbalwa!",
                "to": "zu"
            }
        ]
    }
]


In [5]:
import requests



# Example translation request
text_to_translate = 'Hello, how are you?'

headers = {
    'Ocp-Apim-Subscription-Key': key,
    'Content-type': 'application/json',
    'Ocp-Apim-Subscription-Region': location
}

params = {
    'api-version': '3.0',
    'from': 'en',
    'to': 'es'
}

body = [{
    'text': text_to_translate
}]

response = requests.post(f'{endpoint}/translate', headers=headers, params=params, json=body)
translation = response.json()[0]['translations'][0]['text']

print(translation)


¿Hola cómo estás?


In [6]:
import os
import azure.cognitiveservices.speech as speechsdk

In [7]:
from flask import Flask, render_template, request, send_file
import os
import azure.cognitiveservices.speech as speechsdk
import base64
import docx
from dotenv import load_dotenv
import requests

app = Flask(__name__)

# Load environment variables from .env file
load_dotenv()

# Configure Azure Speech Service
subscription_key = os.getenv('SUBSCRIPTION_KEY')
service_region = os.getenv('SERVICE_REGION')
speech_config = speechsdk.SpeechConfig(subscription=subscription_key, region=service_region)

In [15]:
# Indicate that we want to translate and the API version (3.0) and the target language
path = '/translate?api-version=3.0'
# Add the target language parameter
target_language_parameter = '&to=' + target_language
# Create the full URL
constructed_url = endpoint + path + target_language_parameter

# Set up the header information, which includes our subscription key
headers = {
    'Ocp-Apim-Subscription-Key': key,
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
    'X-ClientTraceId': str(uuid.uuid4())
}

# Create the body of the request with the text to be translated
body = [{ 'text': original_text }]

# Make the call using post
translator_request = requests.post(constructed_url, headers=headers, json=body)
# Retrieve the JSON response
translator_response = translator_request.json()
# Retrieve the translation
translated_text = translator_response[0]['translations'][0]['text']

KeyError: 0

In [13]:
translator_response

{'error': {'code': '404', 'message': 'Resource not found'}}

In [None]:
import os
import azure.cognitiveservices.speech as speechsdk

In [2]:
import requests, uuid, json

# Add your key and endpoint


# location, also known as region.
# required if you're using a multi-service or regional (not global) resource. It can be found in the Azure portal on the Keys and Endpoint page.
location = "canadacentral"

path = '/translate'
constructed_url = endpoint + path

params = {
    'api-version': '3.0',
    'from': 'en',
    'to': ['fr', 'zu']
}

headers = {
    'Ocp-Apim-Subscription-Key': key,
    # location required if you're using a multi-service or regional (not global) resource.
    'Ocp-Apim-Subscription-Region': location,
    'Content-type': 'application/json',
    'X-ClientTraceId': str(uuid.uuid4())
}

# You can pass more than one object in body.
body = [{
    'text': 'I would really like to drive your car around the block a few times!'
}]

request = requests.post(constructed_url, params=params, headers=headers, json=body)
response = request.json()

print(json.dumps(response, sort_keys=True, ensure_ascii=False, indent=4, separators=(',', ': ')))

{
    "error": {
        "code": "404",
        "message": "Resource not found"
    }
}


In [5]:
import re

In [16]:
def enhance_with_punctuation_characters(text):
    # Regular expression to match URLs
    url_pattern = r'((?:http://|https://|www\.)\S+|(?:\w+\.\w+/\S+))'
    
    # Find all URL matches in the text
    urls = re.findall(url_pattern, text)
    
    # Replace each URL with the enhanced format and add a comma after the URL
    for url in urls:
        enhanced_url = re.sub(r'/', ',/', url)
        text = text.replace(url, enhanced_url + ",")
    
    return text

In [17]:
text = "visit wsib.ca/onlineservices to find all the ways to work with us online, 24/7."

In [18]:
enhance_with_punctuation_characters(text)

'visit wsib.ca,/onlineservices, to find all the ways to work with us online, 24/7.'

In [23]:
import re

def enhance_with_punctuation_characters(text):
    # Replace mobile numbers with spaces between them with dashes
    text = re.sub(r'\b1\s\d{3}\s\d{3}\s\d{4}\b', lambda match: match.group().replace(" ", "-"), text)
    
    # Replace Canadian governmental short numbers like "911" to "9-1-1"
    text = re.sub(r'\b911\b', '9-1-1', text)

    # Regular expression to match URLs
    url_pattern = r'((?:http://|https://|www\.)\S+|(?:\w+\.\w+/\S+))'
    
    # Find all URL matches in the text
    urls = re.findall(url_pattern, text)
    
    # Replace each URL with the enhanced format and add a comma after the URL
    for url in urls:
        enhanced_url = re.sub(r'/', ',/', url)
        text = text.replace(url, enhanced_url + ",")
    
    # Replace "WSIB" with "W S IB"
    text = text.replace("WSIB", "W S IB")
    
    return text

# Example usage
text = "Call 1 866 797 0000, or 911. Visit wsib.ca/onlineservices to find all the ways to work with us online, 24/7. WSIB is great."
enhanced_text = enhance_with_punctuation_characters(text)
print(enhanced_text)



Call 1-866-797-0000, or 9-1-1. Visit wsib.ca,/onlineservices, to find all the ways to work with us online, 24/7. W S IB is great.


In [3]:
import re

# Input text
text = "appelez Télésanté Ontario au 1-866-797-0000. Dans une situation d’urgence, composez toujours le 911."

# Step 1: Find and extract phone number patterns
matches = re.findall(r'\b(\d)-?(\d{3})-?(\d{3})-?(\d{4})\b|\b(\d) (\d{3}) (\d{3}) (\d{4})\b', text)

# Step 2: Combine the digits to form a single number without spaces or dashes
numbers = [''.join(match[:4]) if match[0] else ''.join(match[4:]) for match in matches]

# Step 3: Format the resulting numbers with breaks
formatted_numbers = ', '.join([f'<break strength="x-weak"/> {digit}' for number in numbers for digit in number])

# Replace the phone numbers in the original text with the formatted version
formatted_text = re.sub(r'\b(\d)-?(\d{3})-?(\d{3})-?(\d{4})\b|\b(\d) (\d{3}) (\d{3}) (\d{4})\b', formatted_numbers, text)

# Output the result with enhanced numbers
print(formatted_text)


appelez Télésanté Ontario au <break strength="x-weak"/> 1, <break strength="x-weak"/> 8, <break strength="x-weak"/> 6, <break strength="x-weak"/> 6, <break strength="x-weak"/> 7, <break strength="x-weak"/> 9, <break strength="x-weak"/> 7, <break strength="x-weak"/> 0, <break strength="x-weak"/> 0, <break strength="x-weak"/> 0, <break strength="x-weak"/> 0. Dans une situation d’urgence, composez toujours le 911.
