Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Azure Cognitive Services voices support #177

Draft
wants to merge 4 commits into
base: firefox
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions custom-voices.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
</style>

<script src="js/jquery-3.1.1.min.js"></script>
<script src="js/microsoft.cognitiveservices.speech.sdk.bundle-min.js"></script>
<script src="js/defaults.js"></script>
<script src="js/custom-voices.js"></script>
</head>
Expand Down Expand Up @@ -45,6 +46,30 @@ <h2>Enable Custom Voices</h2>
</div>
</div>

<div class="card">
<div class="card-header">
Enter credentials to enable Azure Cognitive Services TTS voices.
</div>
<div class="card-body">
<form>
<div class="form-group">
<input type="text" class="form-control" id="azure-tts-key" placeholder="Subscription key" />
</div>
<div class="form-group">
<input type="text" class="form-control" id="azure-tts-region" placeholder="Region" />
</div>
<div class="form-group">
<button type="button" class="btn btn-primary" id="azure-save-button">Save</button>
</div>
<div class="form-group">
<img id="azure-progress" class="status progress" src="img/loading.gif" />
<div id="azure-success" class="status alert alert-success"></div>
<div id="azure-error" class="status alert alert-danger"></div>
</div>
</form>
</div>
</div>

<div class="card">
<div class="card-header">
Enter GCP API key to enable Google Wavenet voices.
Expand Down
51 changes: 50 additions & 1 deletion js/custom-voices.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

$(function() {
getSettings(["awsCreds", "gcpCreds", "ibmCreds"])
getSettings(["awsCreds", "gcpCreds", "ibmCreds", "azureCreds"])
.then(function(items) {
if (items.awsCreds) {
$("#aws-access-key-id").val(obfuscate(items.awsCreds.accessKeyId));
Expand All @@ -13,11 +13,16 @@ $(function() {
$("#ibm-api-key").val(obfuscate(items.ibmCreds.apiKey));
$("#ibm-url").val(obfuscate(items.ibmCreds.url));
}
if (items.azureCreds) {
$("#azure-tts-key").val(obfuscate(items.azureCreds.subKey));
$("#azure-tts-region").val(obfuscate(items.azureCreds.region));
}
})
$(".status").hide();
$("#aws-save-button").click(awsSave);
$("#gcp-save-button").click(gcpSave);
$("#ibm-save-button").click(ibmSave);
$("#azure-save-button").click(azureSave);
})

function obfuscate(key) {
Expand Down Expand Up @@ -67,6 +72,50 @@ function testAws(accessKeyId, secretAccessKey) {
}


function azureSave() {
  // Save handler for the Azure credentials form: validates the two inputs,
  // then either tests/enables, disables, or reports missing fields.
  $(".status").hide();
  const key = $("#azure-tts-key").val().trim();
  const regionName = $("#azure-tts-region").val().trim();
  if (!key && !regionName) {
    // Both fields left blank: treat as a request to disable Azure voices.
    clearSettings(["azureCreds"])
      .then(function() {
        $("#azure-success").text("Azure Cognitive Services voices are disabled.").show();
      })
    return;
  }
  if (!key || !regionName) {
    // Exactly one field filled in: can't test with partial credentials.
    $("#azure-error").text("Missing required fields.").show();
    return;
  }
  // Both fields provided: verify the credentials before enabling.
  $("#azure-progress").show();
  testAzure(key, regionName);
}

function testAzure(subKey, region) {
  // Verify the supplied credentials by synthesizing a short test phrase.
  // On success the credentials are persisted and the form shows obfuscated
  // values; on failure the SDK error is surfaced to the user.
  const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subKey, region);
  const audioConfig = SpeechSDK.AudioConfig.fromDefaultSpeakerOutput();

  const synthesizer = new SpeechSDK.SpeechSynthesizer(speechConfig, audioConfig);
  synthesizer.speakTextAsync("Azure Cognitive Services test",
    result => {
      if (result) {
        console.log(JSON.stringify(result));
      }
      synthesizer.close();
      $("#azure-progress").hide();
      // Store the key under BOTH names: tts-engines.js reads
      // `subscriptionKey`, while the settings-page loader reads `subKey`.
      updateSettings({azureCreds: {subscriptionKey: subKey, subKey: subKey, region: region}});
      $("#azure-success").text("Azure Cognitive Services voices are enabled.").show();
      $("#azure-tts-key").val(obfuscate(subKey));
      $("#azure-tts-region").val(obfuscate(region));
    },
    error => {
      console.log(error);
      synthesizer.close();
      $("#azure-progress").hide();
      // BUG FIX: previously referenced `err.message` (ReferenceError — the
      // parameter is `error`, and the SDK may pass a plain string).
      $("#azure-error").text("Test failed: " + (error && error.message ? error.message : error)).show();
    });
}

function gcpSave() {
$(".status").hide();
var apiKey = $("#gcp-api-key").val().trim();
Expand Down
9 changes: 7 additions & 2 deletions js/defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ function setState(key, value) {
* VOICES
*/
function getVoices() {
return getSettings(["awsCreds", "gcpCreds"])
return getSettings(["awsCreds", "gcpCreds", "azureCreds"])
.then(function(settings) {
return Promise.all([
browserTtsEngine.getVoices(),
Expand All @@ -112,6 +112,7 @@ function getVoices() {
settings.awsCreds ? amazonPollyTtsEngine.getVoices() : [],
settings.gcpCreds ? googleWavenetTtsEngine.getVoices() : googleWavenetTtsEngine.getFreeVoices(),
ibmWatsonTtsEngine.getVoices(),
settings.azureCreds ? azureTtsEngine.getVoices() : []
])
})
.then(function(arr) {
Expand All @@ -135,6 +136,10 @@ function isAmazonCloud(voice) {
return /^Amazon /.test(voice.voiceName);
}

function isAzure(voice) {
  // Azure Cognitive Services voices are identified by their "Azure " name prefix.
  return voice.voiceName.startsWith("Azure ");
}

function isMicrosoftCloud(voice) {
return /^Microsoft /.test(voice.voiceName) && voice.voiceName.indexOf(' - ') == -1;
}
Expand All @@ -156,7 +161,7 @@ function isIbmWatson(voice) {
}

function isRemoteVoice(voice) {
return isAmazonCloud(voice) || isMicrosoftCloud(voice) || isOpenFPT(voice) || isGoogleTranslate(voice) || isGoogleWavenet(voice) || isAmazonPolly(voice) || isIbmWatson(voice);
return isAmazonCloud(voice) || isAzure(voice) || isMicrosoftCloud(voice) || isOpenFPT(voice) || isGoogleTranslate(voice) || isGoogleWavenet(voice) || isAmazonPolly(voice) || isIbmWatson(voice);
}

function isPremiumVoice(voice) {
Expand Down
8 changes: 8 additions & 0 deletions js/microsoft.cognitiveservices.speech.sdk.bundle-min.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions js/speech.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ function Speech(texts, options) {
})
}
if (isAmazonPolly(options.voice)) return amazonPollyTtsEngine;
if (isAzure(options.voice)) return azureTtsEngine;
if (isGoogleWavenet(options.voice)) return googleWavenetTtsEngine;
if (isIbmWatson(options.voice)) return ibmWatsonTtsEngine;
if (isRemoteVoice(options.voice)) return remoteTtsEngine;
Expand Down
157 changes: 157 additions & 0 deletions js/tts-engines.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ var googleTranslateTtsEngine = new GoogleTranslateTtsEngine();
var amazonPollyTtsEngine = new AmazonPollyTtsEngine();
var googleWavenetTtsEngine = new GoogleWavenetTtsEngine();
var ibmWatsonTtsEngine = new IbmWatsonTtsEngine();
var azureTtsEngine = new AzureTtsEngine();


/*
Expand Down Expand Up @@ -38,6 +39,162 @@ interface TtsEngine {
}
*/

function AzureTtsEngine() {
  // TTS engine backed by the Azure Cognitive Services Speech SDK.
  // Synthesized audio is played through an HTMLAudioElement so that
  // volume/rate handling matches the other engines in this file.
  var audio = document.createElement("AUDIO");
  var prefetchAudio;
  var isSpeaking = false;
  var speakPromise;
  var synthesizer;   // reserved for synthesizer reuse — see getSynthesizer()
  this.speak = function(utterance, options, onEvent) {
    if (!options.volume) options.volume = 1;
    if (!options.rate) options.rate = 1;
    if (!options.pitch) options.pitch = 1;
    audio.pause();
    audio.volume = options.volume;
    audio.defaultPlaybackRate = options.rate;
    audio.onplay = function() {
      onEvent({type: 'start', charIndex: 0});
      isSpeaking = true;
    };
    audio.onended = function() {
      onEvent({type: 'end', charIndex: utterance.length});
      isSpeaking = false;
    };
    audio.onerror = function() {
      onEvent({type: "error", errorMessage: audio.error.message});
      isSpeaking = false;
    };
    speakPromise = Promise.resolve()
      .then(function() {
        // Use prefetched audio when it matches this exact request.
        if (prefetchAudio && prefetchAudio[0] == utterance && prefetchAudio[1] == options) return prefetchAudio[2];
        else return getAudioUrl(utterance, options.lang, options.voice, options.pitch);
      })
      .then(function(url) {
        audio.src = url;
        return audio.play();
      })
      .catch(function(err) {
        onEvent({
          type: "error",
          errorMessage: err.name == "NotAllowedError" ? JSON.stringify({code: "error_user_gesture_required"}) : err.message
        })
      })
  };
  this.isSpeaking = function(callback) {
    callback(isSpeaking);
  };
  this.pause =
  this.stop = function() {
    // Wait for any in-flight play() so pause() isn't clobbered by it.
    speakPromise.then(function() {audio.pause()});
  };
  this.resume = function() {
    audio.play();
  };
  this.setNextStartTime = function() {
  };
  this.getVoices = function() {
    // TODO: fetch the live voice list from the Azure voices/list endpoint
    // and cache it in settings (as the Polly engine does).
    return voices;
  }
  function updateVoices() {
    // TODO - using list API: get access token, list voices, turn result into compatible format
    // https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#get-a-list-of-voices
  }
  function getAudioUrl(text, lang, voice, pitch) {
    // Synthesize `text` and resolve with an object URL for the audio data.
    assert(text && lang && voice && pitch != null);
    return getSynthesizer()
      .then(function(synth) {
        // speakTextAsync is callback-based; adapt it to a Promise.
        // BUG FIX: the previous version never returned a URL (audio.src
        // became undefined) and referenced an undeclared `audioData`.
        return new Promise(function(fulfill, reject) {
          synth.speakTextAsync(
            text,
            function(result) {
              synth.close();
              // result.audioData is an ArrayBuffer; createObjectURL requires
              // a Blob, so wrap it (SDK default output is RIFF/WAV).
              fulfill(URL.createObjectURL(new Blob([result.audioData], {type: "audio/wav"})));
            },
            function(error) {
              synth.close();
              reject(error instanceof Error ? error : new Error(String(error)));
            });
        });
      });
  }
  function getSynthesizer() {
    //return synthesizer || (synthesizer = createSynthesizer());
    // TODO figure out how to reuse synthesizer object
    return createSynthesizer();
  }
  function createSynthesizer() {
    return getSettings(["azureCreds"])
      .then(function(items) {
        if (!items.azureCreds) throw new Error("Missing Azure credentials");
        const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(items.azureCreds.subscriptionKey, items.azureCreds.region);
        // Pass null for the audio config so the SDK only returns audio data
        // instead of also playing it through the default speaker (we play it
        // ourselves via the audio element above).
        return new SpeechSDK.SpeechSynthesizer(speechConfig, null);
      })
  }
  var voices = [
    {"voiceName":"Azure default voice","lang":"en-US","gender":"female"}
  ]
}


function BrowserTtsEngine() {
this.speak = function(text, options, onEvent) {
brapi.tts.speak(text, {
Expand Down
3 changes: 2 additions & 1 deletion manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
"js/tts-engines.js",
"js/speech.js",
"js/document.js",
"js/events.js"
"js/events.js",
"js/microsoft.cognitiveservices.speech.sdk.bundle-min.js"
],
"persistent": false
},
Expand Down
1 change: 1 addition & 0 deletions options.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
<link rel="stylesheet" type="text/css" href="css/options.css">

<script src="js/jquery-3.1.1.min.js"></script>
<script src="js/microsoft.cognitiveservices.speech.sdk.bundle-min.js"></script>
<script src="js/defaults.js"></script>
<script src="js/tts-engines.js"></script>
<script src="js/options.js"></script>
Expand Down