Skip to content

Commit

Permalink
add Azure TTS support
Browse files Browse the repository at this point in the history
  • Loading branch information
ken107 committed Jan 18, 2024
1 parent a119154 commit 6db16a8
Show file tree
Hide file tree
Showing 6 changed files with 197 additions and 18 deletions.
24 changes: 24 additions & 0 deletions custom-voices.html
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,30 @@ <h3 class="mt-3">Enable Custom Voices</h3>
</div>
</div>

<!--
Azure TTS credentials card.
The #azure-region and #azure-key inputs are read by azureSave() in js/custom-voices.js
when #azure-save-button is clicked; the three .status elements below report progress,
success and failure of that save.
-->
<div class="card">
<div class="card-header">
Enter SpeechRegion and SpeechKey to enable Azure voices.
</div>
<div class="card-body">
<form>
<div class="form-group">
<div class="input-group">
<input type="text" class="form-control" id="azure-region" placeholder="SPEECH_REGION">
<input type="text" class="form-control" id="azure-key" placeholder="SPEECH_KEY">
<div class="input-group-append">
<button type="button" class="btn btn-primary" id="azure-save-button">Save</button>
</div>
</div>
</div>
<!-- Status area: all .status elements are hidden on page load and at the start of each save -->
<div class="form-group">
<img id="azure-progress" class="status progress" src="img/loading.gif">
<div id="azure-success" class="status alert alert-success"></div>
<div id="azure-error" class="status alert alert-danger"></div>
</div>
</form>
</div>
</div>

<div class="mt-4 mb-3">
<a href="options.html" class="btn btn-secondary">Back to Options</a>
</div>
Expand Down
41 changes: 40 additions & 1 deletion js/custom-voices.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

$(function() {
getSettings(["awsCreds", "gcpCreds", "ibmCreds", "rivaCreds", "openaiCreds"])
getSettings(["awsCreds", "gcpCreds", "ibmCreds", "rivaCreds", "openaiCreds", "azureCreds"])
.then(function(items) {
if (items.awsCreds) {
$("#aws-access-key-id").val(obfuscate(items.awsCreds.accessKeyId));
Expand All @@ -20,13 +20,18 @@ $(function() {
if (items.openaiCreds) {
$("#openai-api-key").val(obfuscate(items.openaiCreds.apiKey))
}
if (items.azureCreds) {
$("#azure-region").val(items.azureCreds.region)
$("#azure-key").val(obfuscate(items.azureCreds.key))
}
})
$(".status").hide();
$("#aws-save-button").click(awsSave);
$("#gcp-save-button").click(gcpSave);
$("#ibm-save-button").click(ibmSave);
$("#riva-save-button").click(rivaSave);
$("#openai-save-button").click(openaiSave)
$("#azure-save-button").click(azureSave)
})

function obfuscate(key) {
Expand Down Expand Up @@ -218,3 +223,37 @@ async function testOpenai(apiKey) {
const res = await fetch("https://api.openai.com/v1/models", {headers: {"Authorization": "Bearer " + apiKey}})
if (!res.ok) throw await res.json().then(x => x.error)
}



/**
 * Click handler for the Azure "Save" button.
 *
 * - Both fields filled: validates the credentials by fetching the voice list,
 *   stores them as the `azureCreds` setting and shows a success message.
 * - Both fields empty: removes the stored credentials (disables Azure voices).
 * - Only one field filled: shows a "missing fields" error.
 *
 * The cached voice list (`azureVoices`) is dropped whenever the credentials
 * change or are removed, because it was fetched for the previous region/key.
 *
 * @returns {Promise<void>}
 */
async function azureSave() {
  $(".status").hide()
  const region = $("#azure-region").val().trim()
  const key = $("#azure-key").val().trim()
  if (region && key) {
    $("#azure-progress").show()
    try {
      await testAzure(region, key)
      await updateSettings({azureCreds: {region, key}})
      // Invalidate the voice list cached for the previous credentials
      await clearSettings(["azureVoices"])
      $("#azure-success").text("Azure voices are enabled.").show()
      // Never leave the raw key visible in the form once it has been saved
      $("#azure-key").val(obfuscate(key))
    }
    catch (err) {
      $("#azure-error").text("Test failed: " + err.message).show()
    }
    finally {
      $("#azure-progress").hide()
    }
  }
  else if (!region && !key) {
    await clearSettings(["azureCreds", "azureVoices"])
    $("#azure-success").text("Azure voices are disabled.").show()
  }
  else {
    $("#azure-error").text("Missing required fields.").show()
  }
}

async function testAzure(region, key) {
await azureTtsEngine.fetchVoices(region, key)
}
21 changes: 19 additions & 2 deletions js/defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ function clearState(key) {
*/
function getVoices(opts) {
if (!opts) opts = {}
return getSettings(["awsCreds", "gcpCreds", "openaiCreds"])
return getSettings(["awsCreds", "gcpCreds", "openaiCreds", "azureCreds"])
.then(function(settings) {
return Promise.all([
browserTtsEngine.getVoices(),
Expand All @@ -141,6 +141,7 @@ function getVoices(opts) {
nvidiaRivaTtsEngine.getVoices(),
phoneTtsEngine.getVoices(),
settings.openaiCreds ? openaiTtsEngine.getVoices() : [],
settings.azureCreds ? azureTtsEngine.getVoices() : [],
])
})
.then(function(arr) {
Expand Down Expand Up @@ -204,12 +205,16 @@ function isOpenai(voice) {
return /^ChatGPT /.test(voice.voiceName);
}

/**
 * Tells whether a voice belongs to the Azure TTS engine, which is recognised
 * by the "Azure " prefix of its voiceName.
 */
function isAzure(voice) {
  const {voiceName} = voice;
  return /^Azure /.test(voiceName);
}

/**
 * Tells whether a voice is flagged as a "use my phone" voice.
 * The comparison is deliberately loose (== true).
 */
function isUseMyPhone(voice) {
  const {isUseMyPhone: flag} = voice;
  return flag == true;
}

/**
 * Tells whether a voice is synthesized by a remote (network) service rather
 * than by the browser's built-in speech engine.
 *
 * Only the current list of remote engines is kept; the earlier return
 * statement, which lacked isOpenai and isAzure, shadowed it.
 */
function isRemoteVoice(voice) {
  return isAmazonCloud(voice)
    || isMicrosoftCloud(voice)
    || isReadAloudCloud(voice)
    || isGoogleTranslate(voice)
    || isGoogleWavenet(voice)
    || isAmazonPolly(voice)
    || isIbmWatson(voice)
    || isNvidiaRiva(voice)
    || isOpenai(voice)
    || isAzure(voice);
}

function isPremiumVoice(voice) {
Expand Down Expand Up @@ -923,6 +928,18 @@ function removeAllAttrs(el, recursive) {
if (recursive) for (const child of el.children) removeAllAttrs(child, true)
}

/**
 * Escapes the five XML special characters (< > & ' ") in a string by
 * replacing each with its predefined entity.
 *
 * @param {string} unsafe - raw text
 * @returns {string} the text with the special characters replaced
 */
function escapeXml(unsafe) {
  const entityFor = {
    '<': '&lt;',
    '>': '&gt;',
    '&': '&amp;',
    '\'': '&apos;',
    '"': '&quot;',
  };
  return unsafe.replace(/[<>&'"]/g, ch => entityFor[ch]);
}

var languageTable = (function() {
const nameFromCode = new Map([
['af', 'Afrikaans'],
Expand Down
1 change: 1 addition & 0 deletions js/speech.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ function Speech(texts, options) {
this.gotoEnd = gotoEnd;

function pickEngine() {
if (isAzure(options.voice)) return azureTtsEngine;
if (isOpenai(options.voice)) return openaiTtsEngine;
if (isUseMyPhone(options.voice)) return phoneTtsEngine;
if (isNvidiaRiva(options.voice)) return nvidiaRivaTtsEngine;
Expand Down
126 changes: 112 additions & 14 deletions js/tts-engines.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ var ibmWatsonTtsEngine = new IbmWatsonTtsEngine();
var nvidiaRivaTtsEngine = new NvidiaRivaTtsEngine();
var phoneTtsEngine = new PhoneTtsEngine();
var openaiTtsEngine = new OpenaiTtsEngine();
var azureTtsEngine = new AzureTtsEngine();


/*
Expand Down Expand Up @@ -604,17 +605,6 @@ function AmazonPollyTtsEngine() {
}
}
}
function escapeXml(unsafe) {
return unsafe.replace(/[<>&'"]/g, function (c) {
switch (c) {
case '<': return '&lt;';
case '>': return '&gt;';
case '&': return '&amp;';
case '\'': return '&apos;';
case '"': return '&quot;';
}
})
}
var voices = [
{"voiceName":"AmazonPolly Turkish (Filiz)","lang":"tr-TR","gender":"female"},
{"voiceName":"AmazonPolly Swedish (Astrid)","lang":"sv-SE","gender":"female"},
Expand Down Expand Up @@ -1031,9 +1021,13 @@ function GoogleWavenetTtsEngine() {

function IbmWatsonTtsEngine() {
var isSpeaking = false;
var audio;
var audio, prefetchAudio;
this.speak = function(utterance, options, onEvent) {
const urlPromise = getAudioUrl(utterance, options.voice)
const urlPromise = Promise.resolve()
.then(() => {
if (prefetchAudio && prefetchAudio[0] == utterance && prefetchAudio[1] == options) return prefetchAudio[2]
else return getAudioUrl(utterance, options.voice)
})
audio = playAudio(urlPromise, options)
audio.startPromise
.then(() => {
Expand All @@ -1058,7 +1052,14 @@ function IbmWatsonTtsEngine() {
this.resume = function() {
return audio.resume()
};
this.prefetch = function(utterance, options) {
this.prefetch = async function(utterance, options) {
try {
const url = await getAudioUrl(utterance, options.voice)
prefetchAudio = [utterance, options, url]
}
catch (err) {
console.error(err)
}
};
this.setNextStartTime = function() {
};
Expand Down Expand Up @@ -1406,3 +1407,100 @@ function OpenaiTtsEngine() {
{"voiceName":"ChatGPT English (shimmer)","lang":"en-US","gender":"female"},
]
}


/**
 * TTS engine backed by the Azure Speech REST API.
 *
 * Voices are exposed as {voiceName: "Azure <LocaleName> - <name>", lang: <Locale>},
 * where <name> is the Azure ShortName with its "<Locale>-" prefix removed, so the
 * ShortName can be rebuilt later as lang + "-" + name.
 *
 * Relies on helpers defined elsewhere in the extension: getSettings,
 * updateSettings, playAudio and escapeXml.
 */
function AzureTtsEngine() {
  var isSpeaking = false;
  var audio, prefetchAudio;

  /**
   * Speaks one utterance, reusing the prefetched audio when it was prepared
   * for exactly this utterance and options object.
   * Reports progress through onEvent ("start", "end", "error").
   */
  this.speak = function(utterance, options, onEvent) {
    const urlPromise = Promise.resolve()
      .then(() => {
        if (prefetchAudio && prefetchAudio[0] == utterance && prefetchAudio[1] == options) return prefetchAudio[2]
        else return getAudioUrl(utterance, options.lang, options.voice)
      })
    audio = playAudio(urlPromise, options)
    audio.startPromise
      .then(() => {
        onEvent({type: "start", charIndex: 0})
        isSpeaking = true;
      })
      .catch(function(err) {
        onEvent({type: "error", error: err})
      })
    audio.endPromise
      .then(() => onEvent({type: "end", charIndex: utterance.length}),
        err => onEvent({type: "error", error: err}))
      .finally(() => isSpeaking = false)
  };

  this.isSpeaking = function(callback) {
    callback(isSpeaking);
  };

  // pause/stop/resume may be invoked before the first speak(); in that case
  // there is no audio yet and they are no-ops instead of throwing.
  this.pause =
  this.stop = function() {
    if (audio) audio.pause()
  };

  this.resume = function() {
    if (audio) return audio.resume()
  };

  /**
   * Synthesizes the utterance ahead of time so that a following speak() with
   * the same utterance and options can start immediately. Failures are only
   * logged: speak() will then fetch the audio itself.
   */
  this.prefetch = async function(utterance, options) {
    try {
      const url = await getAudioUrl(utterance, options.lang, options.voice)
      prefetchAudio = [utterance, options, url]
    }
    catch (err) {
      console.error(err)
    }
  };

  this.setNextStartTime = function() {
  };

  /**
   * Returns the Azure voice list, cached in settings for 24 hours.
   * Resolves to [] when no credentials are stored or the list cannot be fetched.
   */
  this.getVoices = async function() {
    try {
      const {azureCreds, azureVoices} = await getSettings(["azureCreds", "azureVoices"])
      if (!azureCreds) return []
      if (azureVoices && azureVoices.expire > Date.now()) return azureVoices.list
      // Call the local function so this method does not depend on its `this` binding
      const list = await fetchVoices(azureCreds.region, azureCreds.key)
      await updateSettings({azureVoices: {list, expire: Date.now() + 24*3600*1000}})
      return list
    }
    catch (err) {
      console.error(err)
      return []
    }
  }

  this.fetchVoices = fetchVoices

  /**
   * Fetches the voice list for the given region/key from Azure.
   * @throws {Error} when the server answers with a non-2xx status
   */
  async function fetchVoices(region, key) {
    const res = await fetch(`https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`, {
      method: "GET",
      headers: {
        "Ocp-Apim-Subscription-Key": key,
      }
    })
    if (!res.ok) throw new Error("Server return " + res.status)
    const voices = await res.json()
    return voices.map(item => {
      // A locale can have more than two segments (e.g. "zh-CN-liaoning"), so the
      // name is whatever follows the locale prefix, not the third "-" segment.
      const prefix = item.Locale + "-"
      const name = item.ShortName.startsWith(prefix)
        ? item.ShortName.slice(prefix.length)
        : item.ShortName.slice(item.ShortName.lastIndexOf("-") + 1)
      return {
        voiceName: "Azure " + item.LocaleName + " - " + name,
        lang: item.Locale,
        gender: item.Gender == "Male" ? "male" : "female",
      }
    })
  }

  /**
   * Synthesizes `text` with the given voice and returns an object URL for the
   * resulting audio blob.
   * NOTE(review): the object URL is never revoked here; confirm whether
   * playAudio releases it.
   * @throws {Error} when the voice name is not an Azure voice name or the
   *   server answers with a non-2xx status
   */
  async function getAudioUrl(text, lang, voice) {
    // \S+ rather than \w+: voice names are not guaranteed to be word characters only
    const matches = voice.voiceName.match(/^Azure .* - (\S+)$/)
    if (!matches) throw new Error("Invalid Azure voice name: " + voice.voiceName)
    const voiceName = voice.lang + "-" + matches[1]
    const ssmlLang = lang || voice.lang
    const {azureCreds} = await getSettings(["azureCreds"])
    const {region, key} = azureCreds
    const res = await fetch(`https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`, {
      method: "POST",
      headers: {
        "Ocp-Apim-Subscription-Key": key,
        "Content-Type": "application/ssml+xml",
        "X-Microsoft-OutputFormat": "ogg-48khz-16bit-mono-opus",
      },
      // Attribute values are escaped too, so they cannot break out of the quotes
      body: `<speak version='1.0' xml:lang='${escapeXml(String(ssmlLang))}'><voice name='${escapeXml(voiceName)}'>${escapeXml(text)}</voice></speak>`
    })
    if (!res.ok) throw new Error("Server return " + res.status)
    const blob = await res.blob()
    return URL.createObjectURL(blob)
  }
}
2 changes: 1 addition & 1 deletion manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"name": "__MSG_extension_name__",
"short_name": "__MSG_extension_short_name__",
"description": "__MSG_extension_description__",
"version": "2.5.0",
"version": "2.8.0",
"default_locale": "en",
"minimum_chrome_version": "99",

Expand Down

0 comments on commit 6db16a8

Please sign in to comment.