
Azure Cognitive Services voices support #177

Draft · wants to merge 4 commits into base: firefox
25 changes: 25 additions & 0 deletions custom-voices.html
@@ -14,6 +14,7 @@
</style>

<script src="js/jquery-3.1.1.min.js"></script>
<script src="js/microsoft.cognitiveservices.speech.sdk.bundle-min.js"></script>
<script src="js/defaults.js"></script>
<script src="js/custom-voices.js"></script>
</head>
@@ -45,6 +46,30 @@ <h2>Enable Custom Voices</h2>
</div>
</div>

<div class="card">
<div class="card-header">
Enter credentials to enable Azure Cognitive Services TTS voices.
</div>
<div class="card-body">
<form>
<div class="form-group">
<input type="text" class="form-control" id="azure-tts-key" placeholder="Subscription key" />
</div>
<div class="form-group">
<input type="text" class="form-control" id="azure-tts-region" placeholder="Region" />
</div>
<div class="form-group">
<button type="button" class="btn btn-primary" id="azure-save-button">Save</button>
</div>
<div class="form-group">
<img id="azure-progress" class="status progress" src="img/loading.gif" />
<div id="azure-success" class="status alert alert-success"></div>
<div id="azure-error" class="status alert alert-danger"></div>
</div>
</form>
</div>
</div>

<div class="card">
<div class="card-header">
Enter GCP API key to enable Google Wavenet voices.
51 changes: 50 additions & 1 deletion js/custom-voices.js
@@ -1,6 +1,6 @@

$(function() {
getSettings(["awsCreds", "gcpCreds", "ibmCreds"])
getSettings(["awsCreds", "gcpCreds", "ibmCreds", "azureCreds"])
.then(function(items) {
if (items.awsCreds) {
$("#aws-access-key-id").val(obfuscate(items.awsCreds.accessKeyId));
@@ -13,11 +13,16 @@ $(function() {
$("#ibm-api-key").val(obfuscate(items.ibmCreds.apiKey));
$("#ibm-url").val(obfuscate(items.ibmCreds.url));
}
if (items.azureCreds) {
$("#azure-tts-key").val(obfuscate(items.azureCreds.subKey));
$("#azure-tts-region").val(obfuscate(items.azureCreds.region));
}
})
$(".status").hide();
$("#aws-save-button").click(awsSave);
$("#gcp-save-button").click(gcpSave);
$("#ibm-save-button").click(ibmSave);
$("#azure-save-button").click(azureSave);
})

function obfuscate(key) {
@@ -67,6 +72,50 @@ function testAws(accessKeyId, secretAccessKey) {
}


function azureSave() {
$(".status").hide();
var subKey = $("#azure-tts-key").val().trim();
var region = $("#azure-tts-region").val().trim();
if (subKey && region) {
$("#azure-progress").show();
testAzure(subKey, region);
}
else if (!subKey && !region) {
clearSettings(["azureCreds"])
.then(function() {
$("#azure-success").text("Azure Cognitive Services voices are disabled.").show();
})
}
else {
$("#azure-error").text("Missing required fields.").show();
}
}

function testAzure(subKey, region) {
const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subKey, region);
const audioConfig = SpeechSDK.AudioConfig.fromDefaultSpeakerOutput();

const synthesizer = new SpeechSDK.SpeechSynthesizer(speechConfig, audioConfig);
synthesizer.speakTextAsync("Azure Congitive Services test",
result => {
if (result) {
console.log(JSON.stringify(result));
}
synthesizer.close();
$("#azure-progress").hide();
updateSettings({azureCreds: {subscriptionKey: subKey, region: region}});
$("#azure-success").text("Azure Cognitive Services voices are enabled.").show();
$("#azure-tts-key").val(obfuscate(subKey));
$("#azure-tts-region").val(obfuscate(region));
},
error => {
console.log(error);
synthesizer.close();
$("#azure-progress").hide();
$("#azure-error").text("Test failed: " + err.message).show();
});
}

function gcpSave() {
$(".status").hide();
var apiKey = $("#gcp-api-key").val().trim();
9 changes: 7 additions & 2 deletions js/defaults.js
@@ -103,7 +103,7 @@ function setState(key, value) {
* VOICES
*/
function getVoices() {
return getSettings(["awsCreds", "gcpCreds"])
return getSettings(["awsCreds", "gcpCreds", "azureCreds"])
.then(function(settings) {
return Promise.all([
browserTtsEngine.getVoices(),
@@ -112,6 +112,7 @@ function getVoices() {
settings.awsCreds ? amazonPollyTtsEngine.getVoices() : [],
settings.gcpCreds ? googleWavenetTtsEngine.getVoices() : googleWavenetTtsEngine.getFreeVoices(),
ibmWatsonTtsEngine.getVoices(),
settings.azureCreds ? azureTtsEngine.getVoices() : []
])
})
.then(function(arr) {
@@ -135,6 +136,10 @@ function isAmazonCloud(voice) {
return /^Amazon /.test(voice.voiceName);
}

function isAzure(voice) {
return /^Azure /.test(voice.voiceName);
}

function isMicrosoftCloud(voice) {
return /^Microsoft /.test(voice.voiceName) && voice.voiceName.indexOf(' - ') == -1;
}
@@ -156,7 +161,7 @@ function isIbmWatson(voice) {
}

function isRemoteVoice(voice) {
return isAmazonCloud(voice) || isMicrosoftCloud(voice) || isOpenFPT(voice) || isGoogleTranslate(voice) || isGoogleWavenet(voice) || isAmazonPolly(voice) || isIbmWatson(voice);
return isAmazonCloud(voice) || isAzure(voice) || isMicrosoftCloud(voice) || isOpenFPT(voice) || isGoogleTranslate(voice) || isGoogleWavenet(voice) || isAmazonPolly(voice) || isIbmWatson(voice);
}

function isPremiumVoice(voice) {
8 changes: 8 additions & 0 deletions js/microsoft.cognitiveservices.speech.sdk.bundle-min.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions js/speech.js
@@ -38,6 +38,7 @@ function Speech(texts, options) {
})
}
if (isAmazonPolly(options.voice)) return amazonPollyTtsEngine;
if (isAzure(options.voice)) return azureTtsEngine;
if (isGoogleWavenet(options.voice)) return googleWavenetTtsEngine;
if (isIbmWatson(options.voice)) return ibmWatsonTtsEngine;
if (isRemoteVoice(options.voice)) return remoteTtsEngine;
157 changes: 157 additions & 0 deletions js/tts-engines.js
@@ -5,6 +5,7 @@ var googleTranslateTtsEngine = new GoogleTranslateTtsEngine();
var amazonPollyTtsEngine = new AmazonPollyTtsEngine();
var googleWavenetTtsEngine = new GoogleWavenetTtsEngine();
var ibmWatsonTtsEngine = new IbmWatsonTtsEngine();
var azureTtsEngine = new AzureTtsEngine();


/*
@@ -38,6 +39,162 @@ interface TtsEngine {
}
*/

function AzureTtsEngine() {
var audio = document.createElement("AUDIO");
var prefetchAudio;
var isSpeaking = false;
var speakPromise;
var synthesizer;
this.speak = function(utterance, options, onEvent) {
if (!options.volume) options.volume = 1;
if (!options.rate) options.rate = 1;
if (!options.pitch) options.pitch = 1;
audio.pause();
audio.volume = options.volume;
audio.defaultPlaybackRate = options.rate;
audio.onplay = function() {
onEvent({type: 'start', charIndex: 0});
isSpeaking = true;
};
audio.onended = function() {
onEvent({type: 'end', charIndex: utterance.length});
isSpeaking = false;
};
audio.onerror = function() {
onEvent({type: "error", errorMessage: audio.error.message});
isSpeaking = false;
};
speakPromise = Promise.resolve()
.then(function() {
if (prefetchAudio && prefetchAudio[0] == utterance && prefetchAudio[1] == options) return prefetchAudio[2];
else return getAudioUrl(utterance, options.lang, options.voice, options.pitch);
})
.then(function(url) {
audio.src = url;
return audio.play();
})
.catch(function(err) {
onEvent({
type: "error",
errorMessage: err.name == "NotAllowedError" ? JSON.stringify({code: "error_user_gesture_required"}) : err.message
})
})
};
this.isSpeaking = function(callback) {
callback(isSpeaking);
};
this.pause =
this.stop = function() {
speakPromise.then(function() {audio.pause()});
};
this.resume = function() {
audio.play();
};
this.setNextStartTime = function() {
};
this.getVoices = function() {
return voices;
/*getSettings(["pollyVoices"])
.then(function(items) {
if (!items.pollyVoices || Date.now()-items.pollyVoices[0].ts > 24*3600*1000) updateVoices();
return items.pollyVoices || voices;
})*/
}
function updateVoices() {
// TODO - using list API: get access token, list voices, turn result into compatible format
// https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#get-a-list-of-voices
/*ajaxGet(config.serviceUrl + "/read-aloud/list-voices/amazon")
.then(JSON.parse)
.then(function(list) {
list[0].ts = Date.now();
updateSettings({pollyVoices: list});
})*/
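// A minimal sketch of that TODO, left here for reference. It assumes the standard
// {region}.tts.speech.microsoft.com/cognitiveservices/voices/list REST endpoint (which accepts the
// subscription key directly) and an azureVoices settings key; neither is wired up elsewhere in this PR.
getSettings(["azureCreds"])
.then(function(items) {
if (!items.azureCreds) throw new Error("Missing Azure credentials");
return fetch("https://" + items.azureCreds.region + ".tts.speech.microsoft.com/cognitiveservices/voices/list", {
headers: {"Ocp-Apim-Subscription-Key": items.azureCreds.subscriptionKey}
})
})
.then(function(res) {return res.json()})
.then(function(list) {
// Convert the service's voice descriptors into the extension's voice format
var converted = list.map(function(item) {
return {voiceName: "Azure " + item.DisplayName + " (" + item.Locale + ")", lang: item.Locale, gender: item.Gender.toLowerCase()};
});
converted[0].ts = Date.now();
updateSettings({azureVoices: converted});
})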
}
function getAudioUrl(text, lang, voice, pitch) {
assert(text && lang && voice && pitch != null);
return getSynthesizer()
.then(function(synthesizer) {
// Wrap the callback-style API in a promise that resolves to a playable object URL
return new Promise(function(resolve, reject) {
synthesizer.speakTextAsync(
text,
result => {
synthesizer.close();
// result.audioData is an ArrayBuffer; the SDK's default output is a RIFF/WAV stream,
// so wrap it in a Blob before handing it to the audio element
resolve(URL.createObjectURL(new Blob([result.audioData], {type: "audio/wav"})));
},
error => {
console.log(error);
synthesizer.close();
reject(new Error(error));
});
});
});
}
function getSynthesizer() {
//return synthesizer || (synthesizer = createSynthesizer());
// TODO figure out how to reuse synthesizer object
return createSynthesizer();
}
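// One way to approach that reuse (a sketch only): cache the creation promise and drop it when the
// stored credentials change. This assumes brapi exposes storage.onChanged like the WebExtensions API,
// and would also require getAudioUrl to stop closing the synthesizer after each request.
//
// var synthesizerPromise;
// function getSynthesizer() {
//   return synthesizerPromise || (synthesizerPromise = createSynthesizer());
// }
// brapi.storage.onChanged.addListener(function(changes) {
//   if (changes.azureCreds) synthesizerPromise = null;   // re-create with the new credentials
// });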
function createSynthesizer() {
return getSettings(["azureCreds"])
.then(function(items) {
if (!items.azureCreds) throw new Error("Missing Azure credentials");
const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(items.azureCreds.subscriptionKey, items.azureCreds.region);
// Pass null as the audio config so the SDK doesn't also play through the default speaker;
// playback happens via this engine's own audio element instead
return new SpeechSDK.SpeechSynthesizer(speechConfig, null);
})
}
/*function getOpts(text, voiceId, style) {
switch (style) {
case "newscaster":
return {
OutputFormat: "mp3",
Text: '<speak><amazon:domain name="news">' + escapeXml(text) + '</amazon:domain></speak>',
TextType: "ssml",
VoiceId: voiceId,
Engine: "neural"
}
case "conversational":
return {
OutputFormat: "mp3",
Text: '<speak><amazon:domain name="conversational">' + escapeXml(text) + '</amazon:domain></speak>',
TextType: "ssml",
VoiceId: voiceId,
Engine: "neural"
}
case "neural":
return {
OutputFormat: "mp3",
Text: text,
VoiceId: voiceId,
Engine: "neural"
}
default:
return {
OutputFormat: "mp3",
Text: text,
VoiceId: voiceId
}
}
}
function escapeXml(unsafe) {
return unsafe.replace(/[<>&'"]/g, function (c) {
switch (c) {
case '<': return '&lt;';
case '>': return '&gt;';
case '&': return '&amp;';
case '\'': return '&apos;';
case '"': return '&quot;';
}
})
}*/
var voices = [
{"voiceName":"Azure default voice","lang":"en-US","gender":"female"}
]
}


function BrowserTtsEngine() {
this.speak = function(text, options, onEvent) {
brapi.tts.speak(text, {
3 changes: 2 additions & 1 deletion manifest.json
@@ -51,7 +51,8 @@
"js/tts-engines.js",
"js/speech.js",
"js/document.js",
"js/events.js"
"js/events.js",
"js/microsoft.cognitiveservices.speech.sdk.bundle-min.js"
],
"persistent": false
},
1 change: 1 addition & 0 deletions options.html
@@ -8,6 +8,7 @@
<link rel="stylesheet" type="text/css" href="css/options.css">

<script src="js/jquery-3.1.1.min.js"></script>
<script src="js/microsoft.cognitiveservices.speech.sdk.bundle-min.js"></script>
<script src="js/defaults.js"></script>
<script src="js/tts-engines.js"></script>
<script src="js/options.js"></script>