adding basic speech support

Built in response to https://developer.ibm.com/answers/questions/254586/service-interconnected-speech-to-text-dialog-text.html?utm_campaign=answers&utm_medium=email&utm_source=answers-new-question&utm_content=answers-answer-question
nfriedly · Feb 24, 2016 · fd8408a · fd8408a · jijoamt · Mar 7, 2016
1 parent 1f7fcf8
commit fd8408a
Show file tree

Hide file tree

Showing 13 changed files with 173 additions and 17 deletions.
diff --git a/.cfignore b/.cfignore
@@ -1 +1,2 @@
-node_modules
+node_modules
+.env
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 node_modules
 /dialogs/dialog-id.json
+.env
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
-# Dialog Node.js
+# Speech & Dialog Demo App
 
-  The Dialog starter application in Node.js is a sample that demonstrates how the IBM Watson [Dialog service][service_url] works in a specific context.
+  This is an extension of the IBM Watson [Dialog service][service_url] Dialog starter application that also incorporates 
+  the [Speech JS SDK](https://github.com/watson-developer-cloud/speech-javascript-sdk) to allow for voice interactions.
 <p align="center">
 <img src="http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/img/service-gifs/dialog.gif" width="400">
 </p>

diff --git a/app.js b/app.js
@@ -16,6 +16,8 @@
 
 'use strict';
 
+require('dotenv').config({silent: true});
+
 var express  = require('express'),
   app        = express(),
   fs         = require('fs'),
@@ -24,9 +26,16 @@ var express  = require('express'),
   extend     = require('util')._extend,
   watson     = require('watson-developer-cloud');
 
+
 // Bootstrap application settings
 require('./config/express')(app);
 
+// token endpoints
+// **Warning**: these endpoints should be guarded with additional authentication & authorization for production use
+app.use('/api/speech-to-text/', require('./stt-token.js'));
+app.use('/api/text-to-speech/', require('./tts-token.js'));
+
+
 // if bluemix credentials exists, then override local
 var credentials =  extend({
   url: '<url>',

diff --git a/manifest.yml b/manifest.yml
@@ -2,11 +2,19 @@ declared-services:
   dialog-service:
     label: dialog
     plan: standard
+  stt-service:
+      label: speech_to_text
+      plan: standard
+  tts-service:
+      label: text_to_speech
+      plan: standard
 applications:
 - services:
   - dialog-service
-  name: dialog-nodejs
-  command: node app.js
+  - stt-service
+  - tts-service
+  name: speech-dialog
+  command: npm start
   path: .
   memory: 512M
   env:

diff --git a/package.json b/package.json
@@ -1,15 +1,11 @@
 {
-  "name": "DialogNodejsStarterApp",
-  "version": "0.1.8",
-  "description": "A sample nodejs app for Bluemix that use the Dialog",
-  "engines": {
-    "node": ">=0.10.38"
-  },
+  "name": "speech-dialog",
+  "version": "1.0.0",
+  "description": "A simple extension of the Watson Dialog sample app to use the Speech JS SDK",
   "repository": {
     "type": "git",
     "url": "https://github.com/watson-developer-cloud/dialog-nodejs.git"
   },
-  "author": "IBM Corp.",
   "contributors": [
     {
       "name": "James Zhang",
@@ -22,6 +18,10 @@
     {
       "name": "German Attanasio Ruiz",
       "email": "germanatt@us.ibm.com"
+    },
+    {
+      "name": "Nathan Friedly",
+      "url": "http://nfriedly.com/"
     }
   ],
   "license": "Apache-2.0",
@@ -35,9 +35,11 @@
   "dependencies": {
     "async": "^1.5.1",
     "body-parser": "~1.14.1",
+    "dotenv": "^2.0.0",
     "errorhandler": "~1.4.1",
     "express": "~4.13.3",
     "request": "^2.67.0",
+    "vcap_services": "^0.1.7",
     "watson-developer-cloud": "~1.0.6"
   }
 }
diff --git a/public/css/style.css b/public/css/style.css
@@ -1066,9 +1066,24 @@ pre[class*=" language-"] {
 
 .chat-window {
   position: relative; }
+
   .chat-window--message-input {
     padding-top: 1.5rem;
-    padding-bottom: 1.5rem; }
+    padding-bottom: 1.5rem;
+    width: calc(100% - 37px);
+  }
+  .chat-window--microphone-button {
+    width: 32px;
+    height: 68px;
+    padding: 19px 2px;
+    cursor: pointer;
+    background-color: #00B2EF;
+    position: absolute; /* this is a hack, but it gets the job done */
+    right: 0;
+  }
+  .chat-window--microphone-button.active {
+    background-color: #d74108;
+  }
 
 .chat-box {
   position: relative;

diff --git a/public/images/icons/microphone.svg b/public/images/icons/microphone.svg
diff --git a/public/index.html b/public/index.html
@@ -55,10 +55,10 @@ <h1 class="banner--service-title base--h1">
 					<a href="http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/dialog.html" class="base--a">Documentation</a>
 				</li>
 				<li class="base--li banner--service-link-item">
-					<a href="https://bluemix.net/deploy?repository=https://github.com/watson-developer-cloud/dialog-nodejs.git" class="base--a">Fork and Deploy on Bluemix</a>
+					<a href="https://bluemix.net/deploy?repository=https://github.com/nfriedly/speech-dialog.git" class="base--a">Fork and Deploy on Bluemix</a>
 				</li>
 				<li class="base--li banner--service-link-item">
-					<a href="https://github.com/watson-developer-cloud/dialog-nodejs" class="base--a">Fork on Github</a>
+					<a href="https://github.com/nfriedly/speech-dialog" class="base--a">Fork on Github</a>
 				</li>
 			</div>
 		</div>
@@ -119,6 +119,7 @@ <h2 class="base--h2">
 				</div>
 
 				<input type="text" placeholder="Type a response and hit enter" value="" autocomplete="off" class="chat-window--message-input base--text-input">
+				<img class="chat-window--microphone-button" src="/images/icons/microphone.svg" alt="Record via Microphone"/>
 			</div>
 		</div>
 
@@ -159,6 +160,7 @@ <h6 class="base--h6">Profile</h6>
 
 
 	<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
+  	<script type="text/javascript" src="js/watson-speech.min.js"></script>
   	<script type="text/javascript" src="js/demo.js"></script>
 </body>
 </html>
diff --git a/public/js/demo.js b/public/js/demo.js
@@ -26,8 +26,32 @@ $(document).ready(function () {
     $jsonPanel = $('#json-panel .base--textarea'),
     $information = $('.data--information'),
     $profile = $('.data--profile'),
-    $loading = $('.loader');
+    $loading = $('.loader'),
+    $micButton = $('.chat-window--microphone-button');
+
+  // Request the speech-to-text and text-to-speech auth tokens once, up front;
+  // the resulting jqXHR objects are then reused wherever a token is needed.
+  // note: these tokens expire after an hour.
+  // NOTE(review): nothing visible here refreshes them, so a page left open
+  // longer than that would presumably keep using expired tokens - confirm.
+  var getSTTToken = $.ajax('/api/speech-to-text/token');
+  var getTTSToken = $.ajax('/api/text-to-speech/token');
+
+  // Pre-bound helper that removes the 'active' class from the microphone button.
+  var deactivateMicButton = $micButton.removeClass.bind($micButton, 'active');
+
+  /**
+   * Starts a microphone recognition session (continuous: false) and, once it
+   * resolves, submits the chat input's current value to converse().
+   * The microphone button carries the 'active' class while the session runs;
+   * the class is removed again whether the session succeeds or fails.
+   */
+  function record() {
+    getSTTToken.then(function(token) {
+      $micButton.addClass('active');
+      WatsonSpeech.SpeechToText.recognizeMicrophone({
+        token: token,
+        continuous: false,
+        // presumably the SDK writes the transcript into this element; its value is read back below
+        outputElement: $chatInput[0],
+        // NOTE(review): presumably keeps the stream open on Firefox to avoid repeated permission prompts - confirm
+        keepMicrophone: navigator.userAgent.indexOf('Firefox') > 0
+      }).promise().then(function() {
+        converse($chatInput.val());
+      })
+      .then(deactivateMicButton)
+      .catch(deactivateMicButton);
+    }, function(jqXHR, textStatus, errorThrown) {
+      // the token request failed, so recording cannot start; report it instead of failing silently
+      console.error('Error retrieving speech-to-text token: ', textStatus, errorThrown);
+      deactivateMicButton();
+    });
+  }
 
+  $micButton.click(record);
 
   $chatInput.keyup(function(event){
     if(event.keyCode === 13) {
@@ -67,6 +91,13 @@ $(document).ready(function () {
         var texts = dialog.conversation.response;
         var response = texts.join('&lt;br/&gt;'); // &lt;br/&gt; is <br/>
 
+        // Speak the dialog response aloud once the text-to-speech token is available.
+        // NOTE(review): `texts` is the un-joined response (it is joined above for display); confirm synthesize() accepts that for `text`.
+        getTTSToken.then(function(token) {
+          WatsonSpeech.TextToSpeech.synthesize({
+            text: texts,
+            token: token
+          }).addEventListener('ended', record); // start recording again when 'ended' fires (presumably once playback of the synthesized audio finishes)
+        });
+
         $chatInput.show();
         $chatInput[0].focus();
 
@@ -169,4 +200,4 @@ $(document).ready(function () {
   converse();
   scrollToInput();
 
-});
+});
diff --git a/public/js/watson-speech.min.js b/public/js/watson-speech.min.js
diff --git a/stt-token.js b/stt-token.js
@@ -0,0 +1,33 @@
+'use strict';
+
+var express      = require('express'),
+  router          = express.Router(),
+  vcapServices = require('vcap_services'),
+  extend       = require('util')._extend,
+  watson       = require('watson-developer-cloud');
+
+// Express router exposing GET /token, which serves speech-to-text auth tokens
+
+// For local development, replace username and password; anything returned by vcapServices.getCredentials() overrides these defaults
+var sttConfig = extend({
+  version: 'v1',
+  url: 'https://stream.watsonplatform.net/speech-to-text/api',
+  username: '<username>',
+  password: '<password>'
+}, vcapServices.getCredentials('speech_to_text'));
+
+// Do not log process.env.VCAP_SERVICES here: it carries the bound services' credentials
+var sttAuthService = watson.authorization(sttConfig);
+
+// Responds with the token as the body, or with a generic 500 (the error itself is only logged server-side) when retrieval fails
+router.get('/token', function(req, res) {
+  sttAuthService.getToken({url: sttConfig.url}, function(err, token) {
+    if (err) {
+      console.log('Error retrieving token: ', err);
+      return res.status(500).send('Error retrieving token');
+    }
+    res.send(token);
+  });
+});
+
+module.exports = router;
diff --git a/tts-token.js b/tts-token.js
@@ -0,0 +1,31 @@
+'use strict';
+
+var express      = require('express'),
+  router          = express.Router(),
+  vcapServices = require('vcap_services'),
+  extend       = require('util')._extend,
+  watson       = require('watson-developer-cloud');
+
+// Express router exposing GET /token, which serves text-to-speech auth tokens (500 with a generic message on failure)
+
+// For local development, replace username and password; anything returned by vcapServices.getCredentials() overrides these defaults
+var ttsConfig = extend({
+  version: 'v1',
+  url: 'https://stream.watsonplatform.net/text-to-speech/api',
+  username: '<username>',
+  password: '<password>'
+}, vcapServices.getCredentials('text_to_speech'));
+
+var ttsAuthService = watson.authorization(ttsConfig);
+
+router.get('/token', function(req, res) {
+  ttsAuthService.getToken({url: ttsConfig.url}, function(err, token) {
+    if (err) {
+      console.log('Error retrieving token: ', err);
+      return res.status(500).send('Error retrieving token');
+    }
+    res.send(token);
+  });
+});
+
+module.exports = router;