Skip to content

Commit

Permalink
fix(whatsapp.gblib): Speech is now full duplex.
Browse files: browse the repository at this point in the history
  • Loading branch information
rodrigorodriguez committed May 1, 2020
1 parent 2a41d9c commit 6442ebf
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 194 deletions.
142 changes: 7 additions & 135 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 4 additions & 6 deletions package.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,7 +13,7 @@
"Dário Vieira <dario.junior3@gmail.com>"
],
"engines": {
"node": "=10.15.2"
"node": "=10.15.2"
},
"license": "AGPL-3.0",
"preferGlobal": true,
Expand Down Expand Up @@ -49,10 +49,10 @@
},
"dependencies": {
"@azure/ms-rest-js": "2.0.4",
"@discordjs/opus": "0.1.0",
"@microsoft/microsoft-graph-client": "2.0.0",
"@types/validator": "12.0.1",
"adal-node": "0.2.1",
"any-shell-escape": "^0.1.1",
"async-promises": "0.2.2",
"azure-arm-cognitiveservices": "3.0.0",
"azure-arm-resource": "7.3.0",
Expand Down Expand Up @@ -84,11 +84,10 @@
"nexmo": "2.5.2",
"ngrok": "3.2.7",
"npm": "6.13.4",

"prism-media": "1.2.1",
"opn": "6.0.0",
"pragmatismo-io-framework": "1.0.20",
"prism-media": "1.2.1",
"public-ip": "4.0.0",
"public-ip": "4.0.0",
"readline": "1.3.0",
"reflect-metadata": "0.1.13",
"request-promise": "4.2.5",
Expand All @@ -107,7 +106,6 @@
"typescript": "3.7.4",
"url-join": "4.0.1",
"vbscript-to-typescript": "1.0.8",

"wait-until": "0.0.2",
"walk-promise": "0.2.0",
"washyourmouthoutwithsoap": "1.0.2"
Expand Down
98 changes: 47 additions & 51 deletions packages/core.gbapp/services/GBConversationalService.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -42,14 +42,16 @@ import { GBDialogStep, GBLog, GBMinInstance, IGBCoreService } from 'botlib';
import { AzureText } from 'pragmatismo-io-framework';
import { Messages } from '../strings';
import { GBServer } from '../../../src/app';
import { Readable } from 'stream'
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService';
const urlJoin = require('url-join');
const PasswordGenerator = require("strict-password-generator").default;
const Nexmo = require('nexmo');
let sdk = require("microsoft-cognitiveservices-speech-sdk");
var fs = require('fs')
import { Readable } from 'stream'
import { GBAdminService } from '../../admin.gbapp/services/GBAdminService';
const prism = require('prism-media');
const { join } = require('path')
const shell = require('any-shell-escape')
const { exec } = require('child_process')
const fs = require('fs')
const sdk = require("microsoft-cognitiveservices-speech-sdk");

export interface LanguagePickerSettings {
defaultLocale?: string;
Expand Down Expand Up @@ -147,7 +149,7 @@ export class GBConversationalService {

try {
speechConfig.speechSynthesisLanguage = locale;
speechConfig.speechSynthesisVoiceName = "pt-BR-HeloisaRUS";
speechConfig.speechSynthesisVoiceName = "pt-BR-FranciscaNeural";

synthesizer.speakTextAsync(text,
(result) => {
Expand All @@ -158,15 +160,6 @@ export class GBConversationalService {
const oggFilenameOnly = `tmp${name}.ogg`;
const oggFilename = `work/${oggFilenameOnly}`;

const ffmpeg = prism.FFmpeg.getInfo();

console.log(`Using FFmpeg version ${ffmpeg.version}`);

if (ffmpeg.output.includes('--enable-libopus')) {
console.log('libopus is available!');
} else {
console.log('libopus is unavailable!');
}

const output = fs.createWriteStream(oggFilename);
const transcoder = new prism.FFmpeg({
Expand All @@ -183,7 +176,6 @@ export class GBConversationalService {
.pipe(transcoder)
.pipe(output);

console.log("synthesis finished.");

let url = urlJoin(GBServer.globals.publicAddress, 'audios', oggFilenameOnly);
resolve(url);
Expand All @@ -206,56 +198,60 @@ export class GBConversationalService {
let subscriptionKey = speechKey;
let serviceRegion = cloudRegion;

var samplingRate = 16000;
var frameDuration = 20;
var channels = 1;
var frameSize = samplingRate * frameDuration / 1000;

const oggFile = new Readable();
oggFile._read = () => { } // _read is required but you can noop it
oggFile.push(buffer);
oggFile.push(null);

const name = GBAdminService.getRndReadableIdentifier();

fs.writeFileSync(`work/tmp${name}.ogg`, buffer);
const dest = `work/tmp${name}.wav`;
const src = `work/tmp${name}.ogg`;
fs.writeFileSync(src, oggFile.read());

const makeMp3 = shell([
'node_modules/ffmpeg-static/ffmpeg.exe', '-y', '-v', 'error',
'-i', join(process.cwd(), src),
'-ar', '16000',
'-ac', '1',
'-acodec', 'pcm_s16le',
join(process.cwd(), dest)
])

exec(makeMp3, (error) => {
if (error) {
GBLog.error(error);
return Promise.reject(error);
} else {
let data = fs.readFileSync(dest);

let wr = fs.createWriteStream(`work/tmp${name}.pcm`);
wr.on('finish', () => {
let data = fs.readFileSync(`work/tmp${name}.pcm`);
let pushStream = sdk.AudioInputStream.createPushStream();
pushStream.write(data);
pushStream.close();

let pushStream = sdk.AudioInputStream.createPushStream();
pushStream.write(data);
pushStream.close();
let audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
let speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
speechConfig.speechRecognitionLanguage = locale;
let recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

let audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
let speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, serviceRegion);
speechConfig.speechRecognitionLanguage = locale;
let recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
recognizer.recognizeOnceAsync(
(result) => {

recognizer.recognizeOnceAsync(
(result) => {
resolve(result.text ? result.text : 'Speech to Text failed: Audio not converted');

resolve(result.text ? result.text : 'Speech to Text failed: Audio not converted');
recognizer.close();
recognizer = undefined;
},
(err) => {
reject(err);

recognizer.close();
recognizer = undefined;
},
(err) => {
reject(err);
recognizer.close();
recognizer = undefined;
});

recognizer.close();
recognizer = undefined;
});
});
}
})

fs.createReadStream(`work/tmp${name}.ogg`)
.pipe(new prism.opus.OggDemuxer())
.pipe(new prism.opus.Decoder({
rate: samplingRate,
channels: channels, frameSize: frameSize
}))
.pipe(wr);
} catch (error) {
GBLog.error(error);
return Promise.reject(error);
Expand Down
Loading

0 comments on commit 6442ebf

Please sign in to comment.