Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use trusted URL for AudioWorklet #732

Merged
merged 18 commits into from Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 7 additions & 22 deletions gulpfile.js
Expand Up @@ -10,10 +10,6 @@
var webpack = require('webpack-stream');
var dtsBundleWebpack = require('dts-bundle-webpack');
var tsProject = ts.createProject('tsconfig.json');
var tsProject2015 = ts.createProject('tsconfig.json', {
target: 'es2015',
module: 'esnext'
});

gulp.task('build', gulp.series(function build() {
return gulp.src([
Expand All @@ -30,29 +26,18 @@
.pipe(tsProject())
.pipe(sourcemaps.write('.'))
.pipe(gulp.dest('distrib/lib'));
}));

gulp.task('build2015', gulp.series(function build() {
return gulp.src([
'src/**/*.ts',
'microsoft.cognitiveservices.speech.sdk.ts'],
{ base: '.' })
.pipe(eslint({
formatter: 'prose',
configuration: 'eslint.json'
}))
.pipe(eslint.format())
.pipe(eslint.failAfterError())
.pipe(sourcemaps.init())
.pipe(tsProject2015())
.pipe(sourcemaps.write('.'))
.pipe(gulp.dest('distrib/es2015'));
}, function () {
return gulp.src('./src/audioworklet/speech-processor.js')
.pipe(gulp.dest('./distrib/lib/src/common.browser'));
}));

gulp.task('bundle', gulp.series('build', function bundle() {
return gulp.src('bundleApp.js')
.pipe(webpack({
output: { filename: 'microsoft.cognitiveservices.speech.sdk.bundle.js' },
entry: {
'microsoft.cognitiveservices.speech.sdk.bundle': './bundleApp.js',
},
output: { filename: '[name].js' },
devtool: 'source-map',
module: {
rules: [{
Expand Down
4 changes: 2 additions & 2 deletions jest.config.js
Expand Up @@ -8,7 +8,7 @@ module.exports = {
"^.+\\.ts$": "ts-jest",
},
testRegex: "tests/.*Tests\\.ts$",
testPathIgnorePatterns: ["/lib/", "/es2015/", "/node_modules/", "/src/"],
testPathIgnorePatterns: ["/lib/", "/node_modules/", "/src/"],
moduleFileExtensions: ["ts", "js", "jsx", "json", "node"],
testEnvironment: "jsdom",
collectCoverage: false,
Expand All @@ -21,7 +21,7 @@ module.exports = {
"^.+\\.ts$": "ts-jest",
},
testRegex: "tests/.*Tests\\.ts$",
testPathIgnorePatterns: ["/lib/", "/es2015/", "/node_modules/", "/src/"],
testPathIgnorePatterns: ["/lib/", "/node_modules/", "/src/"],
moduleFileExtensions: ["ts", "js", "jsx", "json", "node"],
testEnvironment: "node",
collectCoverage: false,
Expand Down
5 changes: 2 additions & 3 deletions package.json
Expand Up @@ -38,11 +38,10 @@
"net": false
},
"main": "distrib/lib/microsoft.cognitiveservices.speech.sdk.js",
"module": "distrib/es2015/microsoft.cognitiveservices.speech.sdk.js",
"module": "distrib/lib/microsoft.cognitiveservices.speech.sdk.js",
"types": "distrib/lib/microsoft.cognitiveservices.speech.sdk.d.ts",
"files": [
"distrib/lib/**/*",
"distrib/es2015/**/*",
"distrib/browser/**/*",
"LICENSE",
"REDIST.txt"
Expand Down Expand Up @@ -83,7 +82,7 @@
"webpack-stream": "^7.0.0"
},
"scripts": {
"build": "gulp compress && gulp build2015",
"build": "gulp compress && gulp build",
"test": "npm run lint && npm run jest --coverage",
"jest": "jest",
"lint": "eslint -c .eslintrc.js --ext .ts src",
Expand Down
6 changes: 6 additions & 0 deletions src/common.browser/AudioWorkerUrl.ts
@@ -0,0 +1,6 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.

/* webpackChunkName: 'script_processor_audioWorklet' */
// NOTE(review): the webpackChunkName comment above is not placed inside an import()/new URL()
// expression — confirm that the bundler actually honors it in this position.
/**
 * Returns the location of the "speech-processor.js" script.
 *
 * The file name is resolved relative to this module's own URL (import.meta.url),
 * and the resulting URL object is serialized with toString().
 *
 * @returns The resolved URL of "speech-processor.js" as a string.
 */
// eslint-disable-next-line @typescript-eslint/tslint/config
export const getAudioWorkerUrl = (): string => new URL("speech-processor.js", import.meta.url).toString();
96 changes: 54 additions & 42 deletions src/common.browser/PCMRecorder.ts
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT license.

import { RiffPcmEncoder, Stream } from "../common/Exports";
import { getAudioWorkerUrl } from "./AudioWorkerUrl";
import { IRecorder } from "./IRecorder";

export class PcmRecorder implements IRecorder {
Expand Down Expand Up @@ -60,59 +61,70 @@ export class PcmRecorder implements IRecorder {
};
};

// Creates a "speech-processor" AudioWorkletNode on the given context, wires the microphone
// source through it to the context destination, and writes every frame the node posts
// (after waveStreamEncoder.encode) into outputStream. The created node, the mic source and
// the media stream are stored in this.privMediaResources.
const connectWorkletToMicInput = (context: AudioContext): void => {
    const processorNode = new AudioWorkletNode(context, "speech-processor");

    // The payload of each port message is treated as a Float32Array frame.
    processorNode.port.onmessage = (ev: MessageEvent): void => {
        const samples: Float32Array = ev.data as Float32Array;

        // Nothing to write to: no output stream, or it has already been closed.
        if (!outputStream || outputStream.isClosed) {
            return;
        }

        // The encoder may return a falsy value; in that case no chunk is written.
        const encodedFrame = waveStreamEncoder.encode(samples);
        if (!encodedFrame) {
            return;
        }

        outputStream.writeStreamChunk({
            buffer: encodedFrame,
            isEnd: false,
            timeReceived: Date.now(),
        });
    };

    // Audio graph: micInput -> processorNode -> context.destination
    micInput.connect(processorNode);
    processorNode.connect(context.destination);

    this.privMediaResources = {
        scriptProcessorNode: processorNode,
        source: micInput,
        stream: mediaStream,
    };
};

// https://webaudio.github.io/web-audio-api/#audioworklet
// Using AudioWorklet to improve audio quality and avoid audio glitches due to blocking the UI thread
const skipAudioWorklet = !!this.privSpeechProcessorScript && this.privSpeechProcessorScript.toLowerCase() === "ignore";

if (!!context.audioWorklet && !skipAudioWorklet) {
if (!this.privSpeechProcessorScript) {
const workletScript = `class SP extends AudioWorkletProcessor {
constructor(options) {
super(options);
}
process(inputs, outputs) {
const input = inputs[0];
const output = [];
for (let channel = 0; channel < input.length; channel += 1) {
output[channel] = input[channel];
}
this.port.postMessage(output[0]);
return true;
}
}
registerProcessor('speech-processor', SP);`;
const blob = new Blob([workletScript], { type: "application/javascript; charset=utf-8" });
this.privSpeechProcessorScript = URL.createObjectURL(blob);
}
this.privSpeechProcessorScript = getAudioWorkerUrl();

context.audioWorklet
.addModule(this.privSpeechProcessorScript)
.then((): void => {
const workletNode = new AudioWorkletNode(context, "speech-processor");
workletNode.port.onmessage = (ev: MessageEvent): void => {
const inputFrame: Float32Array = ev.data as Float32Array;

if (outputStream && !outputStream.isClosed) {
const waveFrame = waveStreamEncoder.encode(inputFrame);
if (!!waveFrame) {
outputStream.writeStreamChunk({
buffer: waveFrame,
isEnd: false,
timeReceived: Date.now(),
});
}
}
};
micInput.connect(workletNode);
workletNode.connect(context.destination);
this.privMediaResources = {
scriptProcessorNode: workletNode,
source: micInput,
stream: mediaStream,
};
connectWorkletToMicInput(context);
})
.catch((): void => {
attachScriptProcessor();
const workletScript = `class SP extends AudioWorkletProcessor {
constructor(options) {
super(options);
}
process(inputs, outputs) {
const input = inputs[0];
const output = [];
for (let channel = 0; channel < input.length; channel += 1) {
output[channel] = input[channel];
}
this.port.postMessage(output[0]);
return true;
}
}
registerProcessor('speech-processor', SP);`;
const blob = new Blob([workletScript], { type: "application/javascript; charset=utf-8" });
this.privSpeechProcessorScript = URL.createObjectURL(blob);

context.audioWorklet
.addModule(this.privSpeechProcessorScript)
.then((): void => {
connectWorkletToMicInput(context);
})
.catch((): void => {
attachScriptProcessor();
});
});
} else {
try {
Expand Down
5 changes: 5 additions & 0 deletions src/common.browser/WebsocketMessageAdapter.ts
Expand Up @@ -4,6 +4,9 @@
// Node.JS specific web socket / browser support.
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
/* eslint-disable @typescript-eslint/no-unsafe-call */
glharper marked this conversation as resolved.
Show resolved Hide resolved
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
import * as http from "http";
import * as net from "net";
import * as tls from "tls";
Expand Down Expand Up @@ -386,11 +389,13 @@ export class WebsocketMessageAdapter {
});
});
} else {
/* eslint-disable @typescript-eslint/no-unsafe-argument */
if (!!options.secureEndpoint) {
socketPromise = Promise.resolve(tls.connect(options));
} else {
socketPromise = Promise.resolve(net.connect(options));
}
/* eslint-enable @typescript-eslint/no-unsafe-argument */
}

return socketPromise;
Expand Down
3 changes: 3 additions & 0 deletions tests/AudioOutputStreamTests.ts
Expand Up @@ -7,6 +7,9 @@ import { Settings } from "./Settings";
import { closeAsyncObjects } from "./Utilities";

let objsToClose: any[];
jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll(() => {
// Override inputs, if necessary
Expand Down
4 changes: 4 additions & 0 deletions tests/AutoSourceLangDetectionTests.ts
Expand Up @@ -27,6 +27,10 @@ import { Settings } from "./Settings";
import { closeAsyncObjects, WaitForCondition } from "./Utilities";
import { WaveFileAudioInput } from "./WaveFileAudioInputStream";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

let objsToClose: any[];

beforeAll((): void => {
Expand Down
4 changes: 4 additions & 0 deletions tests/ConnectionTests.ts
Expand Up @@ -22,6 +22,10 @@ import {

import * as fs from "fs";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

let objsToClose: any[];

beforeAll(() => {
Expand Down
4 changes: 4 additions & 0 deletions tests/ConversationTranscriberTests.ts
Expand Up @@ -23,6 +23,10 @@ import { Settings } from "./Settings";
import { WaveFileAudioInput } from "./WaveFileAudioInputStream";
import { closeAsyncObjects, RepeatingPullStream, WaitForCondition } from "./Utilities";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

let objsToClose: any[];

beforeAll(() => {
Expand Down
4 changes: 4 additions & 0 deletions tests/ConversationTranslatorTests.ts
Expand Up @@ -26,6 +26,10 @@ import {
} from "./Utilities";
import { WaveFileAudioInput } from "./WaveFileAudioInputStream";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

// eslint-disable-next-line no-console
const consoleInfo = console.info;

Expand Down
4 changes: 4 additions & 0 deletions tests/DiagnosticsTests.ts
Expand Up @@ -9,6 +9,10 @@ import { closeAsyncObjects, WaitForCondition } from "./Utilities";

let objsToClose: any[];

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll((): void => {
// Override inputs, if necessary
Settings.LoadSettings();
Expand Down
4 changes: 4 additions & 0 deletions tests/DialogServiceConnectorTests.ts
Expand Up @@ -46,6 +46,10 @@ import {
} from "./Utilities";
import { WaveFileAudioInput } from "./WaveFileAudioInputStream";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

// eslint-disable-next-line no-console
const consoleInfo = console.info;
const simpleMessageObj = { speak: "This is speech", text: "This is text", type: "message" };
Expand Down
4 changes: 4 additions & 0 deletions tests/DynamicGrammarTests.ts
Expand Up @@ -9,6 +9,10 @@ import {
} from "../src/common.speech/Exports";
import { Settings } from "./Settings";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll(() => {
// Override inputs, if necessary
Settings.LoadSettings();
Expand Down
5 changes: 5 additions & 0 deletions tests/GeneralRecognizerTests.ts
Expand Up @@ -5,6 +5,11 @@ import * as sdk from "../microsoft.cognitiveservices.speech.sdk";
import { Settings } from "./Settings";
import { WaveFileAudioInput } from "./WaveFileAudioInputStream";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

let bufferSize: number;
beforeEach(() => {
// eslint-disable-next-line no-console
console.info("-------------------Starting test case: " + expect.getState().currentTestName + "---------------");
Expand Down
5 changes: 5 additions & 0 deletions tests/IntentRecognizerTests.ts
Expand Up @@ -18,6 +18,11 @@ import { WaveFileAudioInput } from "./WaveFileAudioInputStream";
import { AudioStreamFormatImpl } from "../src/sdk/Audio/AudioStreamFormat";

let objsToClose: any[];
jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

let bufferSize: number;

beforeAll(() => {
// override inputs, if necessary
Expand Down
4 changes: 4 additions & 0 deletions tests/LanguageModelTests.ts
Expand Up @@ -5,6 +5,10 @@ import * as sdk from "../microsoft.cognitiveservices.speech.sdk";
import { LanguageUnderstandingModelImpl } from "../src/sdk/LanguageUnderstandingModel";
import { Settings } from "./Settings";

jest.mock("../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll(() => {
// Override inputs, if necessary
Settings.LoadSettings();
Expand Down
3 changes: 3 additions & 0 deletions tests/LongRunning/SpeechRecoAuthTokenErrorMessageTests.ts
Expand Up @@ -9,6 +9,9 @@ import { Settings } from "../Settings";
import { CreateRepeatingPullStream, WaitForCondition } from "../Utilities";

let objsToClose: any[];
jest.mock("../../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll(() => {
// override inputs, if necessary
Expand Down
4 changes: 4 additions & 0 deletions tests/LongRunning/SpeechRecoAuthTokenRefreshTests.ts
Expand Up @@ -11,6 +11,10 @@ import { CreateRepeatingPullStream, WaitForCondition } from "../Utilities";

let objsToClose: any[];

jest.mock("../../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll(() => {
// override inputs, if necessary
Settings.LoadSettings();
Expand Down
4 changes: 4 additions & 0 deletions tests/LongRunning/SpeechRecoReconnectTests.ts
Expand Up @@ -11,6 +11,10 @@ import { WaitForCondition } from "../Utilities";

let objsToClose: any[];

jest.mock("../../src/common.browser/AudioWorkerUrl", () => ({
getAudioWorkerUrl: (): string => "speech-processor.js"
}));

beforeAll((): void => {
// override inputs, if necessary
Settings.LoadSettings();
Expand Down