From e2328835b5cd658d453bdabcc4813b1d6d2a3580 Mon Sep 17 00:00:00 2001 From: Ilia Glazkov Date: Fri, 30 Oct 2020 11:37:06 -0700 Subject: [PATCH] Add speaker identification option. (#21) --- Readme.md | 6 +++++- src/AwsTranscribe.ts | 16 +++++----------- src/__tests__/AwsTranscribe.test.ts | 21 +++++++++++++++++++++ src/types.ts | 2 +- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/Readme.md b/Readme.md index 8662265..fd2ec85 100644 --- a/Readme.md +++ b/Readme.md @@ -4,7 +4,7 @@ A client for Amazon Transcribe using the websocket interface ## Getting Started -With NPM install the module with: `npm install aws-transcribe --save` +With NPM install the module with: `npm install aws-transcribe --save` With YARN install the module with: `yarn add aws-transcribe` ## Example @@ -68,6 +68,10 @@ The `transcribeStreamConfig` is required and must have the following properties: - `languageCode` must be one of "en-US", "en-AU", "en-GB", "fr-CA", "fr-FR", "es-US" - `sampleRate` must be between 8000 and 44100 - the supported sample rate differs depending on the language code being used. For more information, go [here](https://docs.aws.amazon.com/transcribe/latest/dg/streaming.html) +It may also optionally include: + +- `showSpeakerLabel` - when `true`, [speaker identification](https://docs.aws.amazon.com/transcribe/latest/dg/diarization-streaming.html) will be enabled + ### StreamingClient EVENTS - `open` - when the socket to aws is opened diff --git a/src/AwsTranscribe.ts b/src/AwsTranscribe.ts index e80ac03..2ad2cfd 100644 --- a/src/AwsTranscribe.ts +++ b/src/AwsTranscribe.ts @@ -8,23 +8,21 @@ export class AwsTranscribe { private accessKeyId!: string private secretAccessKey!: string private sessionToken: string | undefined - private showSpeakerLabel?: boolean constructor(config?: ClientConfig) { // get from environment if config not provided this.setAccessKeyId(config?.accessKeyId || process.env.AWS_ACCESS_KEY_ID) this.setSecretAccessKey(config?.secretAccessKey || process.env.AWS_SECRET_ACCESS_KEY) this.setSessionToken(config?.sessionToken || process.env.AWS_SESSION_TOKEN) - this.setShowSpeakerLabel(config?.showSpeakerLabel || false) } private createPreSignedUrl(config: TranscribeStreamConfig) { const { region, languageCode, sampleRate, showSpeakerLabel } = config const endpoint = "transcribestreaming." + region + ".amazonaws.com:8443" - let query = "language-code=" + languageCode + "&media-encoding=pcm&sample-rate=" + sampleRate - if (showSpeakerLabel) { - query += '&show-speaker-label=true' - } + let query = "language-code=" + languageCode + "&media-encoding=pcm&sample-rate=" + sampleRate + if (showSpeakerLabel) { + query += '&show-speaker-label=' + showSpeakerLabel + } return createPresignedURL( "GET", @@ -39,7 +37,7 @@ export class AwsTranscribe { protocol: "wss", expires: 15, region: region, - query: query + query: query, } ) } @@ -64,8 +62,4 @@ export class AwsTranscribe { setSessionToken(sessionToken: string | undefined) { this.sessionToken = sessionToken } - - setShowSpeakerLabel(showSpeakerLabel: boolean | false) { - this.showSpeakerLabel = showSpeakerLabel - } } diff --git a/src/__tests__/AwsTranscribe.test.ts b/src/__tests__/AwsTranscribe.test.ts index d4a8ba7..ca100ac 100644 --- a/src/__tests__/AwsTranscribe.test.ts +++ b/src/__tests__/AwsTranscribe.test.ts @@ -124,6 +124,27 @@ describe("AwsTranscribe", () => { }) }) + it(`should include show-speaker-label parameter when given`, () => { + const region = "us-east-1" + const sampleRate = 8000 + const languageCode = "en-GB" + const showSpeakerLabel = true + + client.createStreamingClient({ + region, + sampleRate, + languageCode, + showSpeakerLabel, + }) + + expect(mockedCreatePresignedURL).toBeCalled() + const args = mockedCreatePresignedURL.mock.calls[0] + const options = args[5] + const query = options.query + + expect(query).toBe(`language-code=${languageCode}&media-encoding=pcm&sample-rate=${sampleRate}&show-speaker-label=true`) + }) + it(`should create and return an instance of Streaming client with the pre signed url`, () => { const region = "us-east-1" const sampleRate = 8000 diff --git a/src/types.ts b/src/types.ts index 2f14f2a..cf46862 100644 --- a/src/types.ts +++ b/src/types.ts @@ -22,7 +22,6 @@ export interface ClientConfig { accessKeyId?: string secretAccessKey?: string sessionToken?: string - showSpeakerLabel?: boolean } export interface TranscribeStreamConfig { @@ -79,6 +78,7 @@ interface TranscribeItem { EndTime: number StartTime: number Type: "pronunciation" | "punctuation" + Speaker: string } interface TranscribeAlternative {