Skip to content

Commit

Permalink
Merge pull request bbc#167 from sshniro/master
Browse files Browse the repository at this point in the history
Adding speech to text adapter for Google cloud platform
  • Loading branch information
Pietro committed Jul 19, 2019
2 parents 96f6e6d + 3a74e94 commit be93c08
Show file tree
Hide file tree
Showing 9 changed files with 107,221 additions and 0 deletions.
1 change: 1 addition & 0 deletions demo/select-stt-json-type.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const SttTypeSelect = props => {
<option value="vtt-youtube" disabled>Youtube VTT</option>
<option value="amazontranscribe">Amazon Transcribe</option>
<option value="digitalpaperedit">Digital Paper Edit</option>
<option value="google-stt">Google STT</option>
</select>;
};

Expand Down
5 changes: 5 additions & 0 deletions packages/stt-adapters/google-stt/example-usage.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import gcpSttToDraft from './index';
import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json';

// Example run: convert the sample GCP transcript and pretty-print the resulting blocks.
console.log('Starting');
const draftJsBlocks = gcpSttToDraft(gcpSttTedTalkTranscript);
console.log(JSON.stringify(draftJsBlocks, null, 2));
118 changes: 118 additions & 0 deletions packages/stt-adapters/google-stt/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/**
* Converts Google Cloud Platform (GCP) Speech-to-Text JSON into Draft.js content blocks.
* See the `sample` folder for examples of input and output, as well as `example-usage.js`.
*/

import generateEntitiesRanges from '../generate-entities-ranges/index.js';

// Number of nanoseconds in one second; divides `nanos` offsets to get fractions of a second.
const NANO_SECOND = 1e9;

/**
 * Picks the alternative with the highest confidence out of one GCP Speech to Text result.
 *
 * Per the GCP Speech-to-Text reference, alternatives are listed most probable first and
 * `confidence` may be left out (typically on every alternative but the top one). So:
 * - an alternative whose confidence is missing or not numeric ranks below any scored one;
 * - on a tie (including "no alternative has a confidence") the earliest alternative wins.
 *
 * @param {object} sentences - one result, eg { alternatives: [ { transcript, confidence, words }, ... ] }
 * @returns {object|undefined} the best alternative, or undefined when there are no alternatives
 */
export const getBestAlternativeSentence = sentences => {
  const alternatives = sentences.alternatives || [];

  if (alternatives.length === 0) {
    return undefined;
  }

  // parseFloat(undefined) is NaN and every comparison against NaN is false,
  // so map unusable confidences to the lowest possible score instead.
  const confidenceOf = alternative => {
    const confidence = parseFloat(alternative.confidence);

    return Number.isNaN(confidence) ? -Infinity : confidence;
  };

  // Strict `>` keeps the earlier alternative when scores are equal.
  return alternatives.reduce((best, candidate) =>
    confidenceOf(candidate) > confidenceOf(best) ? candidate : best
  );
};

/**
 * Removes the white space surrounding a piece of text.
 * @param {string} text - text to clean up
 * @returns {string} `text` without leading and trailing white space
 */
export const trimLeadingAndTailingWhiteSpace = text => text.trim();

/**
 * Converts the `seconds` / `nanos` parts of a GCP time offset into seconds.
 *
 * GCP does not provide a nanosecond attribute if the word starts at 0 nanosecond.
 * A `seconds` attribute that is absent is handled the same way and counts as 0,
 * rather than turning the whole offset into NaN.
 *
 * @param {string|number} [startSecond] - whole seconds part of the offset
 * @param {string|number} [nanoSecond] - nanoseconds part of the offset
 * @returns {number} the offset in seconds
 */
const computeTimeInSeconds = (startSecond, nanoSecond) => {
  const isMissing = value => value === undefined || value === null;

  let seconds = isMissing(startSecond) ? 0 : parseFloat(startSecond);

  if (!isMissing(nanoSecond)) {
    seconds += nanoSecond / NANO_SECOND;
  }

  return seconds;
};

/**
 * Reshapes a GCP word into the generic word format
 * expected by the generateEntitiesRanges() method.
 * @param {object} currentWord - GCP word, read as { word, startTime: { seconds, nanos }, endTime: { seconds, nanos } }
 * @param {*} confidence - confidence value copied as-is onto the normalized word
 * @returns {object} { start, end, text, confidence }, with `start` and `end` computed by computeTimeInSeconds()
 **/
const normalizeWord = (currentWord, confidence) => {
  const { startTime, endTime, word } = currentWord;
  const start = computeTimeInSeconds(startTime.seconds, startTime.nanos);
  const end = computeTimeInSeconds(endTime.seconds, endTime.nanos);

  return { start, end, text: word, confidence };
};

/**
 * Builds one paragraph per GCP Speech to Text result.
 * For each result the best alternative is selected; its trimmed transcript becomes
 * the single entry of `text` and its normalized words become `words`.
 * @param {array} sentences - array of sentence objects from GCP STT
 * @returns {array} list of paragraphs, each shaped as { words: [...], text: [...] }
 */
const groupWordsInParagraphs = sentences =>
  sentences.map(sentence => {
    const bestAlternative = getBestAlternativeSentence(sentence);
    const text = [ trimLeadingAndTailingWhiteSpace(bestAlternative.transcript) ];
    // every word of a paragraph carries the confidence of the whole alternative
    const words = bestAlternative.words.map(word =>
      normalizeWord(word, bestAlternative.confidence)
    );

    return { words, text };
  });

/**
 * Converts a GCP Speech to Text response into a list of DraftJs content blocks,
 * one block per paragraph returned by groupWordsInParagraphs().
 * @param {object} gcpSttJson - GCP STT response; only its `results` array is read
 * @returns {array} DraftJs blocks shaped as { text, type, data: { speaker, words, start }, entityRanges }
 */
const gcpSttToDraft = gcpSttJson => {
  // NOTE: speaker segmentation (the `speakerTag` attribute of GCP words) is not read here,
  // so a block is only labelled `Speaker ...` when its paragraph carries a `speaker` value;
  // otherwise it gets the `TBC <index>` placeholder.
  const wordsByParagraphs = groupWordsInParagraphs(gcpSttJson.results);

  return wordsByParagraphs.map((paragraph, i) => {
    const { words, text, speaker } = paragraph;
    const speakerLabel = speaker ? `Speaker ${ speaker }` : `TBC ${ i }`;

    return {
      text: text.join(' '),
      type: 'paragraph',
      data: {
        speaker: speakerLabel,
        words,
        // the block starts when its first word starts
        start: parseFloat(words[0].start)
      },
      // the entities as ranges are each word in the space-joined text,
      // so the offset from the beginning of the paragraph and the length are computed for each word
      entityRanges: generateEntitiesRanges(words, 'text') // wordAttributeName
    };
  });
};

export default gcpSttToDraft;
52 changes: 52 additions & 0 deletions packages/stt-adapters/google-stt/index.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import gcpSttToDraft, {
getBestAlternativeSentence,
trimLeadingAndTailingWhiteSpace
} from './index';
import draftTranscriptSample from './sample/googleSttToDraftJs.sample.js';
import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json';

describe('gcpSttToDraft', () => {
  let result;

  // Convert inside a hook rather than in the describe body: code in a describe body
  // runs while the test file is being collected, so a throwing adapter would abort the
  // suite instead of being reported against the tests below.
  beforeAll(() => {
    result = gcpSttToDraft(gcpSttTedTalkTranscript);
  });

  it('Should be defined', () => {
    expect(result).toBeDefined();
  });

  // Full comparison of the converted sample against the stored DraftJs output.
  it('Should be equal to expected value', () => {
    expect(result).toEqual(draftTranscriptSample);
  });
});

describe('leading and tailing white space should be removed from text block', () => {
  const sentence = ' this is a sentence ';
  const expected = 'this is a sentence';

  it('should be equal to expected value', () => {
    // The function under test is called inside the test, so a failure in it
    // is attributed to this test rather than to the collection of the suite.
    const result = trimLeadingAndTailingWhiteSpace(sentence);

    expect(result).toEqual(expected);
  });
});

describe('Best alternative sentence should be returned', () => {
  // Two alternatives for the same result; the first one has the higher confidence.
  const strongerAlternative = {
    'transcript': 'this is the first sentence',
    'confidence': 0.95
  };
  const weakerAlternative = {
    'transcript': 'this is the first sentence alternative',
    'confidence': 0.80
  };
  const sentences = { alternatives: [ strongerAlternative, weakerAlternative ] };

  // Kept as a separate literal so the comparison does not depend on object identity.
  const expected = {
    'transcript': 'this is the first sentence',
    'confidence': 0.95
  };

  it('Should be equal to expected value', () => {
    expect(getBestAlternativeSentence(sentences)).toEqual(expected);
  });
});
Loading

0 comments on commit be93c08

Please sign in to comment.