Merge pull request bbc#167 from sshniro/master

Adding speech to text adapter for Google cloud platform
pietrop · Jul 19, 2019 · be93c08 · be93c08
2 parents 96f6e6d + 3a74e94
commit be93c08
Show file tree

Hide file tree

Showing 9 changed files with 107,221 additions and 0 deletions.
diff --git a/demo/select-stt-json-type.js b/demo/select-stt-json-type.js
@@ -18,6 +18,7 @@ const SttTypeSelect = props => {
     <option value="vtt-youtube" disabled>Youtube VTT</option>
     <option value="amazontranscribe">Amazon Transcribe</option>
     <option value="digitalpaperedit">Digital Paper Edit</option>
+    <option value="google-stt">Google STT</option>
   </select>;
 };
 

diff --git a/packages/stt-adapters/google-stt/example-usage.js b/packages/stt-adapters/google-stt/example-usage.js
@@ -0,0 +1,5 @@
+import gcpSttToDraft from './index';
+import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json';
+
// Convert the bundled sample transcript and pretty-print the resulting blocks.
console.log('Starting');
const draftJsBlocks = gcpSttToDraft(gcpSttTedTalkTranscript);
console.log(JSON.stringify(draftJsBlocks, null, 2));
diff --git a/packages/stt-adapters/google-stt/index.js b/packages/stt-adapters/google-stt/index.js
@@ -0,0 +1,118 @@
+/**
+ * Converts Google Cloud Platform (GCP) Speech-to-Text JSON into Draft.js content blocks.
+ * See the `sample` folder for example input and output, and `example-usage.js` for usage.
+ */
+
+import generateEntitiesRanges from '../generate-entities-ranges/index.js';
+
+const NANO_SECOND = 1000000000;
+
/**
 * Picks the alternative with the highest confidence from one GCP STT result.
 *
 * eg sentences = { alternatives: [ { transcript: 'hello', confidence: 0.95, words: [...] }, ... ] }
 *
 * - An alternative whose `confidence` is missing or not numeric is ranked lowest,
 *   so it can never displace an alternative that does carry a score
 *   (NOTE(review): GCP appears to set `confidence` only on some alternatives — confirm
 *   against the API reference).
 * - On a tie the earliest alternative wins, preserving the order of the input list.
 *
 * @param {object} sentences - a single result object holding an `alternatives` array
 * @returns {object|undefined} the chosen alternative, or `undefined` when there is none
 */
export const getBestAlternativeSentence = sentences => {
  const alternatives = (sentences && sentences.alternatives) || [];

  if (alternatives.length === 0) {
    return undefined;
  }

  // NaN never compares greater than anything, so map it to -Infinity explicitly.
  const confidenceOf = alternative => {
    const confidence = parseFloat(alternative.confidence);

    return Number.isNaN(confidence) ? -Infinity : confidence;
  };

  return alternatives.reduce((best, candidate) =>
    confidenceOf(candidate) > confidenceOf(best) ? candidate : best
  );
};
+
/**
 * Strips whitespace from both ends of a string; inner whitespace is left untouched.
 * @param {string} text
 * @returns {string}
 */
export const trimLeadingAndTailingWhiteSpace = text => text.trim();
+
/**
 * Combines the `seconds` and `nanos` parts of a GCP time offset into fractional seconds.
 *
 * GCP does not provide a nanosecond attribute if the word starts at 0 nanosecond.
 * A missing (undefined or null) part, seconds included, is counted as 0 rather than
 * turning the whole result into NaN
 * (NOTE(review): presumably `seconds` can be omitted the same way `nanos` is — confirm).
 *
 * @param {string|number} [startSecond] - whole seconds
 * @param {string|number} [nanoSecond] - nanoseconds to add on top of the whole seconds
 * @returns {number} time in seconds
 */
const computeTimeInSeconds = (startSecond, nanoSecond) => {
  const isMissing = value => value === undefined || value === null;

  const wholeSeconds = isMissing(startSecond) ? 0 : parseFloat(startSecond);
  const fractionOfSecond = isMissing(nanoSecond) ? 0 : Number(nanoSecond) / NANO_SECOND;

  return wholeSeconds + fractionOfSecond;
};
+
/**
 * Reshapes a GCP word entry into the { start, end, text, confidence } form
 * passed to generateEntitiesRanges().
 *
 * @param {object} currentWord - word entry with `startTime`, `endTime` and `word`
 * @param {number} confidence - confidence to attach to the word
 * @returns {{start: number, end: number, text: string, confidence: number}}
 */
const normalizeWord = (currentWord, confidence) => {
  const { startTime, endTime, word } = currentWord;
  const start = computeTimeInSeconds(startTime.seconds, startTime.nanos);
  const end = computeTimeInSeconds(endTime.seconds, endTime.nanos);

  return { start, end, text: word, confidence };
};
+
/**
 * Builds one paragraph per GCP Speech to Text result, using the best alternative of each.
 *
 * Each paragraph is `{ words, text }`: `words` holds the normalized words of the best
 * alternative and `text` is a one-element array with its trimmed transcript.
 *
 * Results that cannot produce a paragraph are skipped instead of throwing:
 * - no best alternative could be picked (e.g. an empty `alternatives` list)
 * - the best alternative has neither transcript text nor words (e.g. an empty object)
 * A best alternative without a `words` array yields a paragraph with `words: []`.
 *
 * @param {array} [sentences] - array of result objects from GCP STT; a missing
 *   value is handled like an empty array
 * @returns {array} list of `{ words: [...], text: [string] }` paragraphs
 */
const groupWordsInParagraphs = sentences => {
  const results = [];

  (sentences || []).forEach(sentence => {
    const bestAlternative = getBestAlternativeSentence(sentence);

    if (!bestAlternative) {
      return;
    }

    const transcript = typeof bestAlternative.transcript === 'string'
      ? trimLeadingAndTailingWhiteSpace(bestAlternative.transcript)
      : '';
    const words = Array.isArray(bestAlternative.words) ? bestAlternative.words : [];

    // nothing to display and nothing to time: drop the result altogether
    if (transcript === '' && words.length === 0) {
      return;
    }

    results.push({
      // every word inherits the confidence of the alternative it belongs to
      words: words.map(word => normalizeWord(word, bestAlternative.confidence)),
      text: [ transcript ]
    });
  });

  return results;
};
+
/**
 * Converts a GCP Speech to Text JSON response into a list of DraftJs content blocks,
 * one block per paragraph returned by groupWordsInParagraphs().
 *
 * @param {object} gcpSttJson - GCP STT response; its `results` attribute is read
 * @returns {array} DraftJs blocks `{ text, type, data: { speaker, words, start }, entityRanges }`
 */
const gcpSttToDraft = gcpSttJson => {
  const wordsByParagraphs = groupWordsInParagraphs(gcpSttJson.results);

  return wordsByParagraphs.map((paragraph, i) => {
    const firstWord = paragraph.words[0];

    return {
      text: paragraph.text.join(' '),
      type: 'paragraph',
      data: {
        // falls back to a positional placeholder when the paragraph has no speaker
        speaker: paragraph.speaker ? `Speaker ${ paragraph.speaker }` : `TBC ${ i }`,
        words: paragraph.words,
        // a paragraph without words has no timing information: start at 0 instead of throwing
        start: firstWord ? parseFloat(firstWord.start) : 0
      },
      // the entities as ranges are each word in the space-joined text,
      // so the offset from the beginning of the paragraph and the length
      // need to be computed for each word
      entityRanges: generateEntitiesRanges(paragraph.words, 'text') // wordAttributeName
    };
  });
};
+
+export default gcpSttToDraft;
diff --git a/packages/stt-adapters/google-stt/index.test.js b/packages/stt-adapters/google-stt/index.test.js
@@ -0,0 +1,52 @@
+import gcpSttToDraft, {
+  getBestAlternativeSentence,
+  trimLeadingAndTailingWhiteSpace
+} from './index';
+import draftTranscriptSample from './sample/googleSttToDraftJs.sample.js';
+import gcpSttTedTalkTranscript from './sample/gcpSttPunctuation.sample.json';
+
// Converts the sample GCP transcript once and checks the output against the stored DraftJs sample.
describe('gcpSttToDraft', () => {
  const draftBlocks = gcpSttToDraft(gcpSttTedTalkTranscript);

  it('Should be defined', () => {
    expect(draftBlocks).toBeDefined();
  });

  it('Should be equal to expected value', () => {
    expect(draftBlocks).toEqual(draftTranscriptSample);
  });
});
+
// Only the outer padding must go; the run of spaces inside the sentence stays.
describe('leading and tailing white space should be removed from text block', () => {
  const paddedSentence = ' this is   a sentence ';
  const trimmedSentence = 'this is   a sentence';
  const actual = trimLeadingAndTailingWhiteSpace(paddedSentence);

  it('should be equal to expected value', () => {
    expect(actual).toEqual(trimmedSentence);
  });
});
+
// The alternative carrying the highest confidence (0.95) is the one expected back.
describe('Best alternative sentence should be returned', () => {
  const bestAlternative = {
    transcript: 'this is the first sentence',
    confidence: 0.95
  };
  const weakerAlternative = {
    transcript: 'this is the first sentence alternative',
    confidence: 0.80
  };
  const sentences = {
    alternatives: [
      { ...bestAlternative },
      { ...weakerAlternative }
    ]
  };

  it('Should be equal to expected value', () => {
    const picked = getBestAlternativeSentence(sentences);

    expect(picked).toEqual(bestAlternative);
  });
});