Skip to content

Commit

Permalink
Record audio for transcription (#166)
Browse files Browse the repository at this point in the history
* record audio for transcription

* i can't spell
  • Loading branch information
notwaldorf committed Sep 13, 2018
1 parent 1232541 commit 9266efb
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 8 deletions.
14 changes: 14 additions & 0 deletions music/demos/transcription.html
Expand Up @@ -47,6 +47,20 @@ <h2>From an uploaded audio file</h2>
<p><b>Total Leaked Memory:</b> <code id="file-leaked-memory"></code></p>
</section>

<h2>From a microphone recording</h2>
<p>You can record piano audio from a microphone and transcribe it. If you're recording
something other than piano (like your voice), this will be transcribed but it
will probably be fairly noisy and incorrect.
</p>
<section>
<button id="recordBtn">Record</button>
<br>
<p><audio id="recorderPlayer" controls hidden></audio></p>
<p><b>Actual Transcription:</b> <code id="recorder-results" class="recorder"></code></p>
<p><b>It Took:</b> <code id="recorder-time" class="recorder"></code></p>
<p><b>Total Leaked Memory:</b> <code id="recorder-leaked-memory"></code></p>
</section>

<h2>From a test audio (250 frames / 8 seconds)</h2>
<p>We verify the model can transcribe a short sequence of piano audio, first computing its
<a href="https://librosa.github.io/librosa/generated/librosa.feature.melspectrogram.html" target="_blank">mel spectrogram</a>.
Expand Down
55 changes: 48 additions & 7 deletions music/demos/transcription.ts
Expand Up @@ -15,13 +15,16 @@
* limitations under the License.
*/
import * as tf from '@tensorflow/tfjs-core';
//@ts-ignore
import * as MediaRecorder from 'audio-recorder-polyfill';

import * as mm from '../src/index';
import {INoteSequence} from '../src/index';

// tslint:disable:max-line-length
import {loadAudioFromFile, loadAudioFromUrl} from '../src/transcription/audio_utils';

import {CHECKPOINTS_DIR, notesMatch, writeMemory, writeNoteSeqs, writeTimer} from './common';

// tslint:enable:max-line-length

mm.logging.verbosity = mm.logging.Level.DEBUG;
Expand Down Expand Up @@ -67,6 +70,43 @@ document.getElementById('melBtn').addEventListener('click', () => {
.then(() => writeMemory(tf.memory().numBytes));
});

// Audio recording. MediaRecorder is supported natively in Firefox and Chrome,
// but not in every browser (e.g. Safari).
// See https://caniuse.com/#feat=mediarecorder.
// Wherever it is missing, we fall back to a polyfill.

// Install the MediaRecorder polyfill on `window` when the browser does not
// provide its own implementation. `window` is accessed through an `any` alias
// because `MediaRecorder` is not declared on the `Window` type used here.
// tslint:disable-next-line:no-any
const appeaseTsLintWindow = (window as any);
if (!appeaseTsLintWindow.MediaRecorder) {
  mm.logging.log(
      'Using the MediaRecorder polyfill.', 'Demo', mm.logging.Level.DEBUG);
  appeaseTsLintWindow.MediaRecorder = MediaRecorder;
}

// The recorder for the take in progress. It stays undefined until the
// microphone permission request has resolved, and is cleared again on stop.
// tslint:disable-next-line:no-any
let recorder: any;
// The microphone stream backing `recorder`; kept so its tracks can be released.
let recordingStream: MediaStream|undefined;
// Mirrors the button state: true from the "Record" click until "Stop".
let isRecording = false;
const recordBtn = document.getElementById('recordBtn');

/** Stops every track of `stream` so the browser releases the microphone. */
function releaseStream(stream: MediaStream) {
  stream.getTracks().forEach((track) => track.stop());
}

// Toggle between starting a new recording and stopping the current one.
// Stopping makes the recorder emit `dataavailable`, which triggers the
// transcription of the recorded audio.
recordBtn.addEventListener('click', () => {
  if (isRecording) {
    isRecording = false;
    recordBtn.textContent = 'Record';

    // The recorder only exists once getUserMedia has resolved. If the user
    // clicks "Stop" while the permission prompt is still pending there is
    // nothing to stop yet; the pending promise below sees `isRecording` is
    // false and bails out instead.
    if (recorder) {
      recorder.stop();
      recorder = undefined;
    }
    if (recordingStream) {
      releaseStream(recordingStream);
      recordingStream = undefined;
    }
    return;
  }

  isRecording = true;
  recordBtn.textContent = 'Stop';
  // Request permissions to record audio.
  navigator.mediaDevices.getUserMedia({audio: true})
      .then((stream) => {
        if (!isRecording) {
          // Recording was cancelled before permission was granted: do not
          // start, and do not keep the microphone open.
          releaseStream(stream);
          return;
        }

        recordingStream = stream;
        recorder = new appeaseTsLintWindow.MediaRecorder(stream);

        // tslint:disable-next-line:no-any
        recorder.addEventListener('dataavailable', (e: any) => {
          transcribeFromFile(e.data, 'recorder');
        });
        recorder.start();
      })
      .catch((err) => {
        // Permission denied, no input device, etc. Put the button back in
        // its idle state so the user can try again.
        isRecording = false;
        recordBtn.textContent = 'Record';
        mm.logging.log(
            `Could not start recording: ${err}`, 'Demo',
            mm.logging.Level.WARN);
      });
});

let expectedNs: INoteSequence;
fetch(`${MEL_CKPT_URL}/${EXPECTED_NS_SUFFIX}`)
.then((response) => response.json())
Expand Down Expand Up @@ -105,11 +145,12 @@ async function transcribeFromAudio(oaf: mm.OnsetsAndFrames) {
'<b><span style="color:red">FALSE</span></b>';
}

async function transcribeFromFile(blob: Blob) {
setLoadingMessage('file');
async function transcribeFromFile(blob: Blob, prefix = 'file') {
setLoadingMessage(prefix);
const audio = await loadAudioFromFile(blob);

const audioEl = document.getElementById('filePlayer') as HTMLAudioElement;
const audioEl =
document.getElementById(`${prefix}Player`) as HTMLAudioElement;
audioEl.hidden = false;
audioEl.src = window.URL.createObjectURL(blob);

Expand All @@ -118,11 +159,11 @@ async function transcribeFromFile(blob: Blob) {
.then(async () => {
const start = performance.now();
const ns = await oafA.transcribeFromAudio(audio);
writeTimer('file-time', start);
writeNoteSeqs('file-results', [ns], undefined, true);
writeTimer(`${prefix}-time`, start);
writeNoteSeqs(`${prefix}-results`, [ns], undefined, true);
})
.then(() => oafA.dispose())
.then(() => writeMemory(tf.memory().numBytes, 'file-leaked-memory'));
.then(() => writeMemory(tf.memory().numBytes, `${prefix}-leaked-memory`));
}

function setLoadingMessage(className: string) {
Expand Down
1 change: 1 addition & 0 deletions music/package.json
Expand Up @@ -21,6 +21,7 @@
"@types/file-saver": "^1.3.0",
"@types/tape": "^4.2.32",
"@types/webmidi": "^2.0.2",
"audio-recorder-polyfill": "^0.1.2",
"browserify": "^14.4.0",
"clang-format": "^1.2.3",
"clone": "^1.0.4",
Expand Down
2 changes: 1 addition & 1 deletion music/src/core/logging.ts
Expand Up @@ -47,7 +47,7 @@ export function log(msg: string, prefix = 'Magenta.js', level = Level.INFO) {
}
if (verbosity >= level) {
const logMethod = (level === Level.WARN) ? console.warn : console.log;
logMethod(`%c${prefix}`, 'background:magenta; color:white', msg);
logMethod(`%c ${prefix} `, 'background:magenta; color:white', msg);
}
}

Expand Down
4 changes: 4 additions & 0 deletions music/yarn.lock
Expand Up @@ -274,6 +274,10 @@ async@^1.4.0, async@^1.5.2:
version "1.5.2"
resolved "https://registry.yarnpkg.com/async/-/async-1.5.2.tgz#ec6a61ae56480c0c3cb241c95618e20892f9672a"

audio-recorder-polyfill@^0.1.2:
version "0.1.2"
resolved "https://registry.yarnpkg.com/audio-recorder-polyfill/-/audio-recorder-polyfill-0.1.2.tgz#5a0659ea935dc9a51a15a5b759a67d9f237f9e71"

babel-code-frame@^6.22.0:
version "6.26.0"
resolved "https://registry.yarnpkg.com/babel-code-frame/-/babel-code-frame-6.26.0.tgz#63fd43f7dc1e3bb7ce35947db8fe369a3f58c74b"
Expand Down

0 comments on commit 9266efb

Please sign in to comment.