diff --git a/data/smoke_test/new-home-in-the-stars-16k.wav b/data/smoke_test/new-home-in-the-stars-16k.wav
new file mode 100644
index 0000000000..bbefd166f5
Binary files /dev/null and b/data/smoke_test/new-home-in-the-stars-16k.wav differ
diff --git a/native_client/test/concurrent_streams.py b/native_client/test/concurrent_streams.py
new file mode 100644
index 0000000000..51b9977447
--- /dev/null
+++ b/native_client/test/concurrent_streams.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function
+
+import argparse
+import contextlib
+import numpy as np
+import wave
+
+from deepspeech import Model
+
+
+# These constants control the beam search decoder
+
+# Beam width used in the CTC decoder when building candidate transcriptions
+BEAM_WIDTH = 500
+
+# The alpha hyperparameter of the CTC decoder. Language Model weight
+LM_ALPHA = 0.75
+
+# The beta hyperparameter of the CTC decoder. Word insertion bonus.
+LM_BETA = 1.85
+
+
+# These constants are tied to the shape of the graph used (changing them changes
+# the geometry of the first layer), so make sure you use the same constants that
+# were used during training
+
+# Number of MFCC features to use
+N_FEATURES = 26
+
+# Size of the context window used for producing timesteps in the input vector
+N_CONTEXT = 9
+
+
+def read_wav(path):
+    """Return (frame rate, samples) for the WAV file at `path`.
+
+    The frames are reinterpreted as 16-bit integers. contextlib.closing is
+    used because wave objects only became context managers in Python 3.4.
+    """
+    with contextlib.closing(wave.open(path, 'rb')) as fin:
+        rate = fin.getframerate()
+        samples = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
+    return rate, samples
+
+
+def main():
+    """Transcribe two audio files through interleaved streams of one model.
+
+    Both files are cut into 10 chunks which are fed alternately to two
+    streams created from the same Model. The transcription of the first
+    stream is printed first, followed by the one of the second stream.
+    """
+    parser = argparse.ArgumentParser(description='Running DeepSpeech inference.')
+    parser.add_argument('--model', required=True,
+                        help='Path to the model (protocol buffer binary file)')
+    parser.add_argument('--alphabet', required=True,
+                        help='Path to the configuration file specifying the alphabet used by the network')
+    parser.add_argument('--lm', nargs='?',
+                        help='Path to the language model binary file')
+    parser.add_argument('--trie', nargs='?',
+                        help='Path to the language model trie file created with native_client/generate_trie')
+    parser.add_argument('--audio1', required=True,
+                        help='First audio file to use in interleaved streams')
+    parser.add_argument('--audio2', required=True,
+                        help='Second audio file to use in interleaved streams')
+    args = parser.parse_args()
+
+    ds = Model(args.model, N_FEATURES, N_CONTEXT, args.alphabet, BEAM_WIDTH)
+
+    if args.lm and args.trie:
+        ds.enableDecoderWithLM(args.alphabet, args.lm, args.trie, LM_ALPHA, LM_BETA)
+
+    fs1, audio1 = read_wav(args.audio1)
+    fs2, audio2 = read_wav(args.audio2)
+
+    stream1 = ds.setupStream(sample_rate=fs1)
+    stream2 = ds.setupStream(sample_rate=fs2)
+
+    splits1 = np.array_split(audio1, 10)
+    splits2 = np.array_split(audio2, 10)
+
+    # Alternate between the streams so both are in flight at the same time.
+    for part1, part2 in zip(splits1, splits2):
+        ds.feedAudioContent(stream1, part1)
+        ds.feedAudioContent(stream2, part2)
+
+    print(ds.finishStream(stream1))
+    print(ds.finishStream(stream2))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/taskcluster/tc-python-tests-prod.sh b/taskcluster/tc-python-tests-prod.sh
index 6803082eb0..b735a30eb0 100644
--- a/taskcluster/tc-python-tests-prod.sh
+++ b/taskcluster/tc-python-tests-prod.sh
@@ -39,4 +39,6 @@ LD_LIBRARY_PATH=${PY37_LDPATH}:$LD_LIBRARY_PATH pip install --verbose --only-bin
 
 run_prod_inference_tests
 
+run_prod_concurrent_stream_tests
+
 virtualenv_deactivate "${pyver}" "${PYENV_NAME}"
diff --git a/taskcluster/tc-tests-utils.sh b/taskcluster/tc-tests-utils.sh
index dc1e7f3cc9..5455d748f7 100755
--- a/taskcluster/tc-tests-utils.sh
+++ b/taskcluster/tc-tests-utils.sh
@@ -419,6 +419,30 @@ run_all_inference_tests()
   assert_correct_warning_upsampling "${phrase_pbmodel_withlm_mono_8k}"
 }
 
+# Run concurrent_streams.py on two recordings and hand the transcription of
+# each stream to its own assertion.
+run_prod_concurrent_stream_tests()
+{
+  set +e
+  output=$(python ${TASKCLUSTER_TMP_DIR}/test_sources/concurrent_streams.py \
+             --model ${TASKCLUSTER_TMP_DIR}/${model_name_mmap} \
+             --alphabet ${TASKCLUSTER_TMP_DIR}/alphabet.txt \
+             --lm ${TASKCLUSTER_TMP_DIR}/lm.binary \
+             --trie ${TASKCLUSTER_TMP_DIR}/trie \
+             --audio1 ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav \
+             --audio2 ${TASKCLUSTER_TMP_DIR}/new-home-in-the-stars-16k.wav 2>/dev/null)
+  status=$?
+  set -e
+
+  # Quote ${output}: unquoted, the shell would join both lines into one and
+  # head/tail would each return the two transcriptions concatenated.
+  output1=$(echo "${output}" | head -n 1)
+  output2=$(echo "${output}" | tail -n 1)
+
+  assert_correct_ldc93s1_prodmodel "${output1}" "${status}"
+  assert_correct_inference "${output2}" "i must find a new home in the stars" "${status}"
+}
+
 run_prod_inference_tests()
 {
   set +e
@@ -540,6 +564,7 @@ download_data()
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.pruned.lm ${TASKCLUSTER_TMP_DIR}/lm.binary
   cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/smoke_test/vocab.trie ${TASKCLUSTER_TMP_DIR}/trie
+  cp -R ${DS_ROOT_TASK}/DeepSpeech/ds/native_client/test ${TASKCLUSTER_TMP_DIR}/test_sources
 }
 
 download_material()