Skip to content

Commit

Permalink
Speech keyword detector tutorial
Browse files Browse the repository at this point in the history
Adds a basic training script for a simple audio model to our examples.
See third_party/docs_src/tutorials/audio_recognition.md for full documentation

PiperOrigin-RevId: 165025732
  • Loading branch information
petewarden authored and tensorflower-gardener committed Aug 11, 2017
1 parent e9a8d75 commit 0c6fd17
Show file tree
Hide file tree
Showing 28 changed files with 4,321 additions and 13 deletions.
1 change: 1 addition & 0 deletions tensorflow/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ filegroup(
"//tensorflow/examples/label_image:all_files",
"//tensorflow/examples/learn:all_files",
"//tensorflow/examples/saved_model:all_files",
"//tensorflow/examples/speech_commands:all_files",
"//tensorflow/examples/tutorials/estimators:all_files",
"//tensorflow/examples/tutorials/mnist:all_files",
"//tensorflow/examples/tutorials/word2vec:all_files",
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/contrib/framework/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ tf_custom_op_py_library(
"python/framework/tensor_util.py",
"python/ops/__init__.py",
"python/ops/arg_scope.py",
"python/ops/audio_ops.py",
"python/ops/checkpoint_ops.py",
"python/ops/ops.py",
"python/ops/prettyprint_ops.py",
Expand All @@ -50,6 +51,7 @@ tf_custom_op_py_library(
":gen_variable_ops",
"//tensorflow/contrib/util:util_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:audio_ops_gen",
"//tensorflow/python:control_flow_ops",
"//tensorflow/python:framework",
"//tensorflow/python:framework_for_generated_wrappers",
Expand Down
36 changes: 36 additions & 0 deletions tensorflow/contrib/framework/python/ops/audio_ops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

# pylint: disable=g-short-docstring-punctuation
"""Audio processing and decoding ops.
@@decode_wav
@@encode_wav
@@audio_spectrogram
@@mfcc
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


# go/tf-wildcard-import
# pylint: disable=wildcard-import
from tensorflow.python.ops.gen_audio_ops import *
# pylint: enable=wildcard-import

from tensorflow.python.util.all_util import remove_undocumented

# NOTE(review): presumably prunes from this module every wildcard-imported
# symbol that is not listed with an `@@` entry in the module docstring; the
# empty list passes no additional names -- confirm against
# all_util.remove_undocumented.
remove_undocumented(__name__, [])
49 changes: 38 additions & 11 deletions tensorflow/core/lib/wav/wav_io.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,17 @@ Status ReadValue(const string& data, T* value, int* offset) {
return Status::OK();
}

// Copies `expected_length` bytes of `data`, starting at `*offset`, into
// `*value` and advances `*offset` past the bytes that were read.
//
// Returns InvalidArgument, leaving `*value` and `*offset` untouched, when the
// requested range is not fully contained in `data`. Negative offsets or
// lengths, and ranges whose end cannot be represented as an int, are rejected
// as well, so a corrupt file cannot cause signed overflow or out-of-range
// iterator arithmetic here.
Status ReadString(const string& data, int expected_length, string* value,
                  int* offset) {
  if (*offset < 0 || expected_length < 0) {
    return errors::InvalidArgument("Data too short when trying to read string");
  }
  // Both operands started life as non-negative ints, so adding them as
  // unsigned values cannot wrap around.
  const size_t start = static_cast<size_t>(*offset);
  const size_t end = start + static_cast<size_t>(expected_length);
  // The new offset is handed back through an int; make sure it survives the
  // round trip before committing to it.
  const int new_offset = static_cast<int>(end);
  if (end > data.size() || new_offset < 0 ||
      static_cast<size_t>(new_offset) != end) {
    return errors::InvalidArgument("Data too short when trying to read string");
  }
  *value = data.substr(start, end - start);
  *offset = new_offset;
  return Status::OK();
}

} // namespace

Status EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate,
Expand Down Expand Up @@ -254,17 +265,33 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string,
// Skip over this unused section.
offset += 2;
}
TF_RETURN_IF_ERROR(ExpectText(wav_string, kDataChunkId, &offset));
uint32 data_size;
TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &data_size, &offset));
*sample_count = data_size / bytes_per_sample;
const uint32 data_count = *sample_count * *channel_count;
float_values->resize(data_count);
for (int i = 0; i < data_count; ++i) {
int16 single_channel_value = 0;
TF_RETURN_IF_ERROR(
ReadValue<int16>(wav_string, &single_channel_value, &offset));
(*float_values)[i] = Int16SampleToFloat(single_channel_value);

bool was_data_found = false;
while (offset < wav_string.size()) {
string chunk_id;
TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset));
uint32 chunk_size;
TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &chunk_size, &offset));
if (chunk_id == kDataChunkId) {
if (was_data_found) {
return errors::InvalidArgument("More than one data chunk found in WAV");
}
was_data_found = true;
*sample_count = chunk_size / bytes_per_sample;
const uint32 data_count = *sample_count * *channel_count;
float_values->resize(data_count);
for (int i = 0; i < data_count; ++i) {
int16 single_channel_value = 0;
TF_RETURN_IF_ERROR(
ReadValue<int16>(wav_string, &single_channel_value, &offset));
(*float_values)[i] = Int16SampleToFloat(single_channel_value);
}
} else {
offset += chunk_size;
}
}
if (!was_data_found) {
return errors::InvalidArgument("No data chunk found in WAV");
}
return Status::OK();
}
Expand Down
4 changes: 2 additions & 2 deletions tensorflow/core/ops/audio_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Status DecodeWavShapeFn(InferenceContext* c) {
// Shape function (by its name, presumably the one used for the EncodeWav op):
// validates the ranks of inputs 0 and 1 and declares a scalar output.
Status EncodeWavShapeFn(InferenceContext* c) {
ShapeHandle unused;
// Input 0 must have rank 2.
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));
// NOTE(review): the two checks on input 1 below look like the removed (rank 1)
// and added (rank 0) lines of the diff shown side by side; presumably only the
// rank-0 check exists in the committed file -- confirm against the repository.
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
// Output 0 is a scalar.
c->set_output(0, c->Scalar());
return Status::OK();
}
Expand Down Expand Up @@ -104,7 +104,7 @@ Status MfccShapeFn(InferenceContext* c) {
ShapeHandle spectrogram;
TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 3, &spectrogram));
ShapeHandle unused;
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused));
TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));

int32 dct_coefficient_count;
TF_RETURN_IF_ERROR(
Expand Down
Loading

0 comments on commit 0c6fd17

Please sign in to comment.