Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions examples/models/parakeet/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,13 +152,14 @@ int main(int argc, char** argv) {
ET_LOG(Error, "Preprocessing failed.");
return 1;
}
auto mel_features = preprocess_result.get();
auto preprocess_out = preprocess_result.get();

// --- Transcribe ---
ET_LOG(Info, "Running TDT greedy decode...");
auto result = runner.transcribe(mel_features, [](const std::string& piece) {
std::cout << piece << std::flush;
});
auto result = runner.transcribe(
preprocess_out.features,
[](const std::string& piece) { std::cout << piece << std::flush; },
preprocess_out.length);

if (!result.ok()) {
ET_LOG(Error, "Transcription failed.");
Expand Down
2 changes: 1 addition & 1 deletion extension/asr/runner/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ endif()
include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

set(runner_deps executorch_core extension_module extension_tensor
tokenizers::tokenizers
extension_llm_runner tokenizers::tokenizers
)

# Define runner library
Expand Down
16 changes: 12 additions & 4 deletions extension/asr/runner/transducer_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ Error TransducerRunner::load() {
return Error::Ok;
}

Result<::executorch::extension::TensorPtr> TransducerRunner::preprocess(
Result<PreprocessResult> TransducerRunner::preprocess(
::executorch::extension::TensorPtr raw_audio) {
if (!is_loaded()) {
ET_CHECK_OK_OR_RETURN_ERROR(load());
Expand Down Expand Up @@ -229,20 +229,28 @@ Result<::executorch::extension::TensorPtr> TransducerRunner::preprocess(
"Preprocessor returned unexpected output.");

auto mel = outputs[0].toTensor();
return std::make_shared<::executorch::aten::Tensor>(std::move(mel));
int64_t mel_len = mel.sizes()[1]; // default to tensor dim
if (outputs.size() >= 2 && outputs[1].isTensor()) {
mel_len = outputs[1].toTensor().const_data_ptr<int64_t>()[0];
}
return PreprocessResult{
std::make_shared<::executorch::aten::Tensor>(std::move(mel)), mel_len};
}

Result<std::vector<Token>> TransducerRunner::transcribe(
::executorch::extension::TensorPtr preprocessed_features,
std::function<void(const std::string&)> token_callback) {
std::function<void(const std::string&)> token_callback,
int64_t features_length) {
if (!is_loaded()) {
ET_CHECK_OK_OR_RETURN_ERROR(load());
}

stats_.inference_start_ms = ::executorch::extension::llm::time_in_ms();

// --- Encode ---
int64_t mel_len_value = preprocessed_features->size(1);
// Use provided length, or fall back to tensor dimension
int64_t mel_len_value =
features_length > 0 ? features_length : preprocessed_features->size(1);
std::vector<int64_t> mel_len_data = {mel_len_value};
auto mel_len = ::executorch::extension::from_blob(
mel_len_data.data(), {1}, ::executorch::aten::ScalarType::Long);
Expand Down
13 changes: 11 additions & 2 deletions extension/asr/runner/transducer_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ using ::executorch::extension::llm::Stats;
using ::executorch::runtime::Error;
using ::executorch::runtime::Result;

/**
 * Output of TransducerRunner::preprocess(): the preprocessed feature tensor
 * together with its actual (unpadded) length.
 *
 * Pass `length` to transcribe() as `features_length` so that it is used
 * instead of the feature tensor's own dimension.
 */
struct PreprocessResult {
  // Preprocessed features (e.g., mel spectrogram).
  ::executorch::extension::TensorPtr features;
  // Actual number of valid frames (excluding padding). Defaults to -1 so a
  // default-constructed result never carries an indeterminate value;
  // transcribe() treats any non-positive length as "not provided" and falls
  // back to the feature tensor's size(1).
  int64_t length = -1;
};

/**
* A decoded token with frame-level timing information.
*/
Expand Down Expand Up @@ -97,7 +105,7 @@ class ET_EXPERIMENTAL TransducerRunner {
* @returns The preprocessed features (e.g., mel spectrogram) together with
* their valid (unpadded) frame count, ready to pass to transcribe().
*/
Result<::executorch::extension::TensorPtr> preprocess(
Result<PreprocessResult> preprocess(
::executorch::extension::TensorPtr raw_audio);

/**
Expand All @@ -112,7 +120,8 @@ class ET_EXPERIMENTAL TransducerRunner {
*/
Result<std::vector<Token>> transcribe(
::executorch::extension::TensorPtr preprocessed_features,
std::function<void(const std::string&)> token_callback = {});
std::function<void(const std::string&)> token_callback = {},
int64_t features_length = -1);

/**
* Returns a reference to the loaded tokenizer, or nullptr if not loaded.
Expand Down
Loading