Skip to content

Commit

Permalink
Fairly complete version of common-factored out audio async handler fo…
Browse files Browse the repository at this point in the history
…r both stdin and SDL stream versions (next up is making one stream.cpp to rule them all, via factory method and CLI params to choose source)
  • Loading branch information
shanelenagh committed Feb 15, 2024
1 parent 9ee53a7 commit 59a1906
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 46 deletions.
23 changes: 5 additions & 18 deletions examples/audio-stdin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@
#include <cstring>
#include <cassert>

// Because the original happened to handle OS signals in the same library as
// handled the audio, this is implemented here.
// TODO: split this out to something a bit more coherent

namespace stream_constants {
float S16_TO_F32_SCALE_FACTOR = 0.000030517578125f;
}

// Builds a stdin-backed capture object. The requested buffer length in
// milliseconds is handed straight to the audio_async base-class constructor;
// nothing else is set up here.
audio_stdin::audio_stdin(int len_ms)
    : audio_async(len_ms)
{
}

Expand All @@ -25,8 +18,7 @@ passed in needs to already be open, and that the destructor doesn't close it.
bool audio_stdin::init(whisper_params params, int sample_rate) {

audio_async::init(params, sample_rate);
m_audio.resize(0); // resize to reclaim this memory, as it isn't needed (floats buffer filled on the fly)
m_in_buffer.resize((m_sample_rate*m_len_ms)/1000);
m_audio.resize((m_sample_rate*m_len_ms)/1000);

return true;
}
Expand All @@ -49,18 +41,13 @@ void audio_stdin::get(int ms, std::vector<float> & result) {

size_t n_samples = (m_sample_rate * ms) / 1000;

assert(n_samples <= m_in_buffer.size()/sizeof(int16_t));
assert(n_samples <= m_audio.size()/sizeof(int16_t));
// stdin is PCM mono 16khz in s16le format. Use ffmpeg to make that happen.
int nread = read(STDIN_FILENO, m_in_buffer.data(), n_samples*sizeof(int16_t) /*m_in_buffer.size()*/);
int nread = read(STDIN_FILENO, m_audio.data(), n_samples*sizeof(int16_t) /*m_in_buffer.size()*/);
if (nread <= 0) {
m_running = false;
return;
}

int float_sample_count = nread / sizeof(int16_t);
result.resize(float_sample_count);
for (int i = 0; i < float_sample_count; i++) {
result[i] = m_in_buffer[i] * stream_constants::S16_TO_F32_SCALE_FACTOR;
}
}
transfer_buffer(result, 0, nread / sizeof(int16_t));
}
}
6 changes: 1 addition & 5 deletions examples/audio-stdin.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
//
// Stdin wav capture
//
class audio_stdin : public audio_async {
class audio_stdin : public audio_async<int16_t> {
public:
audio_stdin(int len_ms);
~audio_stdin();
Expand All @@ -20,10 +20,6 @@ class audio_stdin : public audio_async {
// get audio data from the circular buffer
// Does not return a status: when the read from stdin hits EOF or fails, the capture is marked as no longer running.
void get(int ms, std::vector<float> & audio) override;

private:
// Since the data we plan on receiving needs converting, we need somewhere to hold it while we do that
std::vector<int16_t> m_in_buffer;
};

// Return false if need to quit - goes false at eof?
Expand Down
75 changes: 53 additions & 22 deletions examples/common-audioasync.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
#include <thread>
#include <cstring>

// Constants shared by the audio_async sample-conversion helpers.
namespace audioasync_constants {
    // Multiplier that converts a signed 16-bit PCM sample to float:
    // exactly 1 / 32768 (2^-15), so int16_t values land in [-1.0, 1.0).
    constexpr float S16_TO_F32_SCALE_FACTOR = 1.0f / 32768.0f;
}

// command-line parameters
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
Expand Down Expand Up @@ -37,9 +41,9 @@ struct whisper_params {
};

//
// Abstract interface for audio capture
// Abstract class for audio capture
//
class audio_async {
template <typename T = float> class audio_async {
public:
audio_async(int len_ms) {
m_len_ms = len_ms;
Expand All @@ -55,17 +59,17 @@ class audio_async {
return true;
}

virtual bool resume() {
bool resume() {
m_running = true;
return true;
}

virtual bool pause() {
bool pause() {
m_running = false;
return true;
}

virtual bool clear() {
bool clear() {
{
std::lock_guard<std::mutex> lock(m_mutex);

Expand All @@ -75,7 +79,7 @@ class audio_async {
return true;
}

virtual bool is_running() {
bool is_running() {
return m_running;
}

Expand Down Expand Up @@ -107,19 +111,12 @@ class audio_async {
s0 += m_audio.size();
}

if (s0 + n_samples > m_audio.size()) {
const size_t n0 = m_audio.size() - s0;

memcpy(result.data(), &m_audio[s0], n0 * sizeof(float));
memcpy(&result[n0], &m_audio[0], (n_samples - n0) * sizeof(float));
} else {
memcpy(result.data(), &m_audio[s0], n_samples * sizeof(float));
}
transfer_buffer(result, s0, n_samples);
}
}

// callback to be called by audio source
virtual void callback(uint8_t * stream, int len) {
// called by the audio source with newly captured data; copies it into the circular buffer m_audio
void callback(uint8_t * stream, int len) {
if (!m_running) {
return;
}
Expand All @@ -129,7 +126,7 @@ class audio_async {
if (n_samples > m_audio.size()) {
n_samples = m_audio.size();

stream += (len - (n_samples * sizeof(float)));
stream += (len - (n_samples * sizeof(T)));
}

//fprintf(stderr, "%s: %zu samples, pos %zu, len %zu\n", __func__, n_samples, m_audio_pos, m_audio_len);
Expand All @@ -140,13 +137,13 @@ class audio_async {
if (m_audio_pos + n_samples > m_audio.size()) {
const size_t n0 = m_audio.size() - m_audio_pos;

memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(float));
memcpy(&m_audio[0], stream + n0 * sizeof(float), (n_samples - n0) * sizeof(float));
memcpy(&m_audio[m_audio_pos], stream, n0 * sizeof(T));
memcpy(&m_audio[0], stream + n0 * sizeof(T), (n_samples - n0) * sizeof(T));

m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = m_audio.size();
} else {
memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(float));
memcpy(&m_audio[m_audio_pos], stream, n_samples * sizeof(T));

m_audio_pos = (m_audio_pos + n_samples) % m_audio.size();
m_audio_len = std::min(m_audio_len + n_samples, m_audio.size());
Expand All @@ -161,7 +158,41 @@ class audio_async {
std::atomic_bool m_running;
std::mutex m_mutex;

std::vector<float> m_audio;
std::vector<T> m_audio;
size_t m_audio_pos = 0;
size_t m_audio_len = 0;
};

void transfer_buffer(std::vector<float> & result, int start, int sampleCount);
};

// Fast memcpy transfer for float.
//
// Copies `sampleCount` samples out of the circular buffer m_audio into
// `result`, beginning at index `start` and wrapping around to index 0 when the
// end of the buffer is reached.
//
//   result      - destination; resized to the number of samples copied
//   start       - index in m_audio of the first sample to copy
//   sampleCount - number of samples requested
//
// A non-positive `sampleCount`, a `start` outside the buffer, or an empty
// buffer leaves `result` empty. A request larger than the buffer is clamped to
// the buffer size so the wrap-around copy can never read past the end.
template <> inline void audio_async<float>::transfer_buffer(std::vector<float> & result, int start, int sampleCount) {
    const size_t capacity = m_audio.size();

    // Reject degenerate requests up front; indexing m_audio below would
    // otherwise be undefined behaviour.
    if (sampleCount <= 0 || start < 0 || capacity == 0 || static_cast<size_t>(start) >= capacity) {
        result.clear();
        return;
    }

    const size_t first = static_cast<size_t>(start);
    size_t count = static_cast<size_t>(sampleCount);
    if (count > capacity) {
        count = capacity;
    }

    result.resize(count);

    // Number of samples available before the end of the ring is reached.
    const size_t n0 = capacity - first;

    if (count > n0) {
        // Wraps: tail of the ring first, then the remainder from the front.
        memcpy(result.data(), &m_audio[first], n0 * sizeof(float));
        memcpy(&result[n0], &m_audio[0], (count - n0) * sizeof(float));
    } else {
        memcpy(result.data(), &m_audio[first], count * sizeof(float));
    }
}

// Conversion from int16_t to float for s16le buffers.
//
// Copies `sampleCount` samples out of the circular buffer m_audio into
// `result`, beginning at index `start` and wrapping around to index 0 when the
// end of the buffer is reached. Each sample is multiplied by
// audioasync_constants::S16_TO_F32_SCALE_FACTOR on the way out.
//
//   result      - destination; resized to the number of samples converted
//   start       - index in m_audio of the first sample to convert
//   sampleCount - number of samples requested
//
// A non-positive `sampleCount`, a `start` outside the buffer, or an empty
// buffer leaves `result` empty. A request larger than the buffer is clamped to
// the buffer size so the wrap-around read can never run past the end.
template <> inline void audio_async<int16_t>::transfer_buffer(std::vector<float> & result, int start, int sampleCount) {
    const size_t capacity = m_audio.size();

    // Reject degenerate requests up front; indexing m_audio below would
    // otherwise be undefined behaviour.
    if (sampleCount <= 0 || start < 0 || capacity == 0 || static_cast<size_t>(start) >= capacity) {
        result.clear();
        return;
    }

    const size_t first = static_cast<size_t>(start);
    size_t count = static_cast<size_t>(sampleCount);
    if (count > capacity) {
        count = capacity;
    }

    result.resize(count);

    const float scale = audioasync_constants::S16_TO_F32_SCALE_FACTOR;

    // Samples that can be read before the end of the ring is reached.
    const size_t untilEnd = capacity - first;
    const size_t head = count < untilEnd ? count : untilEnd;

    // Part 1: from `start` up to the end of the ring (or fewer, if no wrap).
    for (size_t i = 0; i < head; i++) {
        result[i] = m_audio[first + i] * scale;
    }

    // Part 2 (only when wrapping): the remainder comes from the FRONT of the
    // ring, so the source index restarts at 0 rather than continuing at `i`.
    for (size_t i = head; i < count; i++) {
        result[i] = m_audio[i - head] * scale;
    }
}
2 changes: 1 addition & 1 deletion examples/common-sdl.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//
// SDL Audio capture
//
class audio_async_sdl : public audio_async {
class audio_async_sdl : public audio_async<float> {
public:
audio_async_sdl(int len_ms);
~audio_async_sdl();
Expand Down

0 comments on commit 59a1906

Please sign in to comment.