From 57b0a4f94fc16a4aba3583266da0d2aa4460420d Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Sat, 30 Aug 2025 10:22:38 +1000 Subject: [PATCH 1/6] Pull upstream RTC changes --- components/third_party/esp-webrtc-solution | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/third_party/esp-webrtc-solution b/components/third_party/esp-webrtc-solution index 7c670a5..0ad48d5 160000 --- a/components/third_party/esp-webrtc-solution +++ b/components/third_party/esp-webrtc-solution @@ -1 +1 @@ -Subproject commit 7c670a52aa4d47dea240ba3610c33286b480940e +Subproject commit 0ad48d537082f9c24d4fc228863b6138e3e78417 From 672e5e531f8699116a994df80f844b5b602bced1 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:06:59 +1000 Subject: [PATCH 2/6] Update dependencies - Drop dependency on _esp_webrtc_ - Depend on separated _esp_peer_ - Use upstream _esp_capture_ --- components/livekit/CMakeLists.txt | 3 +-- components/livekit/idf_component.yml | 17 ++++------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/components/livekit/CMakeLists.txt b/components/livekit/CMakeLists.txt index cdf6c5f..8c50fea 100644 --- a/components/livekit/CMakeLists.txt +++ b/components/livekit/CMakeLists.txt @@ -9,11 +9,10 @@ idf_component_register( esp_codec_dev esp_netif esp_websocket_client - esp_webrtc + esp_peer json mbedtls media_lib_sal - peer_default webrtc_utils nanopb khash diff --git a/components/livekit/idf_component.yml b/components/livekit/idf_component.yml index 6b107d0..b3a11e7 100644 --- a/components/livekit/idf_component.yml +++ b/components/livekit/idf_component.yml @@ -8,23 +8,14 @@ version: 0.2.0 dependencies: idf: ">=5.4" espressif/esp_websocket_client: ^1.4.0 + espressif/esp_codec_dev: "~1.4" + espressif/esp_capture: "~0.7" + esp_peer: + path: ../third_party/esp-webrtc-solution/components/esp_peer media_lib_sal: path: 
../third_party/esp-webrtc-solution/components/media_lib_sal - esp_webrtc: - path: ../third_party/esp-webrtc-solution/components/esp_webrtc - peer_default: - path: ../third_party/esp-webrtc-solution/components/esp_webrtc/impl/peer_default - public: true # Required to prevent linker error webrtc_utils: path: ../third_party/esp-webrtc-solution/components/webrtc_utils - capture_audio_enc: - path: ../third_party/esp-webrtc-solution/components/esp_capture/src/impl/capture_audio_enc - capture_video_enc: - path: ../third_party/esp-webrtc-solution/components/esp_capture/src/impl/capture_video_enc - capture_audio_src: - path: ../third_party/esp-webrtc-solution/components/esp_capture/src/impl/capture_audio_src - capture_video_src: - path: ../third_party/esp-webrtc-solution/components/esp_capture/src/impl/capture_video_src nanopb: path: ../third_party/nanopb khash: From 1396b5e20a74384d5af8622a0f9729d92e7c4b65 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:08:24 +1000 Subject: [PATCH 3/6] API changes for dependency upgrade --- components/livekit/core/engine.c | 48 +++++----- components/livekit/core/engine.h | 3 - components/livekit/core/livekit.c | 10 +- components/livekit/core/peer.c | 7 +- components/livekit/core/peer.h | 2 - components/livekit/core/system.c | 150 +++++++++++++++--------------- components/livekit/core/system.h | 11 ++- examples/minimal/main/media.c | 34 ++----- examples/voice_agent/main/media.c | 34 ++----- 9 files changed, 129 insertions(+), 170 deletions(-) diff --git a/components/livekit/core/engine.c b/components/livekit/core/engine.c index 429897b..2c1f3d4 100644 --- a/components/livekit/core/engine.c +++ b/components/livekit/core/engine.c @@ -20,6 +20,7 @@ #include "freertos/event_groups.h" #include "media_lib_os.h" #include "esp_codec_dev.h" +#include "esp_capture_sink.h" #include #include #include "esp_log.h" @@ -102,7 +103,7 @@ typedef struct { peer_handle_t sub_peer_handle; 
esp_codec_dev_handle_t renderer_handle; - esp_capture_path_handle_t capturer_path; + esp_capture_sink_handle_t capturer_path; bool is_media_streaming; char* server_url; @@ -196,24 +197,24 @@ static void on_peer_sub_audio_frame(esp_peer_audio_frame_t* frame, void *ctx) // MARK: - Published media -/// Converts `esp_peer_audio_codec_t` to equivalent `esp_capture_codec_type_t` value. -static inline esp_capture_codec_type_t capture_audio_codec_type(esp_peer_audio_codec_t peer_codec) +/// Converts `esp_peer_audio_codec_t` to equivalent `esp_capture_format_id_t` value. +static inline esp_capture_format_id_t capture_audio_codec_type(esp_peer_audio_codec_t peer_codec) { switch (peer_codec) { - case ESP_PEER_AUDIO_CODEC_G711A: return ESP_CAPTURE_CODEC_TYPE_G711A; - case ESP_PEER_AUDIO_CODEC_G711U: return ESP_CAPTURE_CODEC_TYPE_G711U; - case ESP_PEER_AUDIO_CODEC_OPUS: return ESP_CAPTURE_CODEC_TYPE_OPUS; - default: return ESP_CAPTURE_CODEC_TYPE_NONE; + case ESP_PEER_AUDIO_CODEC_G711A: return ESP_CAPTURE_FMT_ID_G711A; + case ESP_PEER_AUDIO_CODEC_G711U: return ESP_CAPTURE_FMT_ID_G711U; + case ESP_PEER_AUDIO_CODEC_OPUS: return ESP_CAPTURE_FMT_ID_OPUS; + default: return ESP_CAPTURE_FMT_ID_NONE; } } -/// Converts `esp_peer_video_codec_t` to equivalent `esp_capture_codec_type_t` value. -static inline esp_capture_codec_type_t capture_video_codec_type(esp_peer_video_codec_t peer_codec) +/// Converts `esp_peer_video_codec_t` to equivalent `esp_capture_format_id_t` value. 
+static inline esp_capture_format_id_t capture_video_codec_type(esp_peer_video_codec_t peer_codec) { switch (peer_codec) { - case ESP_PEER_VIDEO_CODEC_H264: return ESP_CAPTURE_CODEC_TYPE_H264; - case ESP_PEER_VIDEO_CODEC_MJPEG: return ESP_CAPTURE_CODEC_TYPE_MJPEG; - default: return ESP_CAPTURE_CODEC_TYPE_NONE; + case ESP_PEER_VIDEO_CODEC_H264: return ESP_CAPTURE_FMT_ID_H264; + case ESP_PEER_VIDEO_CODEC_MJPEG: return ESP_CAPTURE_FMT_ID_MJPEG; + default: return ESP_CAPTURE_FMT_ID_NONE; } } @@ -224,14 +225,14 @@ static inline void _media_stream_send_audio(engine_t *eng) esp_capture_stream_frame_t audio_frame = { .stream_type = ESP_CAPTURE_STREAM_TYPE_AUDIO, }; - while (esp_capture_acquire_path_frame(eng->capturer_path, &audio_frame, true) == ESP_CAPTURE_ERR_OK) { + while (esp_capture_sink_acquire_frame(eng->capturer_path, &audio_frame, true) == ESP_CAPTURE_ERR_OK) { esp_peer_audio_frame_t audio_send_frame = { .pts = audio_frame.pts, .data = audio_frame.data, .size = audio_frame.size, }; peer_send_audio(eng->pub_peer_handle, &audio_send_frame); - esp_capture_release_path_frame(eng->capturer_path, &audio_frame); + esp_capture_sink_release_frame(eng->capturer_path, &audio_frame); } } @@ -242,14 +243,14 @@ static inline void _media_stream_send_video(engine_t *eng) esp_capture_stream_frame_t video_frame = { .stream_type = ESP_CAPTURE_STREAM_TYPE_VIDEO, }; - if (esp_capture_acquire_path_frame(eng->capturer_path, &video_frame, true) == ESP_CAPTURE_ERR_OK) { + if (esp_capture_sink_acquire_frame(eng->capturer_path, &video_frame, true) == ESP_CAPTURE_ERR_OK) { esp_peer_video_frame_t video_send_frame = { .pts = video_frame.pts, .data = video_frame.data, .size = video_frame.size, }; peer_send_video(eng->pub_peer_handle, &video_send_frame); - esp_capture_release_path_frame(eng->capturer_path, &video_frame); + esp_capture_sink_release_frame(eng->capturer_path, &video_frame); } } @@ -276,7 +277,7 @@ static engine_err_t media_stream_begin(engine_t *eng) } media_lib_thread_handle_t 
handle = NULL; eng->is_media_streaming = true; - if (media_lib_thread_create_from_scheduler(&handle, STREAM_THREAD_NAME, media_stream_task, eng) != ESP_OK) { + if (media_lib_thread_create_from_scheduler(&handle, "lk_eng_stream", media_stream_task, eng) != ESP_OK) { ESP_LOGE(TAG, "Failed to create media stream thread"); eng->is_media_streaming = false; return ENGINE_ERR_MEDIA; @@ -1127,13 +1128,13 @@ engine_handle_t engine_init(const engine_options_t *options) esp_capture_sink_cfg_t sink_cfg = { .audio_info = { - .codec = capture_audio_codec_type(eng->options.media.audio_info.codec), + .format_id = capture_audio_codec_type(eng->options.media.audio_info.codec), .sample_rate = eng->options.media.audio_info.sample_rate, .channel = eng->options.media.audio_info.channel, .bits_per_sample = 16, }, .video_info = { - .codec = capture_video_codec_type(eng->options.media.video_info.codec), + .format_id = capture_video_codec_type(eng->options.media.video_info.codec), .width = eng->options.media.video_info.width, .height = eng->options.media.video_info.height, .fps = eng->options.media.video_info.fps, @@ -1143,17 +1144,18 @@ engine_handle_t engine_init(const engine_options_t *options) // TODO: Can we ensure the renderer is valid? If not, return error. 
eng->renderer_handle = options->media.renderer; } - if (esp_capture_setup_path( + + if (esp_capture_sink_setup( eng->options.media.capturer, - ESP_CAPTURE_PATH_PRIMARY, + 0, // Path index &sink_cfg, &eng->capturer_path ) != ESP_CAPTURE_ERR_OK) { goto _init_failed; } - if (esp_capture_enable_path( + if (esp_capture_sink_enable( eng->capturer_path, - ESP_CAPTURE_RUN_TYPE_ALWAYS + ESP_CAPTURE_RUN_MODE_ALWAYS ) != ESP_CAPTURE_ERR_OK) { goto _init_failed; } diff --git a/components/livekit/core/engine.h b/components/livekit/core/engine.h index a512f69..f9956c1 100644 --- a/components/livekit/core/engine.h +++ b/components/livekit/core/engine.h @@ -17,7 +17,6 @@ #pragma once #include "esp_peer.h" -#include "esp_peer_signaling.h" #include "esp_capture.h" #include "av_render.h" @@ -25,8 +24,6 @@ #include "common.h" #include "protocol.h" -#define STREAM_THREAD_NAME "lk_stream" - #ifdef __cplusplus extern "C" { #endif diff --git a/components/livekit/core/livekit.c b/components/livekit/core/livekit.c index c4296fa..9e6b9fa 100644 --- a/components/livekit/core/livekit.c +++ b/components/livekit/core/livekit.c @@ -180,8 +180,8 @@ livekit_err_t livekit_room_create(livekit_room_handle_t *handle, const livekit_r if (handle == NULL || options == NULL) { return LIVEKIT_ERR_INVALID_ARG; } - if (!system_is_media_lib_setup()) { - ESP_LOGE(TAG, "Must perform system initialization before creating a room"); + if (!system_init_is_done()) { + ESP_LOGE(TAG, "System initialization not performed or failed"); return LIVEKIT_ERR_SYSTEM_INIT; } @@ -412,8 +412,10 @@ livekit_err_t livekit_room_rpc_unregister(livekit_room_handle_t handle, const ch livekit_err_t livekit_system_init(void) { - if (!system_setup_media_lib()) { - return LIVEKIT_ERR_SYSTEM_INIT; + esp_err_t ret = system_init(); + if (ret != ESP_OK) { + ESP_LOGE(TAG, "System initialization failed"); + return LIVEKIT_ERR_SYSTEM_INIT; } return LIVEKIT_ERR_NONE; } \ No newline at end of file diff --git a/components/livekit/core/peer.c 
b/components/livekit/core/peer.c index 8b32417..83fbed2 100644 --- a/components/livekit/core/peer.c +++ b/components/livekit/core/peer.c @@ -18,8 +18,6 @@ #include "esp_log.h" #include "esp_peer.h" #include "esp_peer_default.h" -#include "esp_peer_signaling.h" -#include "esp_webrtc_defaults.h" #include "media_lib_os.h" #include "esp_codec_dev.h" #include "utils.h" @@ -30,9 +28,6 @@ static const char *SUB_TAG = "livekit_peer.sub"; static const char *PUB_TAG = "livekit_peer.pub"; #define TAG(peer) (peer->options.role == PEER_ROLE_SUBSCRIBER ? SUB_TAG : PUB_TAG) -#define SUB_THREAD_NAME (PEER_THREAD_NAME_PREFIX "sub") -#define PUB_THREAD_NAME (PEER_THREAD_NAME_PREFIX "pub") - #define RELIABLE_CHANNEL_LABEL "_reliable" #define LOSSY_CHANNEL_LABEL "_lossy" #define STREAM_ID_INVALID 0xFFFF @@ -364,7 +359,7 @@ peer_err_t peer_connect(peer_handle_t handle) peer->running = true; media_lib_thread_handle_t thread; const char* thread_name = peer->options.role == PEER_ROLE_SUBSCRIBER ? - SUB_THREAD_NAME : PUB_THREAD_NAME; + "lk_peer_sub" : "lk_peer_pub"; if (media_lib_thread_create_from_scheduler(&thread, thread_name, peer_task, peer) != ESP_PEER_ERR_NONE) { ESP_LOGE(TAG(peer), "Failed to create thread"); return PEER_ERR_RTC; diff --git a/components/livekit/core/peer.h b/components/livekit/core/peer.h index 3b18886..526617e 100644 --- a/components/livekit/core/peer.h +++ b/components/livekit/core/peer.h @@ -19,8 +19,6 @@ #include "common.h" #include "protocol.h" -#define PEER_THREAD_NAME_PREFIX "lk_peer_" - #ifdef __cplusplus extern "C" { #endif diff --git a/components/livekit/core/system.c b/components/livekit/core/system.c index 251c796..90308ee 100644 --- a/components/livekit/core/system.c +++ b/components/livekit/core/system.c @@ -14,99 +14,95 @@ * limitations under the License. 
*/ -#include -#include "esp_log.h" -#include "webrtc_utils_time.h" +#include "esp_capture.h" #include "media_lib_os.h" #include "media_lib_adapter.h" -#include "system.h" -#include "peer.h" -#include "engine.h" -#define VIDEO_ENCODE_THREAD_NAME "venc" -#define AUDIO_ENCODE_THREAD_NAME "aenc" -#define AUDIO_DECODE_THREAD_NAME "Adec" -#define AEC_SRC_READ_THREAD_NAME "SrcRead" -#define AEC_BUFFER_IN_THREAD_NAME "buffer_in" +#include "system.h" -static const char *TAG = "livekit_system"; -static bool is_media_lib_setup = false; +// MARK: - Thread schedulers -static void thread_scheduler(const char *thread_name, media_lib_thread_cfg_t *thread_cfg) +/// Thread scheduler for `media_lib_sal`. +static void media_lib_scheduler(const char *name, media_lib_thread_cfg_t *cfg) { - ESP_LOGD(TAG, "Scheduling thread '%s'", thread_name); - - // LiveKit threads - if (strncmp(thread_name, PEER_THREAD_NAME_PREFIX, strlen(PEER_THREAD_NAME_PREFIX)) == 0) { - thread_cfg->stack_size = 25 * 1024; - thread_cfg->priority = 18; - thread_cfg->core_id = 1; - return; - } - if (strcmp(thread_name, STREAM_THREAD_NAME) == 0) { - thread_cfg->stack_size = 4 * 1024; - thread_cfg->priority = 15; - thread_cfg->core_id = 1; - return; - } + // Thread names by components: + // esp_capture: venc_0, aenc_0, buffer_in, AUD_SRC + // av_render: Adec, ARender + // livekit: lk_peer_sub, lk_peer_pub, lk_eng_stream - // Media lib threads - if (strcmp(thread_name, AUDIO_DECODE_THREAD_NAME) == 0) { - thread_cfg->stack_size = 40 * 1024; - thread_cfg->priority = 10; - thread_cfg->core_id = 1; - return; - } - if (strcmp(thread_name, AUDIO_ENCODE_THREAD_NAME) == 0) { - // Required for Opus - thread_cfg->stack_size = 40 * 1024; - thread_cfg->priority = 10; - return; - } - if (strcmp(thread_name, AEC_SRC_READ_THREAD_NAME) == 0) { - thread_cfg->stack_size = 40 * 1024; - thread_cfg->priority = 16; - thread_cfg->core_id = 0; - return; - } - if (strcmp(thread_name, AEC_BUFFER_IN_THREAD_NAME) == 0) { - thread_cfg->stack_size = 
6 * 1024; - thread_cfg->priority = 10; - thread_cfg->core_id = 0; - return; - } - if (strcmp(thread_name, VIDEO_ENCODE_THREAD_NAME) == 0) { + if (strcmp(name, "venc_0") == 0) { #if CONFIG_IDF_TARGET_ESP32S3 - thread_cfg->stack_size = 20 * 1024; + // Large stack size required for H264 when not using a hardware encoder + cfg->stack_size = 20 * 1024; #endif - thread_cfg->priority = 10; - return; + cfg->priority = 10; + } else if (strcmp(name, "aenc_0") == 0) { + // Large stack size required for Opus + cfg->stack_size = 40 * 1024; + cfg->priority = 10; + cfg->core_id = 1; + } else if (strcmp(name, "buffer_in") == 0) { + cfg->stack_size = 6 * 1024; + cfg->priority = 10; + cfg->core_id = 0; + } else if (strcmp(name, "AUD_SRC") == 0) { + cfg->stack_size = 40 * 1024; + cfg->priority = 15; + } else if (strcmp(name, "lk_peer_sub") == 0 || strcmp(name, "lk_peer_pub") == 0) { + cfg->stack_size = 25 * 1024; + cfg->priority = 18; + cfg->core_id = 1; + } else if (strcmp(name, "lk_eng_stream") == 0) { + cfg->stack_size = 4 * 1024; + cfg->priority = 15; + cfg->core_id = 1; + } else if (strcmp(name, "Adec") == 0) { + cfg->stack_size = 40 * 1024; + cfg->priority = 15; + cfg->core_id = 0; + } else if (strcmp(name, "ARender") == 0) { + cfg->priority = 20; } } -bool system_setup_media_lib(void) +/// Thread scheduler for `esp_capture`. 
+static void capture_scheduler(const char *name, esp_capture_thread_schedule_cfg_t *cfg) { - esp_err_t ret = media_lib_add_default_adapter(); - if (ret != ESP_OK) { - ESP_LOGE(TAG, "Failed to setup media lib"); - return false; - } - media_lib_thread_set_schedule_cb(thread_scheduler); - is_media_lib_setup = true; - return true; + media_lib_thread_cfg_t media_lib_cfg = { + .stack_size = cfg->stack_size, + .priority = cfg->priority, + .core_id = cfg->core_id, + }; + media_lib_scheduler(name, &media_lib_cfg); + + cfg->stack_in_ext = true; + cfg->stack_size = media_lib_cfg.stack_size; + cfg->priority = media_lib_cfg.priority; + cfg->core_id = media_lib_cfg.core_id; } -bool system_is_media_lib_setup(void) +// MARK: - Public API + +static bool init_performed = false; + +esp_err_t system_init(void) { - return is_media_lib_setup; + if (init_performed) { + return ESP_OK; + } + esp_err_t ret = media_lib_add_default_adapter(); + if (ret != ESP_OK) return ret; + + ret = esp_capture_set_thread_scheduler(capture_scheduler); + if (ret != ESP_OK) return ret; + + media_lib_thread_set_schedule_cb(media_lib_scheduler); + + init_performed = true; + return ESP_OK; } -bool system_sync_time(void) +bool system_init_is_done(void) { - esp_err_t ret = webrtc_utils_time_sync_init(); - if (ret != ESP_OK) { - ESP_LOGE(TAG, "Failed to sync time"); - return false; - } - return true; -} \ No newline at end of file + return init_performed; +} diff --git a/components/livekit/core/system.h b/components/livekit/core/system.h index 7310719..b654c5a 100644 --- a/components/livekit/core/system.h +++ b/components/livekit/core/system.h @@ -16,14 +16,19 @@ #pragma once +#include +#include "esp_err.h" + #ifdef __cplusplus extern "C" { #endif -bool system_setup_media_lib(void); -bool system_is_media_lib_setup(void); +/// Performs one time system initialization tasks. 
+esp_err_t system_init(void); -bool system_sync_time(void); +/// Returns whether system initialization has been performed +/// with @ref system_init. +bool system_init_is_done(void); #ifdef __cplusplus } diff --git a/examples/minimal/main/media.c b/examples/minimal/main/media.c index b1e42f1..9bd9057 100644 --- a/examples/minimal/main/media.c +++ b/examples/minimal/main/media.c @@ -1,12 +1,11 @@ #include "esp_check.h" #include "esp_log.h" #include "codec_init.h" -#include "esp_capture_path_simple.h" -#include "esp_capture_audio_enc.h" #include "av_render_default.h" #include "esp_audio_dec_default.h" #include "esp_audio_enc_default.h" #include "esp_capture_defaults.h" +#include "esp_capture_sink.h" #include "media.h" @@ -16,15 +15,13 @@ static const char *TAG = "media"; ESP_RETURN_ON_FALSE(pointer != NULL, -1, TAG, message) typedef struct { - esp_capture_aenc_if_t *audio_encoder; + esp_capture_sink_handle_t capturer_handle; esp_capture_audio_src_if_t *audio_source; - esp_capture_path_if_t *capture_path; - esp_capture_path_handle_t capturer_handle; } capture_system_t; typedef struct { audio_render_handle_t audio_renderer; - av_render_handle_t av_renderer_handle; + av_render_handle_t av_renderer_handle; } renderer_system_t; static capture_system_t capturer_system; @@ -32,11 +29,6 @@ static renderer_system_t renderer_system; static int build_capturer_system(void) { - // 1. Create audio encoder - capturer_system.audio_encoder = esp_capture_new_audio_encoder(); - NULL_CHECK(capturer_system.audio_encoder, "Failed to create audio encoder"); - - // 2. Create audio source esp_codec_dev_handle_t record_handle = get_record_handle(); NULL_CHECK(record_handle, "Failed to get record handle"); @@ -48,18 +40,9 @@ static int build_capturer_system(void) capturer_system.audio_source = esp_capture_new_audio_aec_src(&codec_cfg); NULL_CHECK(capturer_system.audio_source, "Failed to create audio source"); - // 3. 
Create capture path - esp_capture_simple_path_cfg_t path_cfg = { - .aenc = capturer_system.audio_encoder, - }; - capturer_system.capture_path = esp_capture_build_simple_path(&path_cfg); - NULL_CHECK(capturer_system.capture_path, "Failed to create capture path"); - - // 4. Create capture system esp_capture_cfg_t cfg = { .sync_mode = ESP_CAPTURE_SYNC_MODE_AUDIO, - .audio_src = capturer_system.audio_source, - .capture_path = capturer_system.capture_path, + .audio_src = capturer_system.audio_source }; esp_capture_open(&cfg, &capturer_system.capturer_handle); NULL_CHECK(capturer_system.capturer_handle, "Failed to open capture system"); @@ -68,9 +51,11 @@ static int build_capturer_system(void) static int build_renderer_system(void) { - // 1. Create audio renderer + esp_codec_dev_handle_t render_device = get_playback_handle(); + NULL_CHECK(render_device, "Failed to get render device handle"); + i2s_render_cfg_t i2s_cfg = { - .play_handle = get_playback_handle() + .play_handle = render_device }; renderer_system.audio_renderer = av_render_alloc_i2s_render(&i2s_cfg); NULL_CHECK(renderer_system.audio_renderer, "Failed to create I2S renderer"); @@ -78,8 +63,6 @@ static int build_renderer_system(void) // Set initial speaker volume esp_codec_dev_set_out_vol(i2s_cfg.play_handle, CONFIG_DEFAULT_PLAYBACK_VOL); - // 2. Create AV renderer - // For this example, this only includes an audio renderer. av_render_cfg_t render_cfg = { .audio_render = renderer_system.audio_renderer, .audio_raw_fifo_size = 8 * 4096, @@ -89,7 +72,6 @@ static int build_renderer_system(void) renderer_system.av_renderer_handle = av_render_open(&render_cfg); NULL_CHECK(renderer_system.av_renderer_handle, "Failed to create AV renderer"); - // 3. 
Set frame info av_render_audio_frame_info_t frame_info = { .sample_rate = 16000, .channel = 2, diff --git a/examples/voice_agent/main/media.c b/examples/voice_agent/main/media.c index b1e42f1..9bd9057 100644 --- a/examples/voice_agent/main/media.c +++ b/examples/voice_agent/main/media.c @@ -1,12 +1,11 @@ #include "esp_check.h" #include "esp_log.h" #include "codec_init.h" -#include "esp_capture_path_simple.h" -#include "esp_capture_audio_enc.h" #include "av_render_default.h" #include "esp_audio_dec_default.h" #include "esp_audio_enc_default.h" #include "esp_capture_defaults.h" +#include "esp_capture_sink.h" #include "media.h" @@ -16,15 +15,13 @@ static const char *TAG = "media"; ESP_RETURN_ON_FALSE(pointer != NULL, -1, TAG, message) typedef struct { - esp_capture_aenc_if_t *audio_encoder; + esp_capture_sink_handle_t capturer_handle; esp_capture_audio_src_if_t *audio_source; - esp_capture_path_if_t *capture_path; - esp_capture_path_handle_t capturer_handle; } capture_system_t; typedef struct { audio_render_handle_t audio_renderer; - av_render_handle_t av_renderer_handle; + av_render_handle_t av_renderer_handle; } renderer_system_t; static capture_system_t capturer_system; @@ -32,11 +29,6 @@ static renderer_system_t renderer_system; static int build_capturer_system(void) { - // 1. Create audio encoder - capturer_system.audio_encoder = esp_capture_new_audio_encoder(); - NULL_CHECK(capturer_system.audio_encoder, "Failed to create audio encoder"); - - // 2. Create audio source esp_codec_dev_handle_t record_handle = get_record_handle(); NULL_CHECK(record_handle, "Failed to get record handle"); @@ -48,18 +40,9 @@ static int build_capturer_system(void) capturer_system.audio_source = esp_capture_new_audio_aec_src(&codec_cfg); NULL_CHECK(capturer_system.audio_source, "Failed to create audio source"); - // 3. 
Create capture path - esp_capture_simple_path_cfg_t path_cfg = { - .aenc = capturer_system.audio_encoder, - }; - capturer_system.capture_path = esp_capture_build_simple_path(&path_cfg); - NULL_CHECK(capturer_system.capture_path, "Failed to create capture path"); - - // 4. Create capture system esp_capture_cfg_t cfg = { .sync_mode = ESP_CAPTURE_SYNC_MODE_AUDIO, - .audio_src = capturer_system.audio_source, - .capture_path = capturer_system.capture_path, + .audio_src = capturer_system.audio_source }; esp_capture_open(&cfg, &capturer_system.capturer_handle); NULL_CHECK(capturer_system.capturer_handle, "Failed to open capture system"); @@ -68,9 +51,11 @@ static int build_capturer_system(void) static int build_renderer_system(void) { - // 1. Create audio renderer + esp_codec_dev_handle_t render_device = get_playback_handle(); + NULL_CHECK(render_device, "Failed to get render device handle"); + i2s_render_cfg_t i2s_cfg = { - .play_handle = get_playback_handle() + .play_handle = render_device }; renderer_system.audio_renderer = av_render_alloc_i2s_render(&i2s_cfg); NULL_CHECK(renderer_system.audio_renderer, "Failed to create I2S renderer"); @@ -78,8 +63,6 @@ static int build_renderer_system(void) // Set initial speaker volume esp_codec_dev_set_out_vol(i2s_cfg.play_handle, CONFIG_DEFAULT_PLAYBACK_VOL); - // 2. Create AV renderer - // For this example, this only includes an audio renderer. av_render_cfg_t render_cfg = { .audio_render = renderer_system.audio_renderer, .audio_raw_fifo_size = 8 * 4096, @@ -89,7 +72,6 @@ static int build_renderer_system(void) renderer_system.av_renderer_handle = av_render_open(&render_cfg); NULL_CHECK(renderer_system.av_renderer_handle, "Failed to create AV renderer"); - // 3. 
Set frame info av_render_audio_frame_info_t frame_info = { .sample_rate = 16000, .channel = 2, From 397868f7b3ba9f5c3b4390cb2d3730fec84ab99a Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Wed, 3 Sep 2025 16:08:46 +1000 Subject: [PATCH 4/6] Example sdkconfig change --- examples/minimal/sdkconfig.defaults | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/minimal/sdkconfig.defaults b/examples/minimal/sdkconfig.defaults index b40466a..3235a6b 100644 --- a/examples/minimal/sdkconfig.defaults +++ b/examples/minimal/sdkconfig.defaults @@ -23,7 +23,7 @@ CONFIG_MBEDTLS_SSL_DTLS_SRTP=y CONFIG_MBEDTLS_SSL_PROTO_DTLS=y CONFIG_PARTITION_TABLE_CUSTOM=y CONFIG_SPIRAM_FETCH_INSTRUCTIONS=y -CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=1024 +CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=256 CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=8192 CONFIG_SPIRAM_MODE_OCT=y CONFIG_SPIRAM_SPEED_80M=y From c553c7876bc6292c83851ea6c8a7d389facd3dc1 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Thu, 4 Sep 2025 10:30:54 +1000 Subject: [PATCH 5/6] Use esp_peer from registry --- components/livekit/idf_component.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/components/livekit/idf_component.yml b/components/livekit/idf_component.yml index b3a11e7..dc96149 100644 --- a/components/livekit/idf_component.yml +++ b/components/livekit/idf_component.yml @@ -10,8 +10,7 @@ dependencies: espressif/esp_websocket_client: ^1.4.0 espressif/esp_codec_dev: "~1.4" espressif/esp_capture: "~0.7" - esp_peer: - path: ../third_party/esp-webrtc-solution/components/esp_peer + espressif/esp_peer: ^1.2.3 media_lib_sal: path: ../third_party/esp-webrtc-solution/components/media_lib_sal webrtc_utils: From e8af7a711256693537ccdf64f488a43992a8b079 Mon Sep 17 00:00:00 2001 From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com> Date: Thu, 4 Sep 2025 11:12:08 +1000 Subject: [PATCH 6/6] Remove dependency on 
webrtc_utils --- components/livekit/CMakeLists.txt | 1 - components/livekit/idf_component.yml | 2 -- 2 files changed, 3 deletions(-) diff --git a/components/livekit/CMakeLists.txt b/components/livekit/CMakeLists.txt index 8c50fea..61b5a0e 100644 --- a/components/livekit/CMakeLists.txt +++ b/components/livekit/CMakeLists.txt @@ -13,7 +13,6 @@ idf_component_register( json mbedtls media_lib_sal - webrtc_utils nanopb khash ) diff --git a/components/livekit/idf_component.yml b/components/livekit/idf_component.yml index dc96149..45e76dc 100644 --- a/components/livekit/idf_component.yml +++ b/components/livekit/idf_component.yml @@ -13,8 +13,6 @@ dependencies: espressif/esp_peer: ^1.2.3 media_lib_sal: path: ../third_party/esp-webrtc-solution/components/media_lib_sal - webrtc_utils: - path: ../third_party/esp-webrtc-solution/components/webrtc_utils nanopb: path: ../third_party/nanopb khash: