livekit · ladvoc · Oct 10, 2025 · Oct 10, 2025
diff --git a/README.md b/README.md
@@ -24,7 +24,7 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec
 
 - **Supported chips**: ESP32-S3 and ESP32-P4
 - **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
-- **Bidirectional video**: *video support coming soon*
+- **Video publishing**: H.264 encoding, subscribing coming soon
 - **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
 - **Real-time data**: data packets, remote method calls (RPC)
 
@@ -38,6 +38,7 @@ One of the best ways to get started with LiveKit is by reviewing the [examples](
 
 - [**Voice AI Agent**](./components/livekit/examples/voice_agent/README.md): Conversational AI voice agent that interacts with hardware based on user requests.
 - [**Minimal**](./components/livekit/examples/minimal/README.md): Basic example of connecting to a LiveKit room with bidirectional audio.
+- [**Minimal Video**](./components/livekit/examples/minimal_video/README.md): Equivalent to the minimal example with video publishing.
 
 Once you have chosen an example to be your starting point, create a fresh project from it locally using the following command:
 

diff --git a/components/livekit/README.md b/components/livekit/README.md
@@ -10,6 +10,6 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec
 
 - **Supported chips**: ESP32-S3 and ESP32-P4
 - **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
-- **Bidirectional video**: *video support coming soon*
+- **Video publishing**: H.264 encoding, subscribing coming soon
 - **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
 - **Real-time data**: data packets, remote method calls (RPC)
diff --git a/components/livekit/core/engine.c b/components/livekit/core/engine.c
@@ -329,9 +329,11 @@ static engine_err_t send_add_video_track(engine_t *eng)
         .type = LIVEKIT_PB_TRACK_TYPE_VIDEO,
         .source = LIVEKIT_PB_TRACK_SOURCE_CAMERA,
         .muted = false,
+        .width = video_layer.width,
+        .height = video_layer.height,
         .layers_count = 1,
         .layers = { video_layer },
-        .audio_features_count = 0
+        .backup_codec_policy = LIVEKIT_PB_BACKUP_CODEC_POLICY_REGRESSION
     };
 
     if (signal_send_add_track(eng->signal_handle, &req) != SIGNAL_ERR_NONE) {
@@ -341,36 +343,21 @@ static engine_err_t send_add_video_track(engine_t *eng)
     return ENGINE_ERR_NONE;
 }
 
-/// Begins media streaming and sends add track requests.
-static engine_err_t publish_tracks(engine_t *eng)
+/// Sends an add track request for each media type (audio, video) whose codec is not NONE.
+///
+/// Note: the SFU expects add track requests before the publisher peer offer is sent.
+/// Returns ENGINE_ERR_SIGNALING as soon as a request fails to send, otherwise ENGINE_ERR_NONE.
+static engine_err_t send_add_track_requests(engine_t *eng)
 {
-    if (eng->options.media.audio_info.codec == ESP_PEER_AUDIO_CODEC_NONE &&
-        eng->options.media.video_info.codec == ESP_PEER_VIDEO_CODEC_NONE) {
-        ESP_LOGI(TAG, "No media tracks to publish");
-        return ENGINE_ERR_NONE;
+    if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
+        send_add_audio_track(eng) != ENGINE_ERR_NONE) {
+        return ENGINE_ERR_SIGNALING;
     }
-
-    int ret = ENGINE_ERR_OTHER;
-    do {
-        if (media_stream_begin(eng) != ENGINE_ERR_NONE) {
-            ret = ENGINE_ERR_MEDIA;
-            break;
-        }
-        if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
-            send_add_audio_track(eng) != ENGINE_ERR_NONE) {
-            ret = ENGINE_ERR_SIGNALING;
-            break;
-        }
-        if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
-            send_add_video_track(eng) != ENGINE_ERR_NONE) {
-            ret = ENGINE_ERR_SIGNALING;
-            break;
-        }
-        return ENGINE_ERR_NONE;
-    } while (0);
-
-    media_stream_end(eng);
-    return ret;
+    if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
+        send_add_video_track(eng) != ENGINE_ERR_NONE) {
+        return ENGINE_ERR_SIGNALING;
+    }
+    return ENGINE_ERR_NONE;
 }
 
 // MARK: - Signal event handlers
@@ -420,6 +407,7 @@ static void on_peer_sdp(const char *sdp, peer_role_t role, void *ctx)
     event_enqueue(eng, &ev, false);
 }
 
+
 static bool on_peer_data_packet(livekit_pb_data_packet_t* packet, void *ctx)
 {
     engine_t *eng = (engine_t *)ctx;
@@ -833,7 +821,9 @@ static bool handle_state_connecting(engine_t *eng, const engine_event_t *ev)
             break;
         case EV_SIG_STATE:
             signal_state_t sig_state = ev->detail.sig_state;
-            if (sig_state == SIGNAL_STATE_DISCONNECTED) {
+            if (sig_state == SIGNAL_STATE_CONNECTED) {
+                send_add_track_requests(eng);
+            } else if(sig_state == SIGNAL_STATE_DISCONNECTED) {
                 eng->failure_reason = LIVEKIT_FAILURE_REASON_OTHER;
                 eng->state = ENGINE_STATE_BACKOFF;
             } else if (sig_state & SIGNAL_STATE_FAILED_ANY) {
@@ -886,7 +876,7 @@ static bool handle_state_connected(engine_t *eng, const engine_event_t *ev)
         case _EV_STATE_ENTER:
             eng->retry_count = 0;
             eng->failure_reason = LIVEKIT_FAILURE_REASON_NONE;
-            publish_tracks(eng);
+            media_stream_begin(eng);
             break;
         case EV_CMD_CLOSE:
             signal_send_leave(eng->signal_handle);
@@ -1066,6 +1056,45 @@ static void engine_task(void *arg)
     vTaskDelete(NULL);
 }
 
+static engine_err_t enable_capture_sink(engine_t *eng)
+{ // Builds a capture sink config from eng->options.media, sets up sink path 0, and enables it.
+    esp_capture_sink_cfg_t sink_cfg = {
+        .audio_info = {
+            .format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
+            .sample_rate = eng->options.media.audio_info.sample_rate,
+            .channel = eng->options.media.audio_info.channel,
+            .bits_per_sample = 16, // Fixed value; not taken from the media options
+        },
+        .video_info = {
+            .format_id = ESP_CAPTURE_FMT_ID_H264, // NOTE(review): fixed to H.264 instead of being derived from options.media.video_info.codec; confirm this is intended when no video codec is configured
+            .width = (uint16_t)eng->options.media.video_info.width, // NOTE(review): narrowing casts here and below; presumably values fit, verify against caller
+            .height = (uint16_t)eng->options.media.video_info.height,
+            .fps = (uint8_t)eng->options.media.video_info.fps,
+        },
+    };
+    // Set up the sink on the configured capturer; the resulting path handle is written to eng->capturer_path.
+    if (esp_capture_sink_setup(
+        eng->options.media.capturer,
+        0, // Path index
+        &sink_cfg,
+        &eng->capturer_path
+    ) != ESP_CAPTURE_ERR_OK) {
+        ESP_LOGE(TAG, "Capture sink setup failed");
+        return ENGINE_ERR_MEDIA;
+    }
+
+    // TODO: Add muxer
+    // Enable the path; on failure this returns ENGINE_ERR_MEDIA without undoing the setup above.
+    if (esp_capture_sink_enable(
+        eng->capturer_path,
+        ESP_CAPTURE_RUN_MODE_ALWAYS
+    ) != ESP_CAPTURE_ERR_OK) {
+        ESP_LOGE(TAG, "Capture sink enable failed");
+        return ENGINE_ERR_MEDIA;
+    }
+    return ENGINE_ERR_NONE;
+}
+
 // MARK: - Public API
 
 engine_handle_t engine_init(const engine_options_t *options)
@@ -1117,38 +1146,9 @@ engine_handle_t engine_init(const engine_options_t *options)
     if (eng->signal_handle == NULL) {
         goto _init_failed;
     }
+    eng->renderer_handle = options->media.renderer;
 
-    esp_capture_sink_cfg_t sink_cfg = {
-        .audio_info = {
-            .format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
-            .sample_rate = eng->options.media.audio_info.sample_rate,
-            .channel = eng->options.media.audio_info.channel,
-            .bits_per_sample = 16,
-        },
-        .video_info = {
-            .format_id = capture_video_codec_type(eng->options.media.video_info.codec),
-            .width = (uint16_t)eng->options.media.video_info.width,
-            .height = (uint16_t)eng->options.media.video_info.height,
-            .fps = (uint8_t)eng->options.media.video_info.fps,
-        },
-    };
-    if (options->media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE) {
-        // TODO: Can we ensure the renderer is valid? If not, return error.
-        eng->renderer_handle = options->media.renderer;
-    }
-
-    if (esp_capture_sink_setup(
-        eng->options.media.capturer,
-        0, // Path index
-        &sink_cfg,
-        &eng->capturer_path
-    ) != ESP_CAPTURE_ERR_OK) {
-        goto _init_failed;
-    }
-    if (esp_capture_sink_enable(
-        eng->capturer_path,
-        ESP_CAPTURE_RUN_MODE_ALWAYS
-    ) != ESP_CAPTURE_ERR_OK) {
+    if (enable_capture_sink(eng) != ENGINE_ERR_NONE) {
         goto _init_failed;
     }
     return eng;

diff --git a/components/livekit/examples/minimal_video/.gitignore b/components/livekit/examples/minimal_video/.gitignore
@@ -0,0 +1,7 @@
+# IDF
+**/sdkconfig
+**/sdkconfig.old
+**/build
+**/managed_components
+**/dependencies.lock
+**/dist
diff --git a/components/livekit/examples/minimal_video/CMakeLists.txt b/components/livekit/examples/minimal_video/CMakeLists.txt
@@ -0,0 +1,6 @@
+# The following lines of boilerplate have to be in your project's CMakeLists
+# in this exact order for cmake to work correctly
+cmake_minimum_required(VERSION 3.5)
+set(COMPONENTS main) # Trim build
+include($ENV{IDF_PATH}/tools/cmake/project.cmake)
+project(minimal_video)
diff --git a/components/livekit/examples/minimal_video/README.md b/components/livekit/examples/minimal_video/README.md
@@ -0,0 +1,61 @@
+# Minimal Video
+
+Basic example of connecting to a LiveKit room with bidirectional audio and video publishing.
+
+## Configuration
+
+> [!TIP]
+> Options can either be set through *menuconfig* or added to *sdkconfig* as shown below.
+
+### Credentials
+
+**Option A**: Use a LiveKit Sandbox to get up and running quickly. Set up a LiveKit Sandbox from your [Cloud Project](https://cloud.livekit.io/projects/p_/sandbox), and use its ID in your configuration:
+
+```ini
+CONFIG_LK_EXAMPLE_USE_SANDBOX=y
+CONFIG_LK_EXAMPLE_SANDBOX_ID="my-project-xxxxxx"
+```
+
+**Option B**: Specify a server URL and pregenerated token:
+
+```ini
+CONFIG_LK_EXAMPLE_USE_PREGENERATED=y
+CONFIG_LK_EXAMPLE_TOKEN="your-jwt-token"
+CONFIG_LK_EXAMPLE_SERVER_URL="ws://localhost:7880"
+```
+
+### Network
+
+Connect using WiFi as follows:
+
+```ini
+CONFIG_LK_EXAMPLE_USE_WIFI=y
+CONFIG_LK_EXAMPLE_WIFI_SSID="<your SSID>"
+CONFIG_LK_EXAMPLE_WIFI_PASSWORD="<your password>"
+```
+
+### Development Board
+
+This example uses the Espressif [*codec_board*](https://components.espressif.com/components/tempotian/codec_board/) component to access board-specific peripherals for media capture and rendering. Supported boards are [defined here](https://github.com/espressif/esp-webrtc-solution/blob/65d13427dd83c37264b6cff966d60af0f84f649c/components/codec_board/board_cfg.txt). Locate the name of your board, and set it as follows:
+
+```ini
+CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE="ESP32_P4_DEV_V14"
+```
+
+## Build & Flash
+
+Navigate to this directory in your terminal. Run the following command to build your application, flash it to your board, and monitor serial output:
+
+```sh
+idf.py flash monitor
+```
+
+Once running, the example will establish a network connection, connect to a LiveKit room, and print the following message:
+
+```txt
+I (19508) livekit_example: Room state: Connected
+```
+
+## Next Steps
+
+With a room connection established, you can connect another client (another ESP32, [LiveKit Meet](https://meet.livekit.io), etc.) or dispatch an [agent](https://docs.livekit.io/agents/) to talk with.
diff --git a/components/livekit/examples/minimal_video/main/CMakeLists.txt b/components/livekit/examples/minimal_video/main/CMakeLists.txt
@@ -0,0 +1,2 @@
+idf_component_register(SRCS "main.c" "example.c" "board.c" "media.c"
+                       INCLUDE_DIRS ".")
diff --git a/components/livekit/examples/minimal_video/main/Kconfig.projbuild b/components/livekit/examples/minimal_video/main/Kconfig.projbuild
@@ -0,0 +1,60 @@
+menu "LiveKit Example"
+
+    config LK_EXAMPLE_CODEC_BOARD_TYPE
+        string "Codec board type"
+        default "DUMMY_CODEC_BOARD"
+        help
+            The model of dev board you are using. See board_cfg.txt from the codec
+            board component for a list of supported boards.
+
+    choice LK_EXAMPLE_CONNECTION_METHOD
+        prompt "Choose room connection method"
+        help
+            Choose how to connect to the room in the example.
+
+        config LK_EXAMPLE_USE_SANDBOX
+            bool "Sandbox token"
+            help
+                Use a sandbox token server for room authentication.
+
+        config LK_EXAMPLE_USE_PREGENERATED
+            bool "Pre-generated token"
+            help
+                Use a pre-generated token and server URL for room connection.
+    endchoice
+
+    config LK_EXAMPLE_SERVER_URL
+        depends on LK_EXAMPLE_USE_PREGENERATED
+        string "Server URL"
+        default "ws://localhost:7880"
+        help
+            The server URL to use for room connection.
+    config LK_EXAMPLE_TOKEN
+        depends on LK_EXAMPLE_USE_PREGENERATED
+        string "Token"
+        help
+            The token to use for room connection.
+
+    config LK_EXAMPLE_SANDBOX_ID
+        depends on LK_EXAMPLE_USE_SANDBOX
+        string "Sandbox ID"
+        help
+            The ID of the sandbox token server to use.
+    config LK_EXAMPLE_ROOM_NAME
+        depends on LK_EXAMPLE_USE_SANDBOX
+        string "Room name (optional)"
+        help
+            Specific room name sandbox tokens will be generated with.
+    config LK_EXAMPLE_PARTICIPANT_NAME
+        depends on LK_EXAMPLE_USE_SANDBOX
+        string "Participant name (optional)"
+        help
+            Specific participant name sandbox tokens will be generated with.
+
+    config LK_EXAMPLE_SPEAKER_VOLUME
+        int "Default speaker volume (0-100%)"
+        default 85
+        range 0 100
+        help
+            Default playback volume for speaker output.
+endmenu
diff --git a/components/livekit/examples/minimal_video/main/board.c b/components/livekit/examples/minimal_video/main/board.c
@@ -0,0 +1,19 @@
+#include "esp_log.h"
+#include "board.h"
+#include "codec_init.h"
+#include "codec_board.h"
+#include <math.h>
+
+static const char *TAG = "board";
+
+/// Initializes the board.
+///
+/// Sets the codec board type from CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE, then
+/// initializes the codec with device reuse disabled.
+void board_init(void)
+{
+    ESP_LOGI(TAG, "Initializing board");
+
+    set_codec_board_type(CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE);
+    // reuse_dev must be false when playback and recording are used at the same time.
+    codec_init_cfg_t cfg = {
+        .reuse_dev = false
+    };
+    init_codec(&cfg);
+}
diff --git a/components/livekit/examples/minimal_video/main/board.h b/components/livekit/examples/minimal_video/main/board.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Initialize board.
+void board_init(void);
+
+#ifdef __cplusplus
+}
+#endif
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		idf_component_register(SRCS "main.c" "example.c" "board.c" "media.c"
		INCLUDE_DIRS ".")