From 02c97d33642078206150d88c01f6e287c66c606f Mon Sep 17 00:00:00 2001
From: Jacob Gelman <3182119+ladvoc@users.noreply.github.com>
Date: Fri, 10 Oct 2025 14:55:59 +1100
Subject: [PATCH] Video publishing support

---
 README.md                                     |   3 +-
 components/livekit/README.md                  |   2 +-
 components/livekit/core/engine.c              | 124 +++++------
 .../livekit/examples/minimal_video/.gitignore |   7 +
 .../examples/minimal_video/CMakeLists.txt     |   6 +
 .../livekit/examples/minimal_video/README.md  |  61 ++++++
 .../minimal_video/main/CMakeLists.txt         |   2 +
 .../minimal_video/main/Kconfig.projbuild      |  60 ++++++
 .../examples/minimal_video/main/board.c       |  19 ++
 .../examples/minimal_video/main/board.h       |  12 ++
 .../examples/minimal_video/main/example.c     |  96 +++++++++
 .../examples/minimal_video/main/example.h     |  13 ++
 .../minimal_video/main/idf_component.yml      |  21 ++
 .../examples/minimal_video/main/main.c        |  24 +++
 .../examples/minimal_video/main/media.c       | 202 ++++++++++++++++++
 .../examples/minimal_video/main/media.h       |  39 ++++
 .../examples/minimal_video/partitions.csv     |   5 +
 .../examples/minimal_video/sdkconfig.defaults |  39 ++++
 .../livekit/protocol/livekit_models.pb.h      |  11 +-
 components/livekit/protocol/livekit_rtc.pb.h  |  21 +-
 .../protocol/protobufs/livekit_models.options |   2 +-
 .../protocol/protobufs/livekit_rtc.options    |   7 +-
 22 files changed, 697 insertions(+), 79 deletions(-)
 create mode 100644 components/livekit/examples/minimal_video/.gitignore
 create mode 100755 components/livekit/examples/minimal_video/CMakeLists.txt
 create mode 100644 components/livekit/examples/minimal_video/README.md
 create mode 100755 components/livekit/examples/minimal_video/main/CMakeLists.txt
 create mode 100644 components/livekit/examples/minimal_video/main/Kconfig.projbuild
 create mode 100755 components/livekit/examples/minimal_video/main/board.c
 create mode 100644 components/livekit/examples/minimal_video/main/board.h
 create mode 100644 components/livekit/examples/minimal_video/main/example.c
 create mode 100644 components/livekit/examples/minimal_video/main/example.h
 create mode 100644 components/livekit/examples/minimal_video/main/idf_component.yml
 create mode 100644 components/livekit/examples/minimal_video/main/main.c
 create mode 100644 components/livekit/examples/minimal_video/main/media.c
 create mode 100644 components/livekit/examples/minimal_video/main/media.h
 create mode 100755 components/livekit/examples/minimal_video/partitions.csv
 create mode 100644 components/livekit/examples/minimal_video/sdkconfig.defaults

diff --git a/README.md b/README.md
index a09a15b..d96a470 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec
 
 - **Supported chips**: ESP32-S3 and ESP32-P4
 - **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
-- **Bidirectional video**: *video support coming soon*
+- **Video publishing**: H.264 encoding, subscribing coming soon
 - **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
 - **Real-time data**: data packets, remote method calls (RPC)
 
@@ -38,6 +38,7 @@ One of the best ways to get started with LiveKit is by reviewing the [examples](
 
 - [**Voice AI Agent**](./components/livekit/examples/voice_agent/README.md): Conversational AI voice agent that interacts with hardware based on user requests.
 - [**Minimal**](./components/livekit/examples/minimal/README.md): Basic example of connecting to a LiveKit room with bidirectional audio.
+- [**Minimal Video**](./components/livekit/examples/minimal_video/README.md): Equivalent to the minimal example with video publishing.
 
 Once you have chosen an example to be your starting point, create a fresh project from it locally using the following command:
 
diff --git a/components/livekit/README.md b/components/livekit/README.md
index 32fd035..433ae9d 100644
--- a/components/livekit/README.md
+++ b/components/livekit/README.md
@@ -10,6 +10,6 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec
 
 - **Supported chips**: ESP32-S3 and ESP32-P4
 - **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
-- **Bidirectional video**: *video support coming soon*
+- **Video publishing**: H.264 encoding, subscribing coming soon
 - **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
 - **Real-time data**: data packets, remote method calls (RPC)
diff --git a/components/livekit/core/engine.c b/components/livekit/core/engine.c
index e226467..e0288ac 100644
--- a/components/livekit/core/engine.c
+++ b/components/livekit/core/engine.c
@@ -329,9 +329,11 @@ static engine_err_t send_add_video_track(engine_t *eng)
         .type = LIVEKIT_PB_TRACK_TYPE_VIDEO,
         .source = LIVEKIT_PB_TRACK_SOURCE_CAMERA,
         .muted = false,
+        .width = video_layer.width,
+        .height = video_layer.height,
         .layers_count = 1,
         .layers = { video_layer },
-        .audio_features_count = 0
+        .backup_codec_policy = LIVEKIT_PB_BACKUP_CODEC_POLICY_REGRESSION
     };
 
     if (signal_send_add_track(eng->signal_handle, &req) != SIGNAL_ERR_NONE) {
@@ -341,36 +343,21 @@ static engine_err_t send_add_video_track(engine_t *eng)
     return ENGINE_ERR_NONE;
 }
 
-/// Begins media streaming and sends add track requests.
-static engine_err_t publish_tracks(engine_t *eng)
+/// Send add track requests based on the media options.
+///
+/// Note: SFU expects add track request before publisher peer offer is sent.
+///
+static engine_err_t send_add_track_requests(engine_t *eng)
 {
-    if (eng->options.media.audio_info.codec == ESP_PEER_AUDIO_CODEC_NONE &&
-        eng->options.media.video_info.codec == ESP_PEER_VIDEO_CODEC_NONE) {
-        ESP_LOGI(TAG, "No media tracks to publish");
-        return ENGINE_ERR_NONE;
+    if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
+        send_add_audio_track(eng) != ENGINE_ERR_NONE) {
+        return ENGINE_ERR_SIGNALING;
     }
-
-    int ret = ENGINE_ERR_OTHER;
-    do {
-        if (media_stream_begin(eng) != ENGINE_ERR_NONE) {
-            ret = ENGINE_ERR_MEDIA;
-            break;
-        }
-        if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
-            send_add_audio_track(eng) != ENGINE_ERR_NONE) {
-            ret = ENGINE_ERR_SIGNALING;
-            break;
-        }
-        if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
-            send_add_video_track(eng) != ENGINE_ERR_NONE) {
-            ret = ENGINE_ERR_SIGNALING;
-            break;
-        }
-        return ENGINE_ERR_NONE;
-    } while (0);
-
-    media_stream_end(eng);
-    return ret;
+    if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
+        send_add_video_track(eng) != ENGINE_ERR_NONE) {
+        return ENGINE_ERR_SIGNALING;
+    }
+    return ENGINE_ERR_NONE;
 }
 
 // MARK: - Signal event handlers
@@ -420,6 +407,7 @@ static void on_peer_sdp(const char *sdp, peer_role_t role, void *ctx)
     event_enqueue(eng, &ev, false);
 }
 
+
 static bool on_peer_data_packet(livekit_pb_data_packet_t* packet, void *ctx)
 {
     engine_t *eng = (engine_t *)ctx;
@@ -833,7 +821,9 @@ static bool handle_state_connecting(engine_t *eng, const engine_event_t *ev)
             break;
         case EV_SIG_STATE:
             signal_state_t sig_state = ev->detail.sig_state;
-            if (sig_state == SIGNAL_STATE_DISCONNECTED) {
+            if (sig_state == SIGNAL_STATE_CONNECTED) {
+                send_add_track_requests(eng);
+            } else if(sig_state == SIGNAL_STATE_DISCONNECTED) {
                 eng->failure_reason = LIVEKIT_FAILURE_REASON_OTHER;
                 eng->state = ENGINE_STATE_BACKOFF;
             } else if (sig_state & SIGNAL_STATE_FAILED_ANY) {
@@ -886,7 +876,7 @@ static bool handle_state_connected(engine_t *eng, const engine_event_t *ev)
         case _EV_STATE_ENTER:
             eng->retry_count = 0;
             eng->failure_reason = LIVEKIT_FAILURE_REASON_NONE;
-            publish_tracks(eng);
+            media_stream_begin(eng);
             break;
         case EV_CMD_CLOSE:
             signal_send_leave(eng->signal_handle);
@@ -1066,6 +1056,45 @@ static void engine_task(void *arg)
     vTaskDelete(NULL);
 }
 
+/// Configures and enables the capture sink (path 0) from the engine's media options.
+///
+/// Returns ENGINE_ERR_MEDIA if sink setup or enable fails, otherwise
+/// ENGINE_ERR_NONE.
+static engine_err_t enable_capture_sink(engine_t *eng)
+{
+    esp_capture_sink_cfg_t sink_cfg = {
+        .audio_info = {
+            .format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
+            .sample_rate = eng->options.media.audio_info.sample_rate,
+            .channel = eng->options.media.audio_info.channel,
+            .bits_per_sample = 16,
+        },
+        .video_info = {
+            // Map the configured codec (mirroring audio_info) instead of
+            // hard-coding H.264, so the sink follows video_info.codec.
+            .format_id = capture_video_codec_type(eng->options.media.video_info.codec),
+            .width = (uint16_t)eng->options.media.video_info.width,
+            .height = (uint16_t)eng->options.media.video_info.height,
+            .fps = (uint8_t)eng->options.media.video_info.fps,
+        },
+    };
+
+    if (esp_capture_sink_setup(eng->options.media.capturer, 0 /* path index */,
+                               &sink_cfg, &eng->capturer_path) != ESP_CAPTURE_ERR_OK) {
+        ESP_LOGE(TAG, "Capture sink setup failed");
+        return ENGINE_ERR_MEDIA;
+    }
+
+    // TODO: Add muxer
+
+    if (esp_capture_sink_enable(eng->capturer_path, ESP_CAPTURE_RUN_MODE_ALWAYS)
+        != ESP_CAPTURE_ERR_OK) {
+        ESP_LOGE(TAG, "Capture sink enable failed");
+        return ENGINE_ERR_MEDIA;
+    }
+    return ENGINE_ERR_NONE;
+}
+
 // MARK: - Public API
 
 engine_handle_t engine_init(const engine_options_t *options)
@@ -1117,38 +1146,9 @@ engine_handle_t engine_init(const engine_options_t *options)
     if (eng->signal_handle == NULL) {
         goto _init_failed;
     }
+    eng->renderer_handle = options->media.renderer;
 
-    esp_capture_sink_cfg_t sink_cfg = {
-        .audio_info = {
-            .format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
-            .sample_rate = eng->options.media.audio_info.sample_rate,
-            .channel = eng->options.media.audio_info.channel,
-            .bits_per_sample = 16,
-        },
-        .video_info = {
-            .format_id = capture_video_codec_type(eng->options.media.video_info.codec),
-            .width = (uint16_t)eng->options.media.video_info.width,
-            .height = (uint16_t)eng->options.media.video_info.height,
-            .fps = (uint8_t)eng->options.media.video_info.fps,
-        },
-    };
-    if (options->media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE) {
-        // TODO: Can we ensure the renderer is valid? If not, return error.
-        eng->renderer_handle = options->media.renderer;
-    }
-
-    if (esp_capture_sink_setup(
-        eng->options.media.capturer,
-        0, // Path index
-        &sink_cfg,
-        &eng->capturer_path
-    ) != ESP_CAPTURE_ERR_OK) {
-        goto _init_failed;
-    }
-    if (esp_capture_sink_enable(
-        eng->capturer_path,
-        ESP_CAPTURE_RUN_MODE_ALWAYS
-    ) != ESP_CAPTURE_ERR_OK) {
+    if (enable_capture_sink(eng) != ENGINE_ERR_NONE) {
         goto _init_failed;
     }
     return eng;
diff --git a/components/livekit/examples/minimal_video/.gitignore b/components/livekit/examples/minimal_video/.gitignore
new file mode 100644
index 0000000..8100efc
--- /dev/null
+++ b/components/livekit/examples/minimal_video/.gitignore
@@ -0,0 +1,7 @@
+# IDF
+**/sdkconfig
+**/sdkconfig.old
+**/build
+**/managed_components
+**/dependencies.lock
+**/dist
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/CMakeLists.txt b/components/livekit/examples/minimal_video/CMakeLists.txt
new file mode 100755
index 0000000..37d0d61
--- /dev/null
+++ b/components/livekit/examples/minimal_video/CMakeLists.txt
@@ -0,0 +1,6 @@
+# The following lines of boilerplate have to be in your project's CMakeLists
+# in this exact order for cmake to work correctly
+cmake_minimum_required(VERSION 3.5)
+set(COMPONENTS main) # Trim build
+include($ENV{IDF_PATH}/tools/cmake/project.cmake)
+project(minimal_video)
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/README.md b/components/livekit/examples/minimal_video/README.md
new file mode 100644
index 0000000..5f450d9
--- /dev/null
+++ b/components/livekit/examples/minimal_video/README.md
@@ -0,0 +1,61 @@
+# Minimal Video
+
+Basic example of connecting to a LiveKit room with bidirectional audio and video publishing.
+
+## Configuration
+
+> [!TIP]
+> Options can either be set through *menuconfig* or added to *sdkconfig* as shown below.
+
+### Credentials
+
+**Option A**: Use a LiveKit Sandbox to get up and running quickly. Set up a LiveKit Sandbox from your [Cloud Project](https://cloud.livekit.io/projects/p_/sandbox), and use its ID in your configuration:
+
+```ini
+CONFIG_LK_EXAMPLE_USE_SANDBOX=y
+CONFIG_LK_EXAMPLE_SANDBOX_ID="my-project-xxxxxx"
+```
+
+**Option B**: Specify a server URL and pregenerated token:
+
+```ini
+CONFIG_LK_EXAMPLE_USE_PREGENERATED=y
+CONFIG_LK_EXAMPLE_TOKEN="your-jwt-token"
+CONFIG_LK_EXAMPLE_SERVER_URL="ws://localhost:7880"
+```
+
+### Network
+
+Connect using WiFi as follows:
+
+```ini
+CONFIG_LK_EXAMPLE_USE_WIFI=y
+CONFIG_LK_EXAMPLE_WIFI_SSID="<your SSID>"
+CONFIG_LK_EXAMPLE_WIFI_PASSWORD="<your password>"
+```
+
+### Development Board
+
+This example uses the Espressif [*codec_board*](https://components.espressif.com/components/tempotian/codec_board/) component to access board-specific peripherals for media capture and rendering. Supported boards are [defined here](https://github.com/espressif/esp-webrtc-solution/blob/65d13427dd83c37264b6cff966d60af0f84f649c/components/codec_board/board_cfg.txt). Locate the name of your board, and set it as follows:
+
+```ini
+CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE="ESP32_P4_DEV_V14"
+```
+
+## Build & Flash
+
+Navigate to this directory in your terminal. Run the following command to build your application, flash it to your board, and monitor serial output:
+
+```sh
+idf.py flash monitor
+```
+
+Once running, the example will establish a network connection, connect to a LiveKit room, and print the following message:
+
+```txt
+I (19508) livekit_example: Room state changed: Connected
+```
+
+## Next Steps
+
+With a room connection established, you can connect another client (another ESP32, [LiveKit Meet](https://meet.livekit.io), etc.) or dispatch an [agent](https://docs.livekit.io/agents/) to talk with.
diff --git a/components/livekit/examples/minimal_video/main/CMakeLists.txt b/components/livekit/examples/minimal_video/main/CMakeLists.txt
new file mode 100755
index 0000000..6e75b98
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/CMakeLists.txt
@@ -0,0 +1,2 @@
+idf_component_register(SRCS "main.c" "example.c" "board.c" "media.c"
+                       INCLUDE_DIRS ".")
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/Kconfig.projbuild b/components/livekit/examples/minimal_video/main/Kconfig.projbuild
new file mode 100644
index 0000000..d4a1ba4
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/Kconfig.projbuild
@@ -0,0 +1,60 @@
+menu "LiveKit Example"
+
+    config LK_EXAMPLE_CODEC_BOARD_TYPE
+        string "Codec board type"
+        default "DUMMY_CODEC_BOARD"
+        help
+            The model of dev board you are using. See board_cfg.txt from the codec
+            board component for a list of supported boards.
+
+    choice LK_EXAMPLE_CONNECTION_METHOD
+        prompt "Choose room connection method"
+        help
+            Choose how to connect to the room in the example.
+
+        config LK_EXAMPLE_USE_SANDBOX
+            bool "Sandbox token"
+            help
+                Use a sandbox token server for room authentication.
+
+        config LK_EXAMPLE_USE_PREGENERATED
+            bool "Pre-generated token"
+            help
+                Use a pre-generated token and server URL for room connection.
+    endchoice
+
+    config LK_EXAMPLE_SERVER_URL
+        depends on LK_EXAMPLE_USE_PREGENERATED
+        string "Server URL"
+        default "ws://localhost:7880"
+        help
+            The server URL to use for room connection.
+    config LK_EXAMPLE_TOKEN
+        depends on LK_EXAMPLE_USE_PREGENERATED
+        string "Token"
+        help
+            The token to use for room connection.
+
+    config LK_EXAMPLE_SANDBOX_ID
+        depends on LK_EXAMPLE_USE_SANDBOX
+        string "Sandbox ID"
+        help
+            The ID of the sandbox token server to use.
+    config LK_EXAMPLE_ROOM_NAME
+        depends on LK_EXAMPLE_USE_SANDBOX
+        string "Room name (optional)"
+        help
+            Specific room name sandbox tokens will be generated with.
+    config LK_EXAMPLE_PARTICIPANT_NAME
+        depends on LK_EXAMPLE_USE_SANDBOX
+        string "Participant name (optional)"
+        help
+            Specific participant name sandbox tokens will be generated with.
+
+    config LK_EXAMPLE_SPEAKER_VOLUME
+        int "Default speaker volume (0-100%)"
+        default 85
+        range 0 100
+        help
+            Default playback volume for speaker output.
+endmenu
diff --git a/components/livekit/examples/minimal_video/main/board.c b/components/livekit/examples/minimal_video/main/board.c
new file mode 100755
index 0000000..5b19089
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/board.c
@@ -0,0 +1,19 @@
+#include "esp_log.h"
+#include "board.h"
+#include "codec_init.h"
+#include "codec_board.h"
+#include <math.h>
+
+static const char *TAG = "board";
+
+/// Selects the codec board type from Kconfig and runs codec initialization.
+void board_init(void)
+{
+    ESP_LOGI(TAG, "Initializing board");
+    set_codec_board_type(CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE);
+    // Playback and recording run at the same time, so reuse_dev must be false.
+    codec_init_cfg_t cfg = {
+        .reuse_dev = false
+    };
+    init_codec(&cfg);
+}
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/board.h b/components/livekit/examples/minimal_video/main/board.h
new file mode 100644
index 0000000..2830b42
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/board.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Initialize board.
+void board_init(void);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/components/livekit/examples/minimal_video/main/example.c b/components/livekit/examples/minimal_video/main/example.c
new file mode 100644
index 0000000..b0fec92
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/example.c
@@ -0,0 +1,96 @@
+#include "esp_log.h"
+#include "livekit.h"
+#include "livekit_sandbox.h"
+#include "media.h"
+#include "board.h"
+#include "example.h"
+
+static const char *TAG = "livekit_example";
+
+static livekit_room_handle_t room_handle;
+
+/// Room state callback: logs the new connection state and, if one is set, the failure reason.
+static void on_state_changed(livekit_connection_state_t state, void* ctx)
+{
+    ESP_LOGI(TAG, "Room state changed: %s", livekit_connection_state_str(state));
+    // ctx is unused; the failure reason is read from the file-scope room handle.
+    livekit_failure_reason_t reason = livekit_room_get_failure_reason(room_handle);
+    if (reason != LIVEKIT_FAILURE_REASON_NONE) {
+        ESP_LOGE(TAG, "Failure reason: %s", livekit_failure_reason_str(reason));
+    }
+}
+
+/// Creates the room (audio + H.264 video publishing, audio subscribing) and connects to it.
+void join_room(void)
+{
+    if (room_handle != NULL) {
+        ESP_LOGE(TAG, "Room already created");
+        return;
+    }
+
+    livekit_room_options_t room_options = {
+        .publish = {
+            .kind = LIVEKIT_MEDIA_TYPE_BOTH,
+            .audio_encode = {
+                .codec = LIVEKIT_AUDIO_CODEC_OPUS,
+                .sample_rate = 16000,
+                .channel_count = 1
+            },
+            .video_encode = {
+                .codec = LIVEKIT_VIDEO_CODEC_H264,
+                .width = 1920,
+                .height = 1080,
+                .fps = 25
+            },
+            .capturer = media_get_capturer()
+        },
+        .subscribe = { .kind = LIVEKIT_MEDIA_TYPE_AUDIO, .renderer = media_get_renderer() },
+        .on_state_changed = on_state_changed
+    };
+    if (livekit_room_create(&room_handle, &room_options) != LIVEKIT_ERR_NONE) {
+        ESP_LOGE(TAG, "Failed to create room");
+        return;
+    }
+
+    livekit_err_t connect_res;
+#ifdef CONFIG_LK_EXAMPLE_USE_SANDBOX
+    // Option A: Sandbox token server.
+    livekit_sandbox_res_t res = {};
+    livekit_sandbox_options_t gen_options = {
+        .sandbox_id = CONFIG_LK_EXAMPLE_SANDBOX_ID,
+        .room_name = CONFIG_LK_EXAMPLE_ROOM_NAME,
+        .participant_name = CONFIG_LK_EXAMPLE_PARTICIPANT_NAME
+    };
+    if (!livekit_sandbox_generate(&gen_options, &res)) {
+        ESP_LOGE(TAG, "Failed to generate sandbox token");
+        livekit_room_destroy(room_handle); // Release the room so a later join_room() can retry.
+        room_handle = NULL;
+        return;
+    }
+    connect_res = livekit_room_connect(room_handle, res.server_url, res.token);
+    livekit_sandbox_res_free(&res);
+#else
+    // Option B: Pre-generated token.
+    connect_res = livekit_room_connect(room_handle, CONFIG_LK_EXAMPLE_SERVER_URL, CONFIG_LK_EXAMPLE_TOKEN);
+#endif
+
+    if (connect_res != LIVEKIT_ERR_NONE) {
+        ESP_LOGE(TAG, "Failed to connect to room");
+    }
+}
+
+void leave_room(void)
+{
+    if (room_handle == NULL) {
+        ESP_LOGE(TAG, "Room not created");
+        return;
+    }
+    if (livekit_room_close(room_handle) != LIVEKIT_ERR_NONE) {
+        ESP_LOGE(TAG, "Failed to leave room"); // Logged only; destroy is still attempted.
+    }
+    if (livekit_room_destroy(room_handle) != LIVEKIT_ERR_NONE) {
+        ESP_LOGE(TAG, "Failed to destroy room");
+        return; // The handle is kept when destroy fails.
+    }
+    room_handle = NULL; // Lets join_room() create a new room afterwards.
+}
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/example.h b/components/livekit/examples/minimal_video/main/example.h
new file mode 100644
index 0000000..f576f95
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/example.h
@@ -0,0 +1,13 @@
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void join_room(void);  // Creates the room and connects to it (see example.c).
+void leave_room(void); // Closes and destroys the room (see example.c).
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/idf_component.yml b/components/livekit/examples/minimal_video/main/idf_component.yml
new file mode 100644
index 0000000..ca4e5da
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/idf_component.yml
@@ -0,0 +1,21 @@
+dependencies:
+  idf: ">=5.4"
+  livekit/livekit:
+    version: "0.3.1"
+    override_path: ../../../
+  livekit/sandbox_token:
+    version: ~0.1.0
+    override_path: ../../../../sandbox_token
+  livekit/example_utils:
+    version: ~0.1.0
+    override_path: ../../../../example_utils
+  tempotian/codec_board: ~1.0.0
+  # The following components are required to connect to WiFi for ESP32-P4-based
+  # development board that supports WiFi. The versions here are known to work with
+  # the ESP32-P4-Function-EV-Board out-of-the-box.
+  espressif/esp_hosted:
+    version: ~2.0.13
+    rules: [{if: "target in [esp32p4]"}]
+  espressif/esp_wifi_remote:
+    version: ~0.14.3
+    rules: [{if: "target in [esp32p4]"}]
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/main.c b/components/livekit/examples/minimal_video/main/main.c
new file mode 100644
index 0000000..c45dda4
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/main.c
@@ -0,0 +1,24 @@
+#include "esp_log.h"
+#include "board.h"
+#include "esp_netif_sntp.h"
+#include "example.h"
+#include "livekit_example_utils.h"
+#include "media.h"
+
+#include "livekit.h"
+
+void app_main(void)
+{
+    esp_log_level_set("*", ESP_LOG_INFO);
+    // NOTE(review): the return values of media_init() and esp_netif_sntp_init() are not checked.
+    livekit_system_init();
+    board_init();
+    media_init();
+    esp_sntp_config_t sntp_config = ESP_NETIF_SNTP_DEFAULT_CONFIG_MULTIPLE(2,
+        ESP_SNTP_SERVER_LIST("time.google.com", "pool.ntp.org"));
+    esp_netif_sntp_init(&sntp_config);
+    // Join the room only when the network helper reports success.
+    if (lk_example_network_connect()) {
+        join_room(); // See example.c
+    }
+}
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/media.c b/components/livekit/examples/minimal_video/main/media.c
new file mode 100644
index 0000000..091691e
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/media.c
@@ -0,0 +1,202 @@
+#include "av_render_default.h"
+#include "esp_audio_dec_default.h"
+#include "esp_audio_enc_default.h"
+#include "esp_capture_defaults.h"
+#include "esp_capture_sink.h"
+#include "esp_check.h"
+#include "esp_log.h"
+#include "esp_video_dec_default.h"
+#include "esp_video_device.h"
+#include "esp_video_enc_default.h"
+#include "esp_video_init.h"
+#include "codec_board.h"
+#include "codec_init.h"
+
+#include "media.h"
+
+static const char *TAG = "media";
+
+#define NULL_CHECK(pointer, message) \
+    ESP_RETURN_ON_FALSE(pointer != NULL, -1, TAG, message)
+
+typedef struct {
+    esp_capture_sink_handle_t capturer_handle;
+    esp_capture_video_src_if_t *video_source;
+    esp_capture_audio_src_if_t *audio_source;
+} capture_system_t;
+
+typedef struct {
+    audio_render_handle_t audio_renderer;
+    video_render_handle_t video_renderer;
+    av_render_handle_t av_renderer_handle;
+} renderer_system_t;
+
+static capture_system_t  capturer_system;
+static renderer_system_t renderer_system;
+
+/// Creates the camera video source from the codec_board camera configuration.
+///
+/// Returns NULL if get_camera_cfg() or esp_video_init() fails, or if a
+/// non-P4 target is configured with a camera type other than DVP.
+static esp_capture_video_src_if_t* create_camera_source(void)
+{
+    camera_cfg_t cam_pin_cfg = {};
+    int ret = get_camera_cfg(&cam_pin_cfg);
+    if (ret != 0) {
+        return NULL;
+    }
+#if CONFIG_IDF_TARGET_ESP32P4
+    // ESP32-P4: initialize esp_video (MIPI-CSI or DVP), then capture via V4L2.
+    esp_video_init_csi_config_t csi_config = { 0 };
+    esp_video_init_dvp_config_t dvp_config = { 0 };
+    esp_video_init_config_t cam_config = { 0 };
+    if (cam_pin_cfg.type == CAMERA_TYPE_MIPI) {
+        csi_config.sccb_config.i2c_handle = get_i2c_bus_handle(0);
+        csi_config.sccb_config.freq = 100000;
+        csi_config.reset_pin = cam_pin_cfg.reset;
+        csi_config.pwdn_pin = cam_pin_cfg.pwr;
+        ESP_LOGI(TAG, "Use i2c handle %p", csi_config.sccb_config.i2c_handle);
+        cam_config.csi = &csi_config;
+    } else if (cam_pin_cfg.type == CAMERA_TYPE_DVP) {
+        dvp_config.reset_pin = cam_pin_cfg.reset;
+        dvp_config.pwdn_pin = cam_pin_cfg.pwr;
+        dvp_config.dvp_pin.data_width = CAM_CTLR_DATA_WIDTH_8;
+        // Copy the eight data pins (8-bit bus width selected above).
+        for (int i = 0; i < 8; i++) {
+            dvp_config.dvp_pin.data_io[i] = cam_pin_cfg.data[i];
+        }
+        dvp_config.dvp_pin.vsync_io = cam_pin_cfg.vsync;
+        dvp_config.dvp_pin.pclk_io = cam_pin_cfg.pclk;
+        dvp_config.dvp_pin.xclk_io = cam_pin_cfg.xclk;
+        dvp_config.dvp_pin.de_io = cam_pin_cfg.de;
+        dvp_config.xclk_freq = 20000000;
+        cam_config.dvp = &dvp_config;
+    }
+    ret = esp_video_init(&cam_config);
+    if (ret != ESP_OK) {
+        ESP_LOGE(TAG, "Camera init failed with error 0x%x", ret);
+        return NULL;
+    }
+    esp_capture_video_v4l2_src_cfg_t v4l2_cfg = {
+        .dev_name = "/dev/video0",
+        .buf_count = 2,
+    };
+    return esp_capture_new_video_v4l2_src(&v4l2_cfg);
+#else
+    // Only DVP cameras are handled here. Return NULL explicitly: falling off
+    // the end of a non-void function whose value is used is undefined behavior.
+    if (cam_pin_cfg.type != CAMERA_TYPE_DVP) {
+        return NULL;
+    }
+    esp_capture_video_dvp_src_cfg_t dvp_config = { 0 };
+    dvp_config.buf_count = 2;
+    dvp_config.reset_pin = cam_pin_cfg.reset;
+    dvp_config.pwr_pin = cam_pin_cfg.pwr;
+    // Copy the eight data pins.
+    for (int i = 0; i < 8; i++) {
+        dvp_config.data[i] = cam_pin_cfg.data[i];
+    }
+    dvp_config.vsync_pin = cam_pin_cfg.vsync;
+    dvp_config.href_pin = cam_pin_cfg.href;
+    dvp_config.pclk_pin = cam_pin_cfg.pclk;
+    dvp_config.xclk_pin = cam_pin_cfg.xclk;
+    dvp_config.xclk_freq = 20000000;
+    return esp_capture_new_video_dvp_src(&dvp_config);
+#endif
+}
+
+/// Builds the capture system from the camera source and the record device; returns 0 or -1.
+static int build_capturer_system(void)
+{
+    capturer_system.video_source = create_camera_source();
+    NULL_CHECK(capturer_system.video_source, "Failed to create camera source");
+
+    esp_codec_dev_handle_t record_handle = get_record_handle();
+    NULL_CHECK(record_handle, "Failed to get record handle");
+
+    // For supported boards, prefer using an acoustic echo cancellation (AEC) source
+    // for applications requiring hands-free voice communication:
+    //
+    // esp_capture_audio_aec_src_cfg_t codec_cfg = {
+    //     .record_handle = record_handle,
+    //     .channel = 4,
+    //     .channel_mask = 1 | 2
+    // };
+    // capturer_system.audio_source = esp_capture_new_audio_aec_src(&codec_cfg);
+
+    esp_capture_audio_dev_src_cfg_t codec_cfg = { .record_handle = record_handle };
+    capturer_system.audio_source = esp_capture_new_audio_dev_src(&codec_cfg);
+    NULL_CHECK(capturer_system.audio_source, "Failed to create audio source");
+
+    esp_capture_cfg_t cfg = {
+        .sync_mode = ESP_CAPTURE_SYNC_MODE_AUDIO,
+        .audio_src = capturer_system.audio_source,
+        .video_src = capturer_system.video_source
+    };
+    // Check the status code as well as the handle instead of discarding it.
+    int open_ret = esp_capture_open(&cfg, &capturer_system.capturer_handle);
+    ESP_RETURN_ON_FALSE(open_ret == ESP_CAPTURE_ERR_OK, -1, TAG, "Failed to open capture system");
+    NULL_CHECK(capturer_system.capturer_handle, "Failed to open capture system");
+    return 0;
+}
+
+static int build_renderer_system(void)
+{
+    esp_codec_dev_handle_t playback_dev = get_playback_handle();
+    NULL_CHECK(playback_dev, "Failed to get render device handle");
+    // Wrap the playback device in an I2S audio renderer.
+    i2s_render_cfg_t i2s_render_cfg = {
+        .play_handle = playback_dev,
+        .fixed_clock = true
+    };
+    renderer_system.audio_renderer = av_render_alloc_i2s_render(&i2s_render_cfg);
+    NULL_CHECK(renderer_system.audio_renderer, "Failed to create I2S renderer");
+
+    // Apply the Kconfig default volume to the playback device.
+    esp_codec_dev_set_out_vol(playback_dev, CONFIG_LK_EXAMPLE_SPEAKER_VOLUME);
+
+    av_render_cfg_t av_cfg = {
+        .audio_render = renderer_system.audio_renderer,
+        .audio_raw_fifo_size = 8 * 4096,
+        .audio_render_fifo_size = 100 * 1024,
+        .video_raw_fifo_size = 500 * 1024,
+        .allow_drop_data = false,
+    };
+    renderer_system.av_renderer_handle = av_render_open(&av_cfg);
+    NULL_CHECK(renderer_system.av_renderer_handle, "Failed to create AV renderer");
+    // Fixed audio frame format passed to the AV renderer: 16 kHz, 2 channels, 16-bit.
+    av_render_audio_frame_info_t fixed_info = {
+        .sample_rate = 16000,
+        .channel = 2,
+        .bits_per_sample = 16,
+    };
+    av_render_set_fixed_frame_info(renderer_system.av_renderer_handle, &fixed_info);
+
+    return 0;
+}
+
+/// Registers the default codecs and builds the capture and render systems; returns 0 or -1.
+int media_init(void)
+{
+    esp_audio_enc_register_default();
+    esp_audio_dec_register_default();
+    esp_video_enc_register_default();
+    esp_video_dec_register_default();
+
+    // Build both systems even if one fails (as before), but report the
+    // failure to the caller instead of unconditionally returning success.
+    int capturer_ret = build_capturer_system();
+    int renderer_ret = build_renderer_system();
+
+    return (capturer_ret != 0 || renderer_ret != 0) ? -1 : 0;
+}
+
+esp_capture_handle_t media_get_capturer(void)
+{
+    return capturer_system.capturer_handle; // Zero-initialized (NULL) until esp_capture_open() stores a handle.
+}
+
+av_render_handle_t media_get_renderer(void)
+{
+    return renderer_system.av_renderer_handle; // Zero-initialized (NULL) until av_render_open() result is stored.
+}
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/main/media.h b/components/livekit/examples/minimal_video/main/media.h
new file mode 100644
index 0000000..3b07788
--- /dev/null
+++ b/components/livekit/examples/minimal_video/main/media.h
@@ -0,0 +1,39 @@
+
+#pragma once
+
+#include "esp_capture.h"
+#include "av_render.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// Registers codecs and initializes the capturer/renderer systems (0 on success).
+int media_init(void);
+
+/// Returns the capturer handle.
+///
+/// Pass this handle to a LiveKit room when initializing the room to enable
+/// publishing tracks from captured media (i.e. audio from a microphone and/or
+/// video from a camera).
+///
+/// How the capturer is configured is determined by the requirements of
+/// your application and the hardware you are using.
+///
+esp_capture_handle_t media_get_capturer(void);
+
+/// Returns the renderer handle.
+///
+/// Pass this handle to a LiveKit room when initializing the room to enable
+/// rendering media from subscribed tracks (i.e. playing audio through a
+/// speaker and/or displaying video to a screen).
+///
+/// How the renderer is configured is determined by the requirements of
+/// your application and the hardware you are using.
+///
+av_render_handle_t media_get_renderer(void);
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/components/livekit/examples/minimal_video/partitions.csv b/components/livekit/examples/minimal_video/partitions.csv
new file mode 100755
index 0000000..8fc1d57
--- /dev/null
+++ b/components/livekit/examples/minimal_video/partitions.csv
@@ -0,0 +1,5 @@
+# Name,   Type, SubType, Offset,  Size, Flags
+# Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild
+nvs,      data, nvs,     0x9000,  0x6000,
+phy_init, data, phy,     0xf000,  0x1000,
+factory,  app,  factory, 0x10000, 0x3C0000
\ No newline at end of file
diff --git a/components/livekit/examples/minimal_video/sdkconfig.defaults b/components/livekit/examples/minimal_video/sdkconfig.defaults
new file mode 100644
index 0000000..6bcd6fb
--- /dev/null
+++ b/components/livekit/examples/minimal_video/sdkconfig.defaults
@@ -0,0 +1,39 @@
+# This file was generated using idf.py save-defconfig. It can be edited manually.
+# Espressif IoT Development Framework (ESP-IDF) 5.4.1 Project Minimal Configuration
+#
+CONFIG_IDF_TARGET="esp32p4"
+CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE="ESP32_P4_DEV_V14"
+CONFIG_DEFAULT_PLAYBACK_VOL=85
+
+CONFIG_CACHE_L2_CACHE_256KB=y
+CONFIG_CACHE_L2_CACHE_LINE_128B=y
+CONFIG_CODEC_I2C_BACKWARD_COMPATIBLE=n
+CONFIG_COMPILER_OPTIMIZATION_PERF=y
+CONFIG_ESP_CONSOLE_USB_SERIAL_JTAG=y
+CONFIG_ESP_VIDEO_ENABLE_ISP_PIPELINE_CONTROLLER=y
+CONFIG_ESP_WS_CLIENT_ENABLE_DYNAMIC_BUFFER=y
+CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK=y
+CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
+CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y
+CONFIG_FREERTOS_HZ=1000
+CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096
+CONFIG_IDF_EXPERIMENTAL_FEATURES=y
+CONFIG_LWIP_MAX_UDP_PCBS=1024
+CONFIG_LWIP_SNTP_MAX_SERVERS=2
+CONFIG_LWIP_TCPIP_RECVMBOX_SIZE=64
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=64
+CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y
+CONFIG_MBEDTLS_SSL_DTLS_SRTP=y
+CONFIG_MBEDTLS_SSL_PROTO_DTLS=y
+CONFIG_PARTITION_TABLE_CUSTOM=y
+CONFIG_SLAVE_IDF_TARGET_ESP32C6=y
+CONFIG_SPIRAM_FETCH_INSTRUCTIONS=y
+CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=256
+CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=8192
+CONFIG_SPIRAM_SPEED_200M=y
+CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
+CONFIG_SPIRAM=y
+
+# For the camera included with the Espressif ESP32-P4-Function-EV-Board.
+CONFIG_CAMERA_SC2336=y
+CONFIG_CAMERA_SC2336_MIPI_RAW8_1024x600_30FPS=y
\ No newline at end of file
diff --git a/components/livekit/protocol/livekit_models.pb.h b/components/livekit/protocol/livekit_models.pb.h
index debd389..271fb82 100644
--- a/components/livekit/protocol/livekit_models.pb.h
+++ b/components/livekit/protocol/livekit_models.pb.h
@@ -283,6 +283,7 @@ typedef struct livekit_pb_video_layer {
     livekit_pb_video_quality_t quality;
     uint32_t width;
     uint32_t height;
+    uint32_t ssrc;
 } livekit_pb_video_layer_t;
 
 typedef struct livekit_pb_active_speaker_update {
@@ -767,7 +768,7 @@ extern "C" {
 #define LIVEKIT_PB_ENCRYPTION_INIT_DEFAULT       {0}
 #define LIVEKIT_PB_SIMULCAST_CODEC_INFO_INIT_DEFAULT {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
 #define LIVEKIT_PB_TRACK_INFO_INIT_DEFAULT       {NULL, _LIVEKIT_PB_TRACK_TYPE_MIN, 0, NULL, 0, 0, {_LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN}}
-#define LIVEKIT_PB_VIDEO_LAYER_INIT_DEFAULT      {_LIVEKIT_PB_VIDEO_QUALITY_MIN, 0, 0}
+#define LIVEKIT_PB_VIDEO_LAYER_INIT_DEFAULT      {_LIVEKIT_PB_VIDEO_QUALITY_MIN, 0, 0, 0}
 #define LIVEKIT_PB_DATA_PACKET_INIT_DEFAULT      {0, {LIVEKIT_PB_USER_PACKET_INIT_DEFAULT}, NULL, 0, NULL, 0, ""}
 #define LIVEKIT_PB_ACTIVE_SPEAKER_UPDATE_INIT_DEFAULT {{{NULL}, NULL}}
 #define LIVEKIT_PB_SPEAKER_INFO_INIT_DEFAULT     {{{NULL}, NULL}, 0, 0}
@@ -811,7 +812,7 @@ extern "C" {
 #define LIVEKIT_PB_ENCRYPTION_INIT_ZERO          {0}
 #define LIVEKIT_PB_SIMULCAST_CODEC_INFO_INIT_ZERO {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}}
 #define LIVEKIT_PB_TRACK_INFO_INIT_ZERO          {NULL, _LIVEKIT_PB_TRACK_TYPE_MIN, 0, NULL, 0, 0, {_LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN}}
-#define LIVEKIT_PB_VIDEO_LAYER_INIT_ZERO         {_LIVEKIT_PB_VIDEO_QUALITY_MIN, 0, 0}
+#define LIVEKIT_PB_VIDEO_LAYER_INIT_ZERO         {_LIVEKIT_PB_VIDEO_QUALITY_MIN, 0, 0, 0}
 #define LIVEKIT_PB_DATA_PACKET_INIT_ZERO         {0, {LIVEKIT_PB_USER_PACKET_INIT_ZERO}, NULL, 0, NULL, 0, ""}
 #define LIVEKIT_PB_ACTIVE_SPEAKER_UPDATE_INIT_ZERO {{{NULL}, NULL}}
 #define LIVEKIT_PB_SPEAKER_INFO_INIT_ZERO        {{{NULL}, NULL}, 0, 0}
@@ -884,6 +885,7 @@ extern "C" {
 #define LIVEKIT_PB_VIDEO_LAYER_QUALITY_TAG       1
 #define LIVEKIT_PB_VIDEO_LAYER_WIDTH_TAG         2
 #define LIVEKIT_PB_VIDEO_LAYER_HEIGHT_TAG        3
+#define LIVEKIT_PB_VIDEO_LAYER_SSRC_TAG          5
 #define LIVEKIT_PB_ACTIVE_SPEAKER_UPDATE_SPEAKERS_TAG 1
 #define LIVEKIT_PB_SPEAKER_INFO_SID_TAG          1
 #define LIVEKIT_PB_SPEAKER_INFO_LEVEL_TAG        2
@@ -1141,7 +1143,8 @@ X(a, STATIC,   REPEATED, UENUM,    audio_features,   19)
 #define LIVEKIT_PB_VIDEO_LAYER_FIELDLIST(X, a) \
 X(a, STATIC,   SINGULAR, UENUM,    quality,           1) \
 X(a, STATIC,   SINGULAR, UINT32,   width,             2) \
-X(a, STATIC,   SINGULAR, UINT32,   height,            3)
+X(a, STATIC,   SINGULAR, UINT32,   height,            3) \
+X(a, STATIC,   SINGULAR, UINT32,   ssrc,              5)
 #define LIVEKIT_PB_VIDEO_LAYER_CALLBACK NULL
 #define LIVEKIT_PB_VIDEO_LAYER_DEFAULT NULL
 
@@ -1614,7 +1617,7 @@ extern const pb_msgdesc_t livekit_pb_webhook_config_t_msg;
 #define LIVEKIT_PB_SIP_DTMF_SIZE                 9
 #define LIVEKIT_PB_TIMED_VERSION_SIZE            22
 #define LIVEKIT_PB_VIDEO_CONFIGURATION_SIZE      2
-#define LIVEKIT_PB_VIDEO_LAYER_SIZE              14
+#define LIVEKIT_PB_VIDEO_LAYER_SIZE              20
 #define LIVEKIT_PB_VP8_MUNGER_STATE_SIZE         31
 
 /* Mapping from canonical names (mangle_names or overridden package name) */
diff --git a/components/livekit/protocol/livekit_rtc.pb.h b/components/livekit/protocol/livekit_rtc.pb.h
index ad418bc..a2fc087 100644
--- a/components/livekit/protocol/livekit_rtc.pb.h
+++ b/components/livekit/protocol/livekit_rtc.pb.h
@@ -49,16 +49,20 @@ typedef struct livekit_pb_simulcast_codec {
 
 typedef struct livekit_pb_add_track_request {
     /* client ID of track, to match it when RTC track is received */
-    char cid[16];
+    char cid[3];
     char name[16];
     livekit_pb_track_type_t type;
+    /* to be deprecated in favor of layers */
+    uint32_t width;
+    uint32_t height;
     /* true to add track and initialize to muted */
     bool muted;
     livekit_pb_track_source_t source;
     pb_size_t layers_count;
     livekit_pb_video_layer_t layers[1];
+    livekit_pb_backup_codec_policy_t backup_codec_policy;
     pb_size_t audio_features_count;
-    livekit_pb_audio_track_feature_t audio_features[8];
+    livekit_pb_audio_track_feature_t audio_features[1];
 } livekit_pb_add_track_request_t;
 
 typedef struct livekit_pb_trickle_request {
@@ -434,6 +438,7 @@ extern "C" {
 
 #define livekit_pb_add_track_request_t_type_ENUMTYPE livekit_pb_track_type_t
 #define livekit_pb_add_track_request_t_source_ENUMTYPE livekit_pb_track_source_t
+#define livekit_pb_add_track_request_t_backup_codec_policy_ENUMTYPE livekit_pb_backup_codec_policy_t
 #define livekit_pb_add_track_request_t_audio_features_ENUMTYPE livekit_pb_audio_track_feature_t
 
 #define livekit_pb_trickle_request_t_target_ENUMTYPE livekit_pb_signal_target_t
@@ -493,7 +498,7 @@ extern "C" {
 #define LIVEKIT_PB_SIGNAL_REQUEST_INIT_DEFAULT   {0, {LIVEKIT_PB_SESSION_DESCRIPTION_INIT_DEFAULT}}
 #define LIVEKIT_PB_SIGNAL_RESPONSE_INIT_DEFAULT  {0, {LIVEKIT_PB_JOIN_RESPONSE_INIT_DEFAULT}}
 #define LIVEKIT_PB_SIMULCAST_CODEC_INIT_DEFAULT  {{{NULL}, NULL}, {{NULL}, NULL}}
-#define LIVEKIT_PB_ADD_TRACK_REQUEST_INIT_DEFAULT {"", "", _LIVEKIT_PB_TRACK_TYPE_MIN, 0, _LIVEKIT_PB_TRACK_SOURCE_MIN, 0, {LIVEKIT_PB_VIDEO_LAYER_INIT_DEFAULT}, 0, {_LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN}}
+#define LIVEKIT_PB_ADD_TRACK_REQUEST_INIT_DEFAULT {"", "", _LIVEKIT_PB_TRACK_TYPE_MIN, 0, 0, 0, _LIVEKIT_PB_TRACK_SOURCE_MIN, 0, {LIVEKIT_PB_VIDEO_LAYER_INIT_DEFAULT}, _LIVEKIT_PB_BACKUP_CODEC_POLICY_MIN, 0, {_LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN}}
 #define LIVEKIT_PB_TRICKLE_REQUEST_INIT_DEFAULT  {NULL, _LIVEKIT_PB_SIGNAL_TARGET_MIN, 0}
 #define LIVEKIT_PB_MUTE_TRACK_REQUEST_INIT_DEFAULT {{{NULL}, NULL}, 0}
 #define LIVEKIT_PB_JOIN_RESPONSE_INIT_DEFAULT    {false, LIVEKIT_PB_ROOM_INIT_DEFAULT, LIVEKIT_PB_PARTICIPANT_INFO_INIT_DEFAULT, 0, NULL, 0, {LIVEKIT_PB_ICE_SERVER_INIT_DEFAULT, LIVEKIT_PB_ICE_SERVER_INIT_DEFAULT, LIVEKIT_PB_ICE_SERVER_INIT_DEFAULT, LIVEKIT_PB_ICE_SERVER_INIT_DEFAULT}, 0, false, LIVEKIT_PB_CLIENT_CONFIGURATION_INIT_DEFAULT, 0, 0}
@@ -537,7 +542,7 @@ extern "C" {
 #define LIVEKIT_PB_SIGNAL_REQUEST_INIT_ZERO      {0, {LIVEKIT_PB_SESSION_DESCRIPTION_INIT_ZERO}}
 #define LIVEKIT_PB_SIGNAL_RESPONSE_INIT_ZERO     {0, {LIVEKIT_PB_JOIN_RESPONSE_INIT_ZERO}}
 #define LIVEKIT_PB_SIMULCAST_CODEC_INIT_ZERO     {{{NULL}, NULL}, {{NULL}, NULL}}
-#define LIVEKIT_PB_ADD_TRACK_REQUEST_INIT_ZERO   {"", "", _LIVEKIT_PB_TRACK_TYPE_MIN, 0, _LIVEKIT_PB_TRACK_SOURCE_MIN, 0, {LIVEKIT_PB_VIDEO_LAYER_INIT_ZERO}, 0, {_LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN, _LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN}}
+#define LIVEKIT_PB_ADD_TRACK_REQUEST_INIT_ZERO   {"", "", _LIVEKIT_PB_TRACK_TYPE_MIN, 0, 0, 0, _LIVEKIT_PB_TRACK_SOURCE_MIN, 0, {LIVEKIT_PB_VIDEO_LAYER_INIT_ZERO}, _LIVEKIT_PB_BACKUP_CODEC_POLICY_MIN, 0, {_LIVEKIT_PB_AUDIO_TRACK_FEATURE_MIN}}
 #define LIVEKIT_PB_TRICKLE_REQUEST_INIT_ZERO     {NULL, _LIVEKIT_PB_SIGNAL_TARGET_MIN, 0}
 #define LIVEKIT_PB_MUTE_TRACK_REQUEST_INIT_ZERO  {{{NULL}, NULL}, 0}
 #define LIVEKIT_PB_JOIN_RESPONSE_INIT_ZERO       {false, LIVEKIT_PB_ROOM_INIT_ZERO, LIVEKIT_PB_PARTICIPANT_INFO_INIT_ZERO, 0, NULL, 0, {LIVEKIT_PB_ICE_SERVER_INIT_ZERO, LIVEKIT_PB_ICE_SERVER_INIT_ZERO, LIVEKIT_PB_ICE_SERVER_INIT_ZERO, LIVEKIT_PB_ICE_SERVER_INIT_ZERO}, 0, false, LIVEKIT_PB_CLIENT_CONFIGURATION_INIT_ZERO, 0, 0}
@@ -585,9 +590,12 @@ extern "C" {
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_CID_TAG     1
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_NAME_TAG    2
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_TYPE_TAG    3
+#define LIVEKIT_PB_ADD_TRACK_REQUEST_WIDTH_TAG   4
+#define LIVEKIT_PB_ADD_TRACK_REQUEST_HEIGHT_TAG  5
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_MUTED_TAG   6
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_SOURCE_TAG  8
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_LAYERS_TAG  9
+#define LIVEKIT_PB_ADD_TRACK_REQUEST_BACKUP_CODEC_POLICY_TAG 16
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_AUDIO_FEATURES_TAG 17
 #define LIVEKIT_PB_TRICKLE_REQUEST_CANDIDATE_INIT_TAG 1
 #define LIVEKIT_PB_TRICKLE_REQUEST_TARGET_TAG    2
@@ -792,9 +800,12 @@ X(a, CALLBACK, SINGULAR, STRING,   cid,               2)
 X(a, STATIC,   SINGULAR, STRING,   cid,               1) \
 X(a, STATIC,   SINGULAR, STRING,   name,              2) \
 X(a, STATIC,   SINGULAR, UENUM,    type,              3) \
+X(a, STATIC,   SINGULAR, UINT32,   width,             4) \
+X(a, STATIC,   SINGULAR, UINT32,   height,            5) \
 X(a, STATIC,   SINGULAR, BOOL,     muted,             6) \
 X(a, STATIC,   SINGULAR, UENUM,    source,            8) \
 X(a, STATIC,   REPEATED, MESSAGE,  layers,            9) \
+X(a, STATIC,   SINGULAR, UENUM,    backup_codec_policy,  16) \
 X(a, STATIC,   REPEATED, UENUM,    audio_features,   17)
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_CALLBACK NULL
 #define LIVEKIT_PB_ADD_TRACK_REQUEST_DEFAULT NULL
@@ -1227,7 +1238,7 @@ extern const pb_msgdesc_t livekit_pb_track_subscribed_t_msg;
 /* livekit_pb_SubscriptionResponse_size depends on runtime parameters */
 /* livekit_pb_RequestResponse_size depends on runtime parameters */
 #define LIVEKIT_LIVEKIT_RTC_PB_H_MAX_SIZE        LIVEKIT_PB_ADD_TRACK_REQUEST_SIZE
-#define LIVEKIT_PB_ADD_TRACK_REQUEST_SIZE        81
+#define LIVEKIT_PB_ADD_TRACK_REQUEST_SIZE        69
 #define LIVEKIT_PB_LEAVE_REQUEST_SIZE            4
 #define LIVEKIT_PB_PING_SIZE                     22
 #define LIVEKIT_PB_PONG_SIZE                     22
diff --git a/components/livekit/protocol/protobufs/livekit_models.options b/components/livekit/protocol/protobufs/livekit_models.options
index 3c64749..ef779b6 100644
--- a/components/livekit/protocol/protobufs/livekit_models.options
+++ b/components/livekit/protocol/protobufs/livekit_models.options
@@ -24,7 +24,7 @@ livekit_pb.ParticipantPermission.can_update_metadata type:FT_IGNORE
 livekit_pb.ParticipantPermission.can_subscribe_metrics type:FT_IGNORE
 
 livekit_pb.VideoLayer.bitrate type:FT_IGNORE
-livekit_pb.VideoLayer.ssrc type:FT_IGNORE
+# livekit_pb.VideoLayer.ssrc type:FT_IGNORE
 
 livekit_pb.DataPacket.metrics type:FT_IGNORE
 livekit_pb.DataPacket.transcription type:FT_IGNORE
diff --git a/components/livekit/protocol/protobufs/livekit_rtc.options b/components/livekit/protocol/protobufs/livekit_rtc.options
index 147ea9d..68048b3 100644
--- a/components/livekit/protocol/protobufs/livekit_rtc.options
+++ b/components/livekit/protocol/protobufs/livekit_rtc.options
@@ -21,10 +21,8 @@ livekit_pb.SessionDescription.sdp type:FT_POINTER
 
 livekit_pb.TrickleRequest.candidateInit type:FT_POINTER
 
-livekit_pb.AddTrackRequest.cid max_length:15
+livekit_pb.AddTrackRequest.cid max_length:2
 livekit_pb.AddTrackRequest.name max_length:15
-livekit_pb.AddTrackRequest.width type:FT_IGNORE
-livekit_pb.AddTrackRequest.height type:FT_IGNORE
 livekit_pb.AddTrackRequest.layers max_count:1
 livekit_pb.AddTrackRequest.simulcast_codecs type:FT_IGNORE
 livekit_pb.AddTrackRequest.sid type:FT_IGNORE
@@ -32,8 +30,7 @@ livekit_pb.AddTrackRequest.stereo type:FT_IGNORE
 livekit_pb.AddTrackRequest.disable_red type:FT_IGNORE
 livekit_pb.AddTrackRequest.encryption type:FT_IGNORE
 livekit_pb.AddTrackRequest.stream type:FT_IGNORE
-livekit_pb.AddTrackRequest.backup_codec_policy type:FT_IGNORE
-livekit_pb.AddTrackRequest.audio_features max_count:8
+livekit_pb.AddTrackRequest.audio_features max_count:1
 
 livekit_pb.TrackPublishedResponse.cid type:FT_IGNORE
 livekit_pb.TrackPublishedResponse.track type:FT_IGNORE