Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec

- **Supported chips**: ESP32-S3 and ESP32-P4
- **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
- **Bidirectional video**: *video support coming soon*
- **Video publishing**: H.264 encoding, subscribing coming soon
- **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
- **Real-time data**: data packets, remote method calls (RPC)

Expand All @@ -38,6 +38,7 @@ One of the best ways to get started with LiveKit is by reviewing the [examples](

- [**Voice AI Agent**](./components/livekit/examples/voice_agent/README.md): Conversational AI voice agent that interacts with hardware based on user requests.
- [**Minimal**](./components/livekit/examples/minimal/README.md): Basic example of connecting to a LiveKit room with bidirectional audio.
- [**Minimal Video**](./components/livekit/examples/minimal_video/README.md): Equivalent to the minimal example with video publishing.

Once you have chosen an example to be your starting point, create a fresh project from it locally using the following command:

Expand Down
2 changes: 1 addition & 1 deletion components/livekit/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ Use this SDK to add realtime video, audio and data features to your ESP32 projec

- **Supported chips**: ESP32-S3 and ESP32-P4
- **Bidirectional audio**: Opus encoding, acoustic echo cancellation (AEC)
- **Bidirectional video**: *video support coming soon*
- **Video publishing**: H.264 encoding, subscribing coming soon
- **AI Agents**: interact with agents in the cloud built with [LiveKit Agents](https://docs.livekit.io/agents/)
- **Real-time data**: data packets, remote method calls (RPC)
124 changes: 62 additions & 62 deletions components/livekit/core/engine.c
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,11 @@ static engine_err_t send_add_video_track(engine_t *eng)
.type = LIVEKIT_PB_TRACK_TYPE_VIDEO,
.source = LIVEKIT_PB_TRACK_SOURCE_CAMERA,
.muted = false,
.width = video_layer.width,
.height = video_layer.height,
.layers_count = 1,
.layers = { video_layer },
.audio_features_count = 0
.backup_codec_policy = LIVEKIT_PB_BACKUP_CODEC_POLICY_REGRESSION
};

if (signal_send_add_track(eng->signal_handle, &req) != SIGNAL_ERR_NONE) {
Expand All @@ -341,36 +343,21 @@ static engine_err_t send_add_video_track(engine_t *eng)
return ENGINE_ERR_NONE;
}

/// Begins media streaming and sends add track requests.
static engine_err_t publish_tracks(engine_t *eng)
/// Send add track requests based on the media options.
///
/// Note: SFU expects add track request before publisher peer offer is sent.
///
static engine_err_t send_add_track_requests(engine_t *eng)
{
if (eng->options.media.audio_info.codec == ESP_PEER_AUDIO_CODEC_NONE &&
eng->options.media.video_info.codec == ESP_PEER_VIDEO_CODEC_NONE) {
ESP_LOGI(TAG, "No media tracks to publish");
return ENGINE_ERR_NONE;
if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
send_add_audio_track(eng) != ENGINE_ERR_NONE) {
return ENGINE_ERR_SIGNALING;
}

int ret = ENGINE_ERR_OTHER;
do {
if (media_stream_begin(eng) != ENGINE_ERR_NONE) {
ret = ENGINE_ERR_MEDIA;
break;
}
if (eng->options.media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE &&
send_add_audio_track(eng) != ENGINE_ERR_NONE) {
ret = ENGINE_ERR_SIGNALING;
break;
}
if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
send_add_video_track(eng) != ENGINE_ERR_NONE) {
ret = ENGINE_ERR_SIGNALING;
break;
}
return ENGINE_ERR_NONE;
} while (0);

media_stream_end(eng);
return ret;
if (eng->options.media.video_info.codec != ESP_PEER_VIDEO_CODEC_NONE &&
send_add_video_track(eng) != ENGINE_ERR_NONE) {
return ENGINE_ERR_SIGNALING;
}
return ENGINE_ERR_NONE;
}

// MARK: - Signal event handlers
Expand Down Expand Up @@ -420,6 +407,7 @@ static void on_peer_sdp(const char *sdp, peer_role_t role, void *ctx)
event_enqueue(eng, &ev, false);
}


static bool on_peer_data_packet(livekit_pb_data_packet_t* packet, void *ctx)
{
engine_t *eng = (engine_t *)ctx;
Expand Down Expand Up @@ -833,7 +821,9 @@ static bool handle_state_connecting(engine_t *eng, const engine_event_t *ev)
break;
case EV_SIG_STATE:
signal_state_t sig_state = ev->detail.sig_state;
if (sig_state == SIGNAL_STATE_DISCONNECTED) {
if (sig_state == SIGNAL_STATE_CONNECTED) {
send_add_track_requests(eng);
} else if(sig_state == SIGNAL_STATE_DISCONNECTED) {
eng->failure_reason = LIVEKIT_FAILURE_REASON_OTHER;
eng->state = ENGINE_STATE_BACKOFF;
} else if (sig_state & SIGNAL_STATE_FAILED_ANY) {
Expand Down Expand Up @@ -886,7 +876,7 @@ static bool handle_state_connected(engine_t *eng, const engine_event_t *ev)
case _EV_STATE_ENTER:
eng->retry_count = 0;
eng->failure_reason = LIVEKIT_FAILURE_REASON_NONE;
publish_tracks(eng);
media_stream_begin(eng);
break;
case EV_CMD_CLOSE:
signal_send_leave(eng->signal_handle);
Expand Down Expand Up @@ -1066,6 +1056,45 @@ static void engine_task(void *arg)
vTaskDelete(NULL);
}

/// Configures capture path 0 from the engine's media options and enables it.
///
/// On success the path handle is stored in `eng->capturer_path` and
/// `ENGINE_ERR_NONE` is returned. If either the sink setup or the enable
/// step fails, an error is logged and `ENGINE_ERR_MEDIA` is returned.
static engine_err_t enable_capture_sink(engine_t *eng)
{
    esp_capture_sink_cfg_t cfg = {0};

    // Audio: codec, sample rate and channel count follow the media options;
    // the sample width is fixed at 16 bits.
    cfg.audio_info.format_id       = capture_audio_codec_type(eng->options.media.audio_info.codec);
    cfg.audio_info.sample_rate     = eng->options.media.audio_info.sample_rate;
    cfg.audio_info.channel         = eng->options.media.audio_info.channel;
    cfg.audio_info.bits_per_sample = 16;

    // Video: the format is always H.264; dimensions and frame rate follow
    // the media options.
    cfg.video_info.format_id = ESP_CAPTURE_FMT_ID_H264;
    cfg.video_info.width     = (uint16_t)eng->options.media.video_info.width;
    cfg.video_info.height    = (uint16_t)eng->options.media.video_info.height;
    cfg.video_info.fps       = (uint8_t)eng->options.media.video_info.fps;

    // Path index 0 is the only capture path used here.
    const bool setup_ok = esp_capture_sink_setup(eng->options.media.capturer,
                                                 0,
                                                 &cfg,
                                                 &eng->capturer_path) == ESP_CAPTURE_ERR_OK;
    if (!setup_ok) {
        ESP_LOGE(TAG, "Capture sink setup failed");
        return ENGINE_ERR_MEDIA;
    }

    // TODO: Add muxer

    const bool enable_ok = esp_capture_sink_enable(eng->capturer_path,
                                                   ESP_CAPTURE_RUN_MODE_ALWAYS) == ESP_CAPTURE_ERR_OK;
    if (!enable_ok) {
        ESP_LOGE(TAG, "Capture sink enable failed");
        return ENGINE_ERR_MEDIA;
    }

    return ENGINE_ERR_NONE;
}

// MARK: - Public API

engine_handle_t engine_init(const engine_options_t *options)
Expand Down Expand Up @@ -1117,38 +1146,9 @@ engine_handle_t engine_init(const engine_options_t *options)
if (eng->signal_handle == NULL) {
goto _init_failed;
}
eng->renderer_handle = options->media.renderer;

esp_capture_sink_cfg_t sink_cfg = {
.audio_info = {
.format_id = capture_audio_codec_type(eng->options.media.audio_info.codec),
.sample_rate = eng->options.media.audio_info.sample_rate,
.channel = eng->options.media.audio_info.channel,
.bits_per_sample = 16,
},
.video_info = {
.format_id = capture_video_codec_type(eng->options.media.video_info.codec),
.width = (uint16_t)eng->options.media.video_info.width,
.height = (uint16_t)eng->options.media.video_info.height,
.fps = (uint8_t)eng->options.media.video_info.fps,
},
};
if (options->media.audio_info.codec != ESP_PEER_AUDIO_CODEC_NONE) {
// TODO: Can we ensure the renderer is valid? If not, return error.
eng->renderer_handle = options->media.renderer;
}

if (esp_capture_sink_setup(
eng->options.media.capturer,
0, // Path index
&sink_cfg,
&eng->capturer_path
) != ESP_CAPTURE_ERR_OK) {
goto _init_failed;
}
if (esp_capture_sink_enable(
eng->capturer_path,
ESP_CAPTURE_RUN_MODE_ALWAYS
) != ESP_CAPTURE_ERR_OK) {
if (enable_capture_sink(eng) != ENGINE_ERR_NONE) {
goto _init_failed;
}
return eng;
Expand Down
7 changes: 7 additions & 0 deletions components/livekit/examples/minimal_video/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# IDF
**/sdkconfig
**/sdkconfig.old
**/build
**/managed_components
**/dependencies.lock
**/dist
6 changes: 6 additions & 0 deletions components/livekit/examples/minimal_video/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Top-level CMake project file for the minimal_video example.
#
# The following lines of boilerplate have to be in your project's CMakeLists
# in this exact order for cmake to work correctly
# Lowest CMake version this example declares support for.
cmake_minimum_required(VERSION 3.5)
# Only the `main` component is listed in COMPONENTS.
set(COMPONENTS main) # Trim build
# Pull in the project build logic from the ESP-IDF tree located via the
# IDF_PATH environment variable.
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
# Project name for this example.
project(minimal_video)
61 changes: 61 additions & 0 deletions components/livekit/examples/minimal_video/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Minimal Video

Basic example of connecting to a LiveKit room with bidirectional audio and video publishing.

## Configuration

> [!TIP]
> Options can either be set through *menuconfig* or added to *sdkconfig* as shown below.

### Credentials

**Option A**: Use a LiveKit Sandbox to get up and running quickly. Set up a LiveKit Sandbox from your [Cloud Project](https://cloud.livekit.io/projects/p_/sandbox), and use its ID in your configuration:

```ini
CONFIG_LK_EXAMPLE_USE_SANDBOX=y
CONFIG_LK_EXAMPLE_SANDBOX_ID="my-project-xxxxxx"
```

**Option B**: Specify a server URL and pregenerated token:

```ini
CONFIG_LK_EXAMPLE_USE_PREGENERATED=y
CONFIG_LK_EXAMPLE_TOKEN="your-jwt-token"
CONFIG_LK_EXAMPLE_SERVER_URL="ws://localhost:7880"
```

### Network

Connect using WiFi as follows:

```ini
CONFIG_LK_EXAMPLE_USE_WIFI=y
CONFIG_LK_EXAMPLE_WIFI_SSID="<your SSID>"
CONFIG_LK_EXAMPLE_WIFI_PASSWORD="<your password>"
```

### Development Board

This example uses the Espressif [*codec_board*](https://components.espressif.com/components/tempotian/codec_board/) component to access board-specific peripherals for media capture and rendering. Supported boards are [defined here](https://github.com/espressif/esp-webrtc-solution/blob/65d13427dd83c37264b6cff966d60af0f84f649c/components/codec_board/board_cfg.txt). Locate the name of your board, and set it as follows:

```ini
CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE="ESP32_P4_DEV_V14"
```

## Build & Flash

Navigate to this directory in your terminal. Run the following command to build your application, flash it to your board, and monitor serial output:

```sh
idf.py flash monitor
```

Once running, the example will establish a network connection, connect to a LiveKit room, and print the following message:

```txt
I (19508) livekit_example: Room state: Connected
```

## Next Steps

With a room connection established, you can connect another client (another ESP32, [LiveKit Meet](https://meet.livekit.io), etc.) or dispatch an [agent](https://docs.livekit.io/agents/) to talk with.
2 changes: 2 additions & 0 deletions components/livekit/examples/minimal_video/main/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
idf_component_register(SRCS "main.c" "example.c" "board.c" "media.c"
INCLUDE_DIRS ".")
60 changes: 60 additions & 0 deletions components/livekit/examples/minimal_video/main/Kconfig.projbuild
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
menu "LiveKit Example"

config LK_EXAMPLE_CODEC_BOARD_TYPE
string "Codec board type"
default "DUMMY_CODEC_BOARD"
help
The model of dev board you are using. See board_cfg.txt from the codec
board component for a list of supported boards.

choice LK_EXAMPLE_CONNECTION_METHOD
prompt "Choose room connection method"
help
Choose how to connect to the room in the example.

config LK_EXAMPLE_USE_SANDBOX
bool "Sandbox token"
help
Use a sandbox token server for room authentication.

config LK_EXAMPLE_USE_PREGENERATED
bool "Pre-generated token"
help
Use a pre-generated token and server URL for room connection.
endchoice

config LK_EXAMPLE_SERVER_URL
depends on LK_EXAMPLE_USE_PREGENERATED
string "Server URL"
default "ws://localhost:7880"
help
The server URL to use for room connection.
config LK_EXAMPLE_TOKEN
depends on LK_EXAMPLE_USE_PREGENERATED
string "Token"
help
The token to use for room connection.

config LK_EXAMPLE_SANDBOX_ID
depends on LK_EXAMPLE_USE_SANDBOX
string "Sandbox ID"
help
The ID of the sandbox token server to use.
config LK_EXAMPLE_ROOM_NAME
depends on LK_EXAMPLE_USE_SANDBOX
string "Room name (optional)"
help
Specific room name sandbox tokens will be generated with.
config LK_EXAMPLE_PARTICIPANT_NAME
depends on LK_EXAMPLE_USE_SANDBOX
string "Participant name (optional)"
help
Specific participant name sandbox tokens will be generated with.

config LK_EXAMPLE_SPEAKER_VOLUME
int "Default speaker volume (0-100%)"
default 85
range 0 100
help
Default playback volume for speaker output.
endmenu
19 changes: 19 additions & 0 deletions components/livekit/examples/minimal_video/main/board.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#include "esp_log.h"
#include "board.h"
#include "codec_init.h"
#include "codec_board.h"
#include <math.h>

static const char *TAG = "board";

/// Initializes the development board's codec hardware.
///
/// Selects the board model named by `CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE`,
/// then initializes the codec with device reuse disabled.
///
/// Takes no arguments; the explicit `(void)` makes this definition a
/// prototype that matches the declaration in board.h.
void board_init(void)
{
    ESP_LOGI(TAG, "Initializing board");

    set_codec_board_type(CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE);

    // Playback and recording run at the same time, so reuse_dev must be false.
    codec_init_cfg_t cfg = {
        .reuse_dev = false
    };
    // NOTE(review): any result reported by init_codec() is not checked here;
    // confirm whether it can signal failure and should be handled.
    init_codec(&cfg);
}
12 changes: 12 additions & 0 deletions components/livekit/examples/minimal_video/main/board.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#pragma once

#ifdef __cplusplus
extern "C" {
#endif

/// Initialize the board.
///
/// Sets the codec board type from `CONFIG_LK_EXAMPLE_CODEC_BOARD_TYPE` and
/// initializes the codec. Takes no arguments and returns nothing.
void board_init(void);

#ifdef __cplusplus
}
#endif
Loading