livekit · xianshijing-lk · Nov 21, 2025 · Nov 15, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -141,22 +141,52 @@ add_custom_target(build_rust_ffi ALL
   VERBATIM
 )
 
+# A workaround to strip out the protozero_plugin.o symbols which cause our examples fail to link on Linux.
+# Make sure CMAKE_AR is defined; if not, you can hardcode "ar"
+if(NOT CMAKE_AR)
+  find_program(CMAKE_AR ar REQUIRED)
+endif()
+
+add_custom_command(
+  TARGET build_rust_ffi
+  POST_BUILD
+  COMMAND ${CMAKE_AR} -dv $<TARGET_FILE:livekit_ffi> protozero_plugin.o
+  COMMENT "Removing protozero_plugin.o (stray main) from liblivekit_ffi.a"
+)
+
 # ---- C++ wrapper library ----
 add_library(livekit
+  include/livekit/audio_frame.h
+  include/livekit/audio_source.h
   include/livekit/room.h
+  include/livekit/room_delegate.h
   include/livekit/ffi_handle.h
   include/livekit/ffi_client.h
+  include/livekit/local_audio_track.h
   include/livekit/participant.h
+  include/livekit/local_participant.h
   include/livekit/livekit.h
   include/livekit/stats.h
   include/livekit/track.h
+  include/livekit/track_publication.h
+  include/livekit/local_track_publication.h
+  include/livekit/remote_track_publication.h
+  src/audio_frame.cpp
+  src/audio_source.cpp
   src/ffi_handle.cpp
   src/ffi_client.cpp
+  src/local_audio_track.cpp
   src/room.cpp
-  src/room_event_converter.cpp
-  src/room_event_converter.h
+  src/room_proto_converter.cpp
+  src/room_proto_converter.h
+  src/local_participant.cpp
   src/stats.cpp
   src/track.cpp
+  src/track_proto_converter.cpp
+  src/track_proto_converter.h
+  src/track_publication.cpp
+  src/local_track_publication.cpp
+  src/remote_track_publication.cpp
 )
 
 # Add generated proto objects to the wrapper

diff --git a/examples/simple_room/main.cpp b/examples/simple_room/main.cpp
@@ -3,6 +3,7 @@
 #include <csignal>
 #include <cstdlib>
 #include <iostream>
+#include <random>
 #include <string>
 #include <thread>
 #include <vector>
@@ -28,7 +29,7 @@ void print_usage(const char *prog) {
             << "  LIVEKIT_URL, LIVEKIT_TOKEN\n";
 }
 
-void handle_sigint(int) { g_running = false; }
+void handle_sigint(int) { g_running.store(false); }
 
 bool parse_args(int argc, char *argv[], std::string &url, std::string &token) {
   // 1) --help
@@ -118,6 +119,47 @@ class SimpleRoomDelegate : public livekit::RoomDelegate {
   }
 };
 
+// Test utils to run a capture loop to publish noisy audio frames to the room
+void runNoiseCaptureLoop(const std::shared_ptr<AudioSource> &source) {
+  const int sample_rate = source->sample_rate();
+  const int num_channels = source->num_channels();
+  const int frame_ms = 10;
+  const int samples_per_channel = sample_rate * frame_ms / 1000;
+
+  std::mt19937 rng(std::random_device{}());
+  std::uniform_int_distribution<int16_t> noise_dist(-5000, 5000);
+  using Clock = std::chrono::steady_clock;
+  auto next_deadline = Clock::now();
+  while (g_running.load(std::memory_order_relaxed)) {
+    AudioFrame frame =
+        AudioFrame::create(sample_rate, num_channels, samples_per_channel);
+    const std::size_t total_samples =
+        static_cast<std::size_t>(num_channels) *
+        static_cast<std::size_t>(samples_per_channel);
+    for (std::size_t i = 0; i < total_samples; ++i) {
+      frame.data()[i] = noise_dist(rng);
+    }
+    try {
+      source->captureFrame(frame);
+    } catch (const std::exception &e) {
+      // If something goes wrong, log and break out
+      std::cerr << "Error in captureFrame: " << e.what() << std::endl;
+      break;
+    }
+
+    // Pace the loop to roughly real-time
+    next_deadline += std::chrono::milliseconds(frame_ms);
+    std::this_thread::sleep_until(next_deadline);
+  }
+
+  // Optionally clear queued audio on exit
+  try {
+    source->clearQueue();
+  } catch (...) {
+    // ignore errors on shutdown
+    std::cout << "Error in clearQueue" << std::endl;
+  }
+}
 } // namespace
 
 int main(int argc, char *argv[]) {
@@ -168,14 +210,46 @@ int main(int argc, char *argv[]) {
             << info.reliable_dc_buffered_amount_low_threshold << "\n"
             << "  Creation time (ms): " << info.creation_time << "\n";
 
-  // TOD(shijing), implement local and remoteParticipants in the room
+  auto audioSource = std::make_shared<AudioSource>(44100, 1, 10);
+  auto audioTrack =
+      LocalAudioTrack::createLocalAudioTrack("micTrack", audioSource);
+
+  TrackPublishOptions opts;
+  opts.source = TrackSource::SOURCE_MICROPHONE;
+  opts.dtx = false;
+  opts.simulcast = false;
 
+  try {
+    // publishTrack takes std::shared_ptr<Track>, LocalAudioTrack derives from
+    // Track
+    auto pub = room.local_participant()->publishTrack(audioTrack, opts);
+
+    std::cout << "Published track:\n"
+              << "  SID: " << pub->sid() << "\n"
+              << "  Name: " << pub->name() << "\n"
+              << "  Kind: " << static_cast<int>(pub->kind()) << "\n"
+              << "  Source: " << static_cast<int>(pub->source()) << "\n"
+              << "  Simulcasted: " << std::boolalpha << pub->simulcasted()
+              << "\n"
+              << "  Muted: " << std::boolalpha << pub->muted() << "\n";
+  } catch (const std::exception &e) {
+    std::cerr << "Failed to publish track: " << e.what() << std::endl;
+  }
+
+  // TODO, if we have pre-buffering feature, we might consider starting the
+  // thread right after creating the source.
+  std::thread audioThread(runNoiseCaptureLoop, audioSource);
   // Keep the app alive until Ctrl-C so we continue receiving events,
   // similar to asyncio.run(main()) keeping the loop running.
   while (g_running.load()) {
     std::this_thread::sleep_for(std::chrono::milliseconds(100));
   }
 
+  // Shutdown the audio thread.
+  if (audioThread.joinable()) {
+    audioThread.join();
+  }
+
   FfiClient::instance().shutdown();
   std::cout << "Exiting.\n";
   return 0;

diff --git a/include/livekit/audio_frame.h b/include/livekit/audio_frame.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2025 LiveKit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an “AS IS” BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace livekit {
+
+namespace proto {
+class AudioFrameBufferInfo;
+class OwnedAudioFrameBuffer;
+} // namespace proto
+
+class AudioFrame {
+public:
+  /**
+   * Construct an AudioFrame from raw PCM samples.
+   *
+   * @param data                Interleaved PCM samples (int16).
+   * @param sample_rate         Sample rate (Hz).
+   * @param num_channels        Number of channels.
+   * @param samples_per_channel Number of samples per channel.
+   *
+   * Throws std::invalid_argument if the data size is inconsistent with
+   * num_channels * samples_per_channel.
+   */
+  AudioFrame(std::vector<std::int16_t> data, int sample_rate, int num_channels,
+             int samples_per_channel);
+
+  /**
+   * Create a new zero-initialized AudioFrame instance.
+   */
+  static AudioFrame create(int sample_rate, int num_channels,
+                           int samples_per_channel);
+
+  /**
+   * Construct an AudioFrame by copying data out of an OwnedAudioFrameBuffer.
+   */
+  static AudioFrame fromOwnedInfo(const proto::OwnedAudioFrameBuffer &owned);
+
+  /**
+   * Build a proto AudioFrameBufferInfo pointing at this frame’s data.
+   *
+   * The underlying buffer must stay alive as long as the native side
+   * uses the pointer.
+   *
+   */
+  proto::AudioFrameBufferInfo toProto() const;
+
+  // ---- Accessors ----
+
+  const std::vector<std::int16_t> &data() const noexcept { return data_; }
+  std::vector<std::int16_t> &data() noexcept { return data_; }
+
+  /// Number of samples in the buffer (per all channels).
+  std::size_t total_samples() const noexcept { return data_.size(); }
+
+  /// Sample rate in Hz.
+  int sample_rate() const noexcept { return sample_rate_; }
+
+  /// Number of channels.
+  int num_channels() const noexcept { return num_channels_; }
+
+  /// Samples per channel.
+  int samples_per_channel() const noexcept { return samples_per_channel_; }
+
+  /// Duration in seconds (samples_per_channel / sample_rate).
+  double duration() const noexcept;
+
+  /// A human-readable description (like Python __repr__).
+  std::string to_string() const;
+
+private:
+  std::vector<std::int16_t> data_;
+  int sample_rate_;
+  int num_channels_;
+  int samples_per_channel_;
+};
+
+} // namespace livekit
diff --git a/include/livekit/audio_source.h b/include/livekit/audio_source.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2025 LiveKit
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an “AS IS” BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+#include "livekit/audio_frame.h"
+#include "livekit/ffi_handle.h"
+
+namespace livekit {
+
+namespace proto {
+class FfiRequest;
+class FfiResponse;
+} // namespace proto
+
+class FfiClient;
+
+/**
+ * Represents a real-time audio source with an internal audio queue.
+ */
+class AudioSource {
+public:
+  /**
+   * Create a new native audio source.
+   *
+   * @param sample_rate   Sample rate in Hz.
+   * @param num_channels  Number of channels.
+   * @param queue_size_ms Max buffer duration for the internal queue in ms.
+   */
+  AudioSource(int sample_rate, int num_channels, int queue_size_ms = 1000);
+
+  ~AudioSource();
+
+  AudioSource(const AudioSource &) = delete;
+  AudioSource &operator=(const AudioSource &) = delete;
+  AudioSource(AudioSource &&) noexcept = default;
+  AudioSource &operator=(AudioSource &&) noexcept = default;
+
+  /// The sample rate of the audio source in Hz.
+  int sample_rate() const noexcept { return sample_rate_; }
+
+  /// The number of audio channels.
+  int num_channels() const noexcept { return num_channels_; }
+
+  /// Underlying FFI handle ID used in FFI requests.
+  std::uint64_t ffi_handle_id() const noexcept {
+    return static_cast<std::uint64_t>(handle_.get());
+  }
+
+  /// Current duration of queued audio (in seconds).
+  double queuedDuration() const noexcept;
+
+  /**
+   * Clears the internal audio queue on the native side and resets local
+   * queue tracking.
+   */
+  void clearQueue();
+
+  /**
+   * Push an AudioFrame into the audio source and BLOCK until the FFI callback
+   * confirms that the native side has finished processing (consuming) the
+   * frame. Safe usage: The frame's internal buffer must remain valid only until
+   * this function returns. Because this call blocks until the corresponding FFI
+   * callback arrives, the caller may safely destroy or reuse the frame
+   * afterward.
+   * @param frame       The audio frame to send. No-op if the frame contains
+   * zero samples.
+   * @param timeout_ms  Maximum time to wait for the FFI callback.
+   *                    - If timeout_ms > 0: block up to this duration.
+   *                      A timeout will cause std::runtime_error.
+   *                    - If timeout_ms == 0: wait indefinitely until the
+   * callback arrives (recommended for production unless the caller needs
+   * explicit timeout control).
+   *
+   * Notes:
+   *   - This is a blocking call.
+   *   - timeout_ms == 0 (infinite wait) is the safest mode because it
+   * guarantees the callback completes before the function returns, which in
+   * turn guarantees that the audio buffer lifetime is fully protected. The
+   * caller does not need to manage or extend the frame lifetime manually.
+   *
+   *   - May throw std::runtime_error if:
+   *       • the FFI reports an error
+   *
+   *   - The underlying FFI request *must* eventually produce a callback for
+   * each frame. If the FFI layer is misbehaving or the event loop is stalled,
+   *     a timeout may occur in bounded-wait mode.
+   */
+  void captureFrame(const AudioFrame &frame, int timeout_ms = 20);
+
+  /**
+   * Block until the currently queued audio has (roughly) played out.
+   */
+  void waitForPlayout() const;
+
+private:
+  // Internal helper to reset the local queue tracking (like _release_waiter).
+  void resetQueueTracking() noexcept;
+
+  int sample_rate_;
+  int num_channels_;
+  int queue_size_ms_;
+
+  // RAII wrapper for this audio source's FFI handle
+  FfiHandle handle_;
+
+  // Queue tracking (all in seconds; based on steady_clock in the .cpp).
+  mutable double last_capture_{0.0};
+  mutable double q_size_{0.0};
+};
+
+} // namespace livekit