Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
356dfb7
feat: add wip hacked together start to an agents sdk
1egoman Aug 6, 2025
effb228
fix: address imports
1egoman Aug 7, 2025
ec87981
feat: get agent sdk to compile
1egoman Aug 7, 2025
ed4a803
feat: mostly get AgentParticipant working
1egoman Aug 7, 2025
a926e16
feat: comment out old transcription stuff
1egoman Aug 7, 2025
809e322
feat: mostly get the transcriptions incoming pipeline working
1egoman Aug 7, 2025
7760aec
feat: get CombinedMessageReceiver working
1egoman Aug 7, 2025
5e35853
feat: got transcriptions aggregating properly, it's way way more nuan…
1egoman Aug 7, 2025
3978348
feat: add MessageSender and ChatMessageSender / CombinedMessageSender…
1egoman Aug 7, 2025
b2355fc
feat: add loopback message receiver to ChatMessageSender
1egoman Aug 7, 2025
2bf6419
docs: add more comments
1egoman Aug 7, 2025
abe2026
feat: start migrating pre-existing hooks to agent alternatives
1egoman Aug 8, 2025
4c1c024
refactor: break up existing code into many files
1egoman Aug 8, 2025
8fcd2ed
docs: add initial AgentSession docs
1egoman Aug 8, 2025
9425833
feat: add OrderedMessageList
1egoman Aug 8, 2025
4443c04
feat: add more agent specific react hook coverage
1egoman Aug 8, 2025
994b209
feat; add stubs for a log of additional functionality lukas proposed …
1egoman Aug 8, 2025
1658dc9
feat: add waitUntilAgentIsAvailable
1egoman Aug 8, 2025
76a3962
feat: implement message aggregator idea
1egoman Aug 8, 2025
fb01009
docs: add docs for message aggregator idea
1egoman Aug 11, 2025
b4ba41f
feat: move agent state into AgentParticipant
1egoman Aug 11, 2025
b192a46
feat: fix useAgentLocalParticipant local participant mic track not sh…
1egoman Aug 11, 2025
6b9dfc5
feat: remove dead code from AgentParticipant
1egoman Aug 11, 2025
58a5ad3
feat: rename AgentParticipant to Agent
1egoman Aug 11, 2025
d38ddf6
docs: remove comments
1egoman Aug 11, 2025
91f4a08
feat: add "ready" state to useAgentMessages
1egoman Aug 11, 2025
728258b
feat: add most of agent control bar behind the scenes logic into useA…
1egoman Aug 11, 2025
0d312e3
feat: port useDebug to use AgentSession
1egoman Aug 11, 2025
22282c2
fix: agent timeout should disconnect whole AgentSession
1egoman Aug 11, 2025
4d22785
refactor: update docs
1egoman Aug 11, 2025
b2602b8
feat: add MessageReceived event, aggregation should be the responsibi…
1egoman Aug 11, 2025
6f0eeb5
refactor: comment out dead code
1egoman Aug 11, 2025
d24be87
fix: remove SentMessage from ChatEntryProps
1egoman Aug 14, 2025
8f6301b
feat: add special case for sendMessage string -> SentChatMessage
1egoman Aug 14, 2025
00674f4
feat: add ConnectionDetailsProvider to further abstract generating do…
1egoman Aug 14, 2025
34c3645
fix: add missing package
1egoman Aug 15, 2025
0d6288e
feat: remove defaultAggregator and startsAt param to createMessageAgg…
1egoman Aug 18, 2025
6fae521
feat: add explicit AgentSessionEvent.Disconnected event
1egoman Aug 18, 2025
82a5fe2
feat: use AgentSessionEvent.Disconnected to close any open `ReceivedM…
1egoman Aug 18, 2025
e5b1fc0
feat: make ReceivedMessageAggregator methods arrow functions so they …
1egoman Aug 18, 2025
f4eeace
feat: remove startsAt from TBD react layer
1egoman Aug 18, 2025
ad4c236
feat: add types-emitter package (this wasn't installed for some reason?)
1egoman Aug 18, 2025
730dbed
feat: add logic to ensure that `connect` can't be run until underlyin…
1egoman Aug 18, 2025
80038a0
feat: add better mechanism to control whether microphone is enabled o…
1egoman Aug 18, 2025
c3993be
feat: parameterize agentConnectTimeoutMilliseconds with default value
1egoman Aug 18, 2025
fa6cac4
feat: add dependency to temp react layer so that useAgentMessages rea…
1egoman Aug 18, 2025
e47c9af
feat: replace AgentState with AgentConnectionState / AgentConversatio…
1egoman Aug 18, 2025
6f12617
fix: add await as part of refresh so return is blocked until after re…
1egoman Aug 18, 2025
1d716d7
feat: add centralized participant attributes enum
1egoman Aug 18, 2025
a7ff04f
feat: remove canSend, proxy all messages to all `MessageSender`s
1egoman Aug 19, 2025
9d7c5b3
feat: add chat message options to SentChatMessage
1egoman Aug 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
228 changes: 228 additions & 0 deletions agent-sdk/agent-session/Agent.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import type TypedEventEmitter from 'typed-emitter';
import { EventEmitter } from "events";
import { ConnectionState, ParticipantEvent, ParticipantKind, RemoteParticipant, Room, RoomEvent, Track } from 'livekit-client';
import { getParticipantTrackRefs, participantTrackEvents, TrackReference } from '@/agent-sdk/external-deps/components-js';
import { ParticipantEventCallbacks } from '@/agent-sdk/external-deps/client-sdk-js';
import { ParticipantAttributes } from '@/agent-sdk/lib/participant-attributes';

/**
 * State representing the current connection status to the server hosted agent.
 *
 * The value is reported as 'connecting' until an agent participant has been found and has
 * published its state attribute, even if the underlying room is already connected.
 */
export type AgentConnectionState = 'disconnected' | 'connecting' | 'connected' | 'reconnecting' | 'signalReconnecting';

/**
 * State representing the current status of the agent, whether it is ready for speech, etc.
 *
 * 'disconnected' is the value used while the room is disconnected; the remaining values are
 * taken from the state attribute published by the agent participant once it is available.
 */
export type AgentConversationalState = 'disconnected' | 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';

/**
 * Names of the events emitted by `Agent`.
 *
 * Every member must map to a unique string: the value is the event name handed to the
 * underlying emitter, so two members sharing a value would share listeners.
 */
export enum AgentEvent {
  /** The agent's resolved video track changed (or was cleared). */
  VideoTrackChanged = 'videoTrackChanged',
  /** The agent's resolved audio track changed (or was cleared). */
  AudioTrackChanged = 'audioTrackChanged',
  /** The agent participant's attributes changed. */
  AgentAttributesChanged = 'agentAttributesChanged',
  /** The computed agent connection state changed. */
  AgentConnectionStateChanged = 'agentConnectionStateChanged',
  /** The computed agent conversational state changed. */
  AgentConversationalStateChanged = 'agentConversationalStateChanged',
}

/**
 * Listener signatures for each `AgentEvent`, keyed by event name.
 * Used as the type parameter of the typed emitter that `Agent` extends.
 */
export type AgentCallbacks = {
// Track listeners receive the newly selected track reference, or null when there is none.
[AgentEvent.VideoTrackChanged]: (newTrack: TrackReference | null) => void;
[AgentEvent.AudioTrackChanged]: (newTrack: TrackReference | null) => void;
// Receives the full attribute map of the agent participant.
[AgentEvent.AgentAttributesChanged]: (newAttributes: Record<string, string>) => void;
// State listeners receive the new state value; they are only emitted when the value changes.
[AgentEvent.AgentConnectionStateChanged]: (newAgentConnectionState: AgentConnectionState) => void;
[AgentEvent.AgentConversationalStateChanged]: (newAgentConversationalState: AgentConversationalState) => void;
};

/**
* Agent encapsulates all agent state, normalizing some quirks around how LiveKit Agents work.
*/
export default class Agent extends (EventEmitter as new () => TypedEventEmitter<AgentCallbacks>) {
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any strong (or JS-specific) arguments why it couldn't be flattened into one "observable" object? Agent sounds more like Participant vs an object tracking Room events.

Copy link
Contributor Author

@1egoman 1egoman Aug 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agent sounds more like Participant vs an object tracking Room events

FWIW that was the intention, I want to avoid AgentSession becoming large and having to deal with all the quirks of the agent participant / worker participant, the "on behalf of" stuff, etc.

It sounds like you are proposing this become more of a general "state container" that would have a larger responsibility than it currently does?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, 2 main points here:

  • Agent should not know that much about the Room - looks inverted from OOP perspective to me
  • other ecosystems may prefer "1 smart observable publishing n stupid DTOs"

Personally I don't think AgentSession will ever be "too big" on the client side (look at the agents impl then)

/** Room whose events and remote participants this instance observes. */
private room: Room;

/** Most recently computed connection state; see `updateConnectionState`. */
connectionState: AgentConnectionState = 'disconnected';
/** Most recently computed conversational state; see `updateConversationalState`. */
conversationalState: AgentConversationalState = 'disconnected';

/** Remote participant of kind AGENT that does not carry the "publish on behalf" attribute. */
private agentParticipant: RemoteParticipant | null = null;
/** Remote participant of kind AGENT whose "publish on behalf" attribute equals the agent's identity. */
private workerParticipant: RemoteParticipant | null = null; // ref: https://docs.livekit.io/agents/integrations/avatar/#avatar-workers
/** First microphone-source track found on the agent participant, else on the worker participant. */
audioTrack: TrackReference | null = null;
/** First camera-source track found on the agent participant, else on the worker participant. */
videoTrack: TrackReference | null = null;

/** Last attribute map received from the agent participant. */
attributes: Record<string, string> = {};
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the defined attributes are primarily intended for internal usage.

If anything, I think we should strip all internal lk attributes and only expose the ones set by users.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, probably orthogonal to this PR 👍


/**
 * Creates an Agent bound to `room` and subscribes to the room / local participant events
 * needed to keep the agent state up to date. Call `teardown()` to unsubscribe.
 *
 * @param room - The room in which the agent participant is expected to appear.
 */
constructor(room: Room) {
  super();
  this.room = room;

  this.room.on(RoomEvent.ParticipantConnected, this.handleParticipantConnected);
  this.room.on(RoomEvent.ParticipantDisconnected, this.handleParticipantDisconnected);
  this.room.on(RoomEvent.ConnectionStateChanged, this.handleConnectionStateChanged);
  this.room.localParticipant.on(ParticipantEvent.TrackPublished, this.handleLocalParticipantTrackPublished);

  // The room may already contain an agent (and avatar worker) when this instance is created.
  // ParticipantConnected will not fire again for them, so resolve them eagerly here.
  this.updateParticipants();

  this.updateConnectionState();
  this.updateConversationalState();
}

/**
 * Removes every listener this instance registered: the room and local participant listeners
 * added in the constructor, plus the attribute and track listeners attached to the currently
 * tracked agent / worker participants. Safe to call more than once.
 */
teardown() {
  this.room.off(RoomEvent.ParticipantConnected, this.handleParticipantConnected);
  this.room.off(RoomEvent.ParticipantDisconnected, this.handleParticipantDisconnected);
  this.room.off(RoomEvent.ConnectionStateChanged, this.handleConnectionStateChanged);
  this.room.localParticipant.off(ParticipantEvent.TrackPublished, this.handleLocalParticipantTrackPublished);

  // Participant-level listeners are attached in updateParticipants(); without removing them the
  // participants would keep calling into this instance after teardown.
  this.agentParticipant?.off(ParticipantEvent.AttributesChanged, this.handleAttributesChanged);
  for (const event of participantTrackEvents) {
    const trackEvent = event as keyof ParticipantEventCallbacks;
    this.agentParticipant?.off(trackEvent, this.handleUpdateTracks);
    this.workerParticipant?.off(trackEvent, this.handleUpdateTracks);
  }
}

/** Room listener: a remote participant joined, so re-resolve the agent / worker participants. */
private handleParticipantConnected = (): void => this.updateParticipants();
/** Room listener: a remote participant left, so re-resolve the agent / worker participants. */
private handleParticipantDisconnected = (): void => this.updateParticipants();

/**
 * Room listener: the room connection state changed. Both derived states read `room.state`,
 * so both are recomputed (connection state first, then conversational state).
 */
private handleConnectionStateChanged = (): void => {
  this.updateConnectionState();
  this.updateConversationalState();
};

/**
 * Local participant listener: a local track was published. The conversational state depends
 * on whether a local microphone publication exists, so it is recomputed.
 */
private handleLocalParticipantTrackPublished = (): void => this.updateConversationalState();

/**
 * Re-resolves the agent participant and its optional avatar worker participant from the
 * room's remote participants, then rewires listeners and refreshes derived state.
 *
 * - The agent is the first AGENT-kind participant without the "publish on behalf" attribute.
 * - The worker is the first AGENT-kind participant whose "publish on behalf" attribute equals
 *   the agent's identity; it is only looked up when an agent exists.
 *
 * When either participant changes (including when it disappears), listeners are moved from the
 * old participant to the new one, attributes are refreshed (reset to `{}` when there is no
 * agent), and the audio / video tracks are recomputed once.
 */
private updateParticipants() {
  const remoteParticipants = this.roomRemoteParticipants;

  const newAgentParticipant = remoteParticipants.find(
    (p) => p.kind === ParticipantKind.AGENT && !(ParticipantAttributes.publishOnBehalf in p.attributes),
  ) ?? null;
  const newWorkerParticipant = newAgentParticipant
    ? (remoteParticipants.find(
        (p) =>
          p.kind === ParticipantKind.AGENT &&
          p.attributes[ParticipantAttributes.publishOnBehalf] === newAgentParticipant.identity,
      ) ?? null)
    : null;

  const oldAgentParticipant = this.agentParticipant;
  const oldWorkerParticipant = this.workerParticipant;
  const agentChanged = oldAgentParticipant !== newAgentParticipant;
  const workerChanged = oldWorkerParticipant !== newWorkerParticipant;
  if (!agentChanged && !workerChanged) {
    return;
  }

  this.agentParticipant = newAgentParticipant;
  this.workerParticipant = newWorkerParticipant;

  // 1. Listen for attribute changes
  if (agentChanged) {
    oldAgentParticipant?.off(ParticipantEvent.AttributesChanged, this.handleAttributesChanged);
    newAgentParticipant?.on(ParticipantEvent.AttributesChanged, this.handleAttributesChanged);

    // Also runs when the agent went away: clears stale attributes and recomputes both states,
    // which would otherwise keep reporting the departed agent's last known values.
    this.handleAttributesChanged(newAgentParticipant?.attributes ?? {});
  }

  // 2. Listen for track updates
  for (const event of participantTrackEvents) {
    const trackEvent = event as keyof ParticipantEventCallbacks;
    if (agentChanged) {
      oldAgentParticipant?.off(trackEvent, this.handleUpdateTracks);
      newAgentParticipant?.on(trackEvent, this.handleUpdateTracks);
    }
    if (workerChanged) {
      oldWorkerParticipant?.off(trackEvent, this.handleUpdateTracks);
      newWorkerParticipant?.on(trackEvent, this.handleUpdateTracks);
    }
  }

  // Recompute tracks once after all listeners are rewired. This also clears audioTrack /
  // videoTrack when the participant that provided them has left.
  this.handleUpdateTracks();
}

/**
 * Recomputes `videoTrack` (camera source) and `audioTrack` (microphone source), emitting the
 * matching change event whenever the selected reference differs from the stored one.
 * For each source the agent participant's tracks take precedence over the worker's.
 */
private handleUpdateTracks = () => {
  // Looks up the getters on every call so each lookup sees the current participant tracks.
  const pickTrack = (source: Track.Source): TrackReference | null =>
    this.agentTracks.find((ref) => ref.source === source) ??
    this.workerTracks.find((ref) => ref.source === source) ??
    null;

  const nextVideo = pickTrack(Track.Source.Camera);
  if (nextVideo !== this.videoTrack) {
    this.videoTrack = nextVideo;
    this.emit(AgentEvent.VideoTrackChanged, nextVideo);
  }

  const nextAudio = pickTrack(Track.Source.Microphone);
  if (nextAudio !== this.audioTrack) {
    this.audioTrack = nextAudio;
    this.emit(AgentEvent.AudioTrackChanged, nextAudio);
  }
};

/**
 * Stores the agent participant's latest attributes, announces them via
 * `AgentEvent.AgentAttributesChanged`, then recomputes both derived states since each of them
 * reads the agent state attribute.
 *
 * @param attributes - Full attribute map of the agent participant.
 */
private handleAttributesChanged = (attributes: Record<string, string>): void => {
  this.attributes = attributes;
  this.emit(AgentEvent.AgentAttributesChanged, this.attributes);

  this.updateConnectionState();
  this.updateConversationalState();
};

/**
 * Derives `connectionState` from the room connection state and agent availability, emitting
 * `AgentEvent.AgentConnectionStateChanged` only when the value actually changes.
 *
 * - Room disconnected -> 'disconnected'.
 * - Room connecting, or no agent participant yet, or the agent has not published its state
 *   attribute -> 'connecting'.
 * - Otherwise the room's own connection state is mirrored.
 */
private updateConnectionState() {
  let newConnectionState: AgentConnectionState;

  const roomConnectionState = this.room.state;
  if (roomConnectionState === ConnectionState.Disconnected) {
    newConnectionState = 'disconnected';
  } else if (
    roomConnectionState === ConnectionState.Connecting ||
    !this.agentParticipant ||
    !this.attributes[ParticipantAttributes.state]
  ) {
    newConnectionState = 'connecting';
  } else {
    newConnectionState = roomConnectionState;
  }

  if (this.connectionState !== newConnectionState) {
    this.connectionState = newConnectionState;
    this.emit(AgentEvent.AgentConnectionStateChanged, newConnectionState);
  }
}

/**
 * Derives `conversationalState`, emitting `AgentEvent.AgentConversationalStateChanged` only
 * when the value actually changes. Later rules override earlier ones:
 *
 * 1. 'disconnected' while the room is disconnected, otherwise 'initializing'.
 * 2. 'listening' as soon as the local participant has a microphone publication.
 * 3. The agent's own state attribute, once an agent participant exists and has published it.
 */
private updateConversationalState() {
  let newConversationalState: AgentConversationalState = 'disconnected';

  if (this.room.state !== ConnectionState.Disconnected) {
    newConversationalState = 'initializing';
  }

  // If the microphone preconnect buffer is active, then the state should be "listening" rather
  // than "initializing"
  const micTrack = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
  if (micTrack) {
    newConversationalState = 'listening';
  }

  if (this.agentParticipant && this.attributes[ParticipantAttributes.state]) {
    // ref: https://github.com/livekit/agents/blob/65170238db197f62f479eb7aaef1c0e18bfad6e7/livekit-agents/livekit/agents/voice/events.py#L97
    const agentState = this.attributes[ParticipantAttributes.state] as 'initializing' | 'idle' | 'listening' | 'thinking' | 'speaking';
    newConversationalState = agentState;
  }

  if (this.conversationalState !== newConversationalState) {
    this.conversationalState = newConversationalState;
    this.emit(AgentEvent.AgentConversationalStateChanged, newConversationalState);
  }
}

/** Snapshot array of the room's current remote participants. */
private get roomRemoteParticipants() {
  return [...this.room.remoteParticipants.values()];
}

/**
 * Microphone and camera track references of the agent participant,
 * or an empty array when no agent participant is known.
 */
private get agentTracks() {
  const participant = this.agentParticipant;
  return participant
    ? getParticipantTrackRefs(participant, { sources: [Track.Source.Microphone, Track.Source.Camera] })
    : [];
}

/**
 * Microphone and camera track references of the worker participant,
 * or an empty array when no worker participant is known.
 */
private get workerTracks() {
  const participant = this.workerParticipant;
  return participant
    ? getParticipantTrackRefs(participant, { sources: [Track.Source.Microphone, Track.Source.Camera] })
    : [];
}
}
Loading