Skip to content

Commit

Permalink
Add hooks for transcriptions (#853)
Browse files Browse the repository at this point in the history
  • Loading branch information
lukasIO committed May 22, 2024
1 parent b921291 commit 79636e4
Show file tree
Hide file tree
Showing 13 changed files with 394 additions and 1,824 deletions.
6 changes: 6 additions & 0 deletions .changeset/happy-steaks-collect.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@livekit/components-core": patch
"@livekit/components-react": patch
---

Add experimental hooks for transcriptions
1,784 changes: 0 additions & 1,784 deletions examples/nextjs/yarn.lock

This file was deleted.

3 changes: 0 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,5 @@
},
"engines": {
"node": ">=18"
},
"pnpm": {
"overrides": {}
}
}
35 changes: 35 additions & 0 deletions packages/core/etc/components-core.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import type { Participant } from 'livekit-client';
import { ParticipantEvent } from 'livekit-client';
import type { ParticipantEventCallbacks } from 'livekit-client/dist/src/room/participant/Participant';
import type { ParticipantPermission } from '@livekit/protocol';
import type { PublicationEventCallbacks } from 'livekit-client/dist/src/room/track/TrackPublication';
import { RemoteParticipant } from 'livekit-client';
import { Room } from 'livekit-client';
import { RoomEvent } from 'livekit-client';
Expand All @@ -30,11 +31,18 @@ import { Track } from 'livekit-client';
import { TrackEvent as TrackEvent_2 } from 'livekit-client';
import { TrackPublication } from 'livekit-client';
import type { TrackPublishOptions } from 'livekit-client';
import { TranscriptionSegment } from 'livekit-client';
import type { VideoCaptureOptions } from 'livekit-client';

// @public (undocumented)
export function activeSpeakerObserver(room: Room): Observable<Participant[]>;

// @public (undocumented)
export function addMediaTimestampToTranscription(segment: TranscriptionSegment, timestamps: {
timestamp: number;
rtpTimestamp?: number;
}): ReceivedTranscriptionSegment;

// @public (undocumented)
export const allParticipantEvents: ParticipantEvent[];

Expand Down Expand Up @@ -153,12 +161,24 @@ export const DataTopic: {
readonly CHAT_UPDATE: "lk-chat-update-topic";
};

// @public (undocumented)
export function dedupeSegments<T extends TranscriptionSegment>(prevSegments: T[], newSegments: T[], windowSize: number): T[];

// @public (undocumented)
export const defaultUserChoices: LocalUserChoices;

// @public (undocumented)
export function didActiveSegmentsChange<T extends TranscriptionSegment>(prevActive: T[], newActive: T[]): boolean;

// @public (undocumented)
export function encryptionStatusObservable(room: Room, participant: Participant): Observable<boolean>;

// @public (undocumented)
export function getActiveTranscriptionSegments(segments: ReceivedTranscriptionSegment[], syncTimes: {
timestamp: number;
rtpTimestamp?: number;
}, maxAge?: number): ReceivedTranscriptionSegment[];

// Warning: (ae-internal-missing-underscore) The name "getScrollBarWidth" should be prefixed with an underscore because the declaration is marked as @internal
//
// @internal
Expand Down Expand Up @@ -357,6 +377,12 @@ export interface ReceivedDataMessage<T extends string | undefined = string> exte
from?: Participant;
}

// @public (undocumented)
export type ReceivedTranscriptionSegment = TranscriptionSegment & {
receivedAtMediaTimestamp: number;
receivedAt: number;
};

// @public (undocumented)
export type RequireOnlyOne<T, Keys extends keyof T = keyof T> = Pick<T, Exclude<keyof T, Keys>> & {
[K in Keys]-?: Required<Pick<T, K>> & Partial<Record<Exclude<Keys, K>, undefined>>;
Expand Down Expand Up @@ -565,6 +591,9 @@ export type TokenizeGrammar = {
[type: string]: RegExp;
};

// @public (undocumented)
export function trackEventSelector<T extends TrackEvent_2>(publication: TrackPublication | Track, event: T): Observable<Parameters<PublicationEventCallbacks[Extract<T, keyof PublicationEventCallbacks>]>>;

// Warning: (ae-internal-missing-underscore) The name "TrackIdentifier" should be prefixed with an underscore because the declaration is marked as @internal
//
// @internal
Expand Down Expand Up @@ -617,6 +646,12 @@ export type TrackSourceWithOptions = {
withPlaceholder: boolean;
};

// @public (undocumented)
export function trackSyncTimeObserver(track: Track): Observable<number>;

// @public (undocumented)
export function trackTranscriptionObserver(publication: TrackPublication): Observable<[transcription: TranscriptionSegment[]]>;

// Warning: (ae-forgotten-export) The symbol "UpdatableItem" needs to be exported by the entry point index.d.ts
//
// @public
Expand Down
1 change: 1 addition & 0 deletions packages/core/src/helper/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ export * from './eventGroups';
export { selectGridLayout, GRID_LAYOUTS, type GridLayoutDefinition } from './grid-layouts';
export { setDifference } from './set-helper';
export { supportsScreenSharing } from './featureDetection';
export * from './transcriptions';
75 changes: 75 additions & 0 deletions packages/core/src/helper/transcriptions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import type { TranscriptionSegment } from 'livekit-client';

/**
 * A `TranscriptionSegment` extended with the timestamps that were recorded when it was received.
 * Values of this shape are produced by `addMediaTimestampToTranscription`.
 */
export type ReceivedTranscriptionSegment = TranscriptionSegment & {
/** Media timestamp at reception; `addMediaTimestampToTranscription` fills it from `rtpTimestamp` and falls back to `0` when none is given. */
receivedAtMediaTimestamp: number;
/** Reception time; `addMediaTimestampToTranscription` fills it from the `timestamp` it is given. */
receivedAt: number;
};

/**
 * Filters `segments` down to the ones that are active at the current track time.
 *
 * A segment is active from its display start time until `endTime - startTime + maxAge` later.
 * When an RTP timestamp is available, the display start time is the later of the segment's
 * `startTime` and the media timestamp at which it was received; without one, the wall-clock
 * `receivedAt` value is compared against the current wall-clock time instead.
 *
 * @param segments - the received segments to filter
 * @param syncTimes - the latest sync information of the track; `rtpTimestamp` is optional
 * @param maxAge - extra time a segment stays active past its own duration, defaults to 0
 * @returns a new array with the active segments, in their original order
 */
export function getActiveTranscriptionSegments(
  segments: ReceivedTranscriptionSegment[],
  syncTimes: { timestamp: number; rtpTimestamp?: number },
  maxAge = 0,
) {
  if (segments.length === 0) {
    return [];
  }
  const { rtpTimestamp } = syncTimes;
  // Presence check instead of truthiness: a timestamp of 0 is still a track timestamp and must
  // select the same clock that `??` picks for `currentTrackTime` below.
  const hasTrackSync = typeof rtpTimestamp === 'number';
  // Read the clock once so every segment of this call is judged against the same instant.
  const currentTrackTime = rtpTimestamp ?? performance.timeOrigin + performance.now();

  return segments.filter((segment) => {
    // if a segment arrives late, consider startTime to be the media timestamp from when the segment was received client side
    const displayStartTime = hasTrackSync
      ? Math.max(segment.receivedAtMediaTimestamp, segment.startTime)
      : segment.receivedAt;
    // "active" duration is computed by the diff between start and end time, so we don't rely on displayStartTime to always be the same as the segment's startTime
    const segmentDuration = maxAge + segment.endTime - segment.startTime;
    return (
      currentTrackTime >= displayStartTime && currentTrackTime <= displayStartTime + segmentDuration
    );
  });
}

/**
 * Creates a copy of `segment` that additionally records when it was received.
 *
 * @param segment - the transcription segment to annotate; it is not mutated
 * @param timestamps - `timestamp` becomes `receivedAt`, `rtpTimestamp` becomes `receivedAtMediaTimestamp`
 * @returns the segment's properties plus `receivedAtMediaTimestamp` and `receivedAt`
 */
export function addMediaTimestampToTranscription(
  segment: TranscriptionSegment,
  timestamps: { timestamp: number; rtpTimestamp?: number },
): ReceivedTranscriptionSegment {
  const { timestamp: receivedAt, rtpTimestamp } = timestamps;
  // without an RTP timestamp the media timestamp falls back to 0
  const receivedAtMediaTimestamp = rtpTimestamp ?? 0;
  return { ...segment, receivedAtMediaTimestamp, receivedAt };
}

/**
 * Merges previously known and newly received segments into one list that is unique by `id`.
 *
 * When an id occurs more than once the latest occurrence wins and the segment takes the
 * position of that latest occurrence. Neither input array is mutated.
 *
 * @param prevSegments - the segments that are already known
 * @param newSegments - the segments that were just received; they take precedence over `prevSegments`
 * @param windowSize - maximum length of the result; a value of 0 or less yields an empty array
 * @returns An array of unique (by id) `TranscriptionSegment`s. Latest wins. If the resulting array would be longer than `windowSize`, only the last `windowSize` entries are kept
 */
export function dedupeSegments<T extends TranscriptionSegment>(
  prevSegments: T[],
  newSegments: T[],
  windowSize: number,
) {
  // `slice(-0)` is `slice(0)` and would return every segment instead of none
  if (windowSize <= 0) {
    return [] as Array<T>;
  }
  // A Map iterates in insertion order; deleting before re-inserting moves an id to the position
  // of its latest occurrence.
  const latestById = new Map<string, T>();
  for (const segment of [...prevSegments, ...newSegments]) {
    latestById.delete(segment.id);
    latestById.set(segment.id, segment);
  }
  return Array.from(latestById.values()).slice(-windowSize);
}

/**
 * Tells whether the set of active segments differs between two snapshots.
 *
 * The snapshots differ when their lengths differ, or when at least one segment of `newActive`
 * has no counterpart in `prevActive` with the same `id`, `text`, `final`, `language`,
 * `startTime` and `endTime`. The order of the segments is not taken into account.
 *
 * @param prevActive - the previously active segments
 * @param newActive - the currently active segments
 * @returns `true` if the active segments changed, `false` otherwise
 */
export function didActiveSegmentsChange<T extends TranscriptionSegment>(
  prevActive: T[],
  newActive: T[],
) {
  if (prevActive.length !== newActive.length) {
    return true;
  }

  const hasSameContent = (known: T, candidate: T) =>
    known.id === candidate.id &&
    known.text === candidate.text &&
    known.final === candidate.final &&
    known.language === candidate.language &&
    known.startTime === candidate.startTime &&
    known.endTime === candidate.endTime;

  // changed as soon as one new segment cannot be matched against any previous segment
  return newActive.some(
    (candidate) => prevActive.findIndex((known) => hasSameContent(known, candidate)) === -1,
  );
}
36 changes: 36 additions & 0 deletions packages/core/src/observables/track.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import type { TrackReference } from '../track-reference';
import { observeRoomEvents } from './room';
import type { ParticipantTrackIdentifier } from '../types';
import { observeParticipantEvents } from './participant';
import type { PublicationEventCallbacks } from 'livekit-client/dist/src/room/track/TrackPublication';

export function trackObservable(track: TrackPublication) {
const trackObserver = observeTrackEvents(
Expand Down Expand Up @@ -170,3 +171,38 @@ export function participantTracksObservable(

return observable;
}

/**
 * Exposes a single event of a track or track publication as an observable.
 *
 * On subscription a listener for `event` is registered via `publication.on`; every time the
 * event fires, the listener's arguments are emitted as one tuple. Unsubscribing removes the
 * listener again via `publication.off`.
 *
 * @param publication - the track publication or track to listen on
 * @param event - the event to observe
 * @returns an observable that emits the argument tuple of each occurrence of `event`
 */
export function trackEventSelector<T extends TrackEvent>(
  publication: TrackPublication | Track,
  event: T,
) {
  return new Observable<
    Parameters<PublicationEventCallbacks[Extract<T, keyof PublicationEventCallbacks>]>
  >((subscriber) => {
    // forwards the raw listener arguments as a single tuple value
    const forwardEvent = (
      ...eventArgs: Parameters<
        PublicationEventCallbacks[Extract<T, keyof PublicationEventCallbacks>]
      >
    ) => {
      subscriber.next(eventArgs);
    };
    // @ts-expect-error not a perfect overlap between TrackEvent and keyof TrackEventCallbacks
    publication.on(event, forwardEvent);

    // teardown: detach the listener once the subscription ends
    return () => {
      // @ts-expect-error not a perfect overlap between TrackEvent and keyof TrackEventCallbacks
      publication.off(event, forwardEvent);
    };
  });
}

/**
 * Observes the `TrackEvent.TranscriptionReceived` event of a track publication.
 *
 * @param publication - the track publication to listen on
 * @returns the observable created by `trackEventSelector`; each emission is the argument tuple of the event
 */
export function trackTranscriptionObserver(publication: TrackPublication) {
  const transcriptionEvents = trackEventSelector(publication, TrackEvent.TranscriptionReceived);
  return transcriptionEvents;
}

/**
 * Observes the `TrackEvent.TimeSyncUpdate` event of a track.
 *
 * @param track - the track to listen on
 * @returns an observable that emits the first argument of every time sync update event
 */
export function trackSyncTimeObserver(track: Track) {
  const timeSyncEvents = trackEventSelector(track, TrackEvent.TimeSyncUpdate);
  // the selector emits argument tuples; only the first argument is of interest here
  return timeSyncEvents.pipe(map((eventArgs) => eventArgs[0]));
}
12 changes: 12 additions & 0 deletions packages/react/etc/components-react.api.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import type { SVGProps } from 'react';
import { Track } from 'livekit-client';
import type { TrackPublication } from 'livekit-client';
import { TrackPublishOptions } from 'livekit-client';
import type { TranscriptionSegment } from 'livekit-client';
import type { VideoCaptureOptions } from 'livekit-client';

// @public (undocumented)
Expand Down Expand Up @@ -651,6 +652,11 @@ export interface TrackToggleProps<T extends ToggleSource> extends Omit<React_2.B
source: T;
}

// @alpha (undocumented)
export interface TrackTranscriptionOptions {
bufferSize?: number;
}

// Warning: (ae-internal-missing-underscore) The name "UnfocusToggleIcon" should be prefixed with an underscore because the declaration is marked as @internal
//
// @internal (undocumented)
Expand Down Expand Up @@ -1103,6 +1109,11 @@ export function useTrackToggle<T extends ToggleSource>({ source, onChange, initi
export interface UseTrackToggleProps<T extends ToggleSource> extends Omit<TrackToggleProps<T>, 'showIcon'> {
}

// @alpha (undocumented)
export function useTrackTranscription(trackRef: TrackReferenceOrPlaceholder, options?: TrackTranscriptionOptions): {
segments: ReceivedTranscriptionSegment[];
};

// @alpha
export const useTrackVolume: (trackOrTrackReference?: LocalAudioTrack | RemoteAudioTrack | TrackReference, options?: AudioAnalyserOptions) => number;

Expand Down Expand Up @@ -1159,6 +1170,7 @@ export type WidgetState = {
// src/context/layout-context.ts:11:3 - (ae-forgotten-export) The symbol "WidgetContextType" needs to be exported by the entry point index.d.ts
// src/hooks/useGridLayout.ts:24:6 - (ae-forgotten-export) The symbol "GridLayoutDefinition" needs to be exported by the entry point index.d.ts
// src/hooks/useMediaDeviceSelect.ts:47:29 - (ae-forgotten-export) The symbol "SetMediaDeviceOptions" needs to be exported by the entry point index.d.ts
// src/hooks/useTrackTranscription.ts:39:38 - (ae-forgotten-export) The symbol "ReceivedTranscriptionSegment" needs to be exported by the entry point index.d.ts

// (No @packageDocumentation comment for this package)

Expand Down
1 change: 1 addition & 0 deletions packages/react/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
"usehooks-ts": "2.16.0"
},
"peerDependencies": {
"@livekit/protocol": "^1.16.0",
"livekit-client": "^2.1.5",
"react": ">=18",
"react-dom": ">=18",
Expand Down
1 change: 1 addition & 0 deletions packages/react/src/hooks/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ export {
export { useIsEncrypted } from './useIsEncrypted';
export * from './useTrackVolume';
export * from './useParticipantTracks';
export * from './useTrackTranscription';
17 changes: 17 additions & 0 deletions packages/react/src/hooks/useTrackSyncTime.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import * as React from 'react';
import { type TrackReferenceOrPlaceholder, trackSyncTimeObserver } from '@livekit/components-core';
import { useObservableState } from './internal';

/**
 * Tracks the time sync updates of the track behind a track reference.
 *
 * The observable is only created when the reference has a published track, and is re-created
 * when that track changes. The second argument handed to `useObservableState` is built from
 * the current wall-clock time and the track's current `rtpTimestamp` (presumably the value
 * used until the observable emits — confirm against `useObservableState`).
 *
 * @internal
 */
export function useTrackSyncTime({ publication }: TrackReferenceOrPlaceholder) {
  const track = publication?.track;

  const syncTimeObservable = React.useMemo(() => {
    if (!track) {
      return undefined;
    }
    return trackSyncTimeObserver(track);
  }, [track]);

  const initialSyncTime = {
    timestamp: Date.now(),
    rtpTimestamp: track?.rtpTimestamp,
  };
  return useObservableState(syncTimeObservable, initialSyncTime);
}
86 changes: 86 additions & 0 deletions packages/react/src/hooks/useTrackTranscription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import {
type ReceivedTranscriptionSegment,
addMediaTimestampToTranscription as addTimestampsToTranscription,
dedupeSegments,
// getActiveTranscriptionSegments,
getTrackReferenceId,
trackTranscriptionObserver,
type TrackReferenceOrPlaceholder,
// didActiveSegmentsChange,
} from '@livekit/components-core';
import type { TranscriptionSegment } from 'livekit-client';
import * as React from 'react';
import { useTrackSyncTime } from './useTrackSyncTime';

/**
* Options accepted by `useTrackTranscription`.
* @alpha
*/
export interface TrackTranscriptionOptions {
/**
* Maximum number of transcription segments that are kept in state.
* @defaultValue 100
*/
bufferSize?: number;
// Currently disabled option, kept for reference:
// amount of time (in ms) that the segment is considered `active` past its original segment duration, defaults to 2_000
// maxAge?: number;
}

/** Default option values of `useTrackTranscription`; the options passed by the caller are spread over these. */
const TRACK_TRANSCRIPTION_DEFAULTS = {
bufferSize: 100,
// maxAge: 2_000,
} as const satisfies TrackTranscriptionOptions;

/**
 * Collects the transcription segments that are received for a track reference.
 *
 * Every received segment is annotated with the sync timestamps that are current at the time it
 * arrives and is then merged into the buffered list, unique by segment id.
 *
 * @param trackRef - the track reference whose publication is observed; nothing is subscribed while it has no publication
 * @param options - see `TrackTranscriptionOptions`
 * @returns An object consisting of `segments`, the buffered segments with a maximum length of `options.bufferSize`
 * @alpha
 */
export function useTrackTranscription(
  trackRef: TrackReferenceOrPlaceholder,
  options?: TrackTranscriptionOptions,
) {
  const opts = { ...TRACK_TRANSCRIPTION_DEFAULTS, ...options };
  const [segments, setSegments] = React.useState<Array<ReceivedTranscriptionSegment>>([]);
  // const [activeSegments, setActiveSegments] = React.useState<Array<ReceivedTranscriptionSegment>>(
  //   [],
  // );
  // const prevActiveSegments = React.useRef<ReceivedTranscriptionSegment[]>([]);
  const syncTimestamps = useTrackSyncTime(trackRef);

  // The event handler needs the latest sync timestamps and buffer size. Reading them through a
  // ref keeps them out of the subscription effect's dependencies; a handler that is re-created
  // on every render would otherwise tear down and re-create the subscription after each render.
  const latestValues = React.useRef({ syncTimestamps, bufferSize: opts.bufferSize });
  React.useEffect(() => {
    latestValues.current = { syncTimestamps, bufferSize: opts.bufferSize };
  });

  React.useEffect(() => {
    if (!trackRef.publication) {
      return;
    }
    const handleSegmentMessage = (newSegments: TranscriptionSegment[]) => {
      const { syncTimestamps: currentSyncTimestamps, bufferSize } = latestValues.current;
      setSegments((prevSegments) =>
        dedupeSegments(
          prevSegments,
          // when first receiving a segment, add the current media timestamp to it
          newSegments.map((s) => addTimestampsToTranscription(s, currentSyncTimestamps)),
          bufferSize,
        ),
      );
    };
    const subscription = trackTranscriptionObserver(trackRef.publication).subscribe((evt) => {
      handleSegmentMessage(...evt);
    });
    return () => {
      subscription.unsubscribe();
    };
    // only re-subscribe when a different track is referenced
  }, [getTrackReferenceId(trackRef)]);

  // React.useEffect(() => {
  //   if (syncTimestamps) {
  //     const newActiveSegments = getActiveTranscriptionSegments(
  //       segments,
  //       syncTimestamps,
  //       opts.maxAge,
  //     );
  //     // only update active segment array if content actually changed
  //     if (didActiveSegmentsChange(prevActiveSegments.current, newActiveSegments)) {
  //       setActiveSegments(newActiveSegments);
  //       prevActiveSegments.current = newActiveSegments;
  //     }
  //   }
  // }, [syncTimestamps, segments, opts.maxAge]);

  return { segments };
}
Loading

0 comments on commit 79636e4

Please sign in to comment.