Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

voice - implement lazy activation (microsoft/vscode-internalbacklog#4877) #207060

Merged
merged 3 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ export class DynamicSpeechAccessibilityConfiguration extends Disposable implemen
) {
super();

- this._register(Event.runAndSubscribe(speechService.onDidRegisterSpeechProvider, () => this.updateConfiguration()));
+ this._register(Event.runAndSubscribe(speechService.onDidChangeHasSpeechProvider, () => this.updateConfiguration()));
}

private updateConfiguration(): void {
Expand Down
6 changes: 3 additions & 3 deletions src/vs/workbench/contrib/chat/common/voiceChat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ export interface IVoiceChatService {
* if the user says "at workspace slash fix this problem", the result
* will be "@workspace /fix this problem".
*/
- createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): IVoiceChatSession;
+ createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): Promise<IVoiceChatSession>;
}

export interface IVoiceChatTextEvent extends ISpeechToTextEvent {
Expand Down Expand Up @@ -114,15 +114,15 @@ export class VoiceChatService extends Disposable implements IVoiceChatService {
}
}

- createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): IVoiceChatSession {
+ async createVoiceChatSession(token: CancellationToken, options: IVoiceChatSessionOptions): Promise<IVoiceChatSession> {
const disposables = new DisposableStore();
disposables.add(token.onCancellationRequested(() => disposables.dispose()));

let detectedAgent = false;
let detectedSlashCommand = false;

const emitter = disposables.add(new Emitter<IVoiceChatTextEvent>());
- const session = this.speechService.createSpeechToTextSession(token, 'chat');
+ const session = await this.speechService.createSpeechToTextSession(token, 'chat');

const phrases = this.createPhrases(options.model);
disposables.add(session.onDidChange(e => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ class VoiceChatSessions {
@IConfigurationService private readonly configurationService: IConfigurationService
) { }

- start(controller: IVoiceChatSessionController, context?: IChatExecuteActionContext): IVoiceChatSession {
+ async start(controller: IVoiceChatSessionController, context?: IChatExecuteActionContext): Promise<IVoiceChatSession> {
this.stop();

let disableTimeout = false;
Expand All @@ -339,7 +339,7 @@ class VoiceChatSessions {

this.voiceChatGettingReadyKey.set(true);

- const voiceChatSession = this.voiceChatService.createVoiceChatSession(cts.token, { usesAgents: controller.context !== 'inline', model: context?.widget?.viewModel?.model });
+ const voiceChatSession = await this.voiceChatService.createVoiceChatSession(cts.token, { usesAgents: controller.context !== 'inline', model: context?.widget?.viewModel?.model });

let inputValue = controller.getInput();

Expand Down Expand Up @@ -474,7 +474,7 @@ async function startVoiceChatWithHoldMode(id: string, accessor: ServicesAccessor
return;
}

- const session = VoiceChatSessions.getInstance(instantiationService).start(controller, context);
+ const session = await VoiceChatSessions.getInstance(instantiationService).start(controller, context);

await holdMode;
handle.dispose();
Expand Down Expand Up @@ -545,7 +545,7 @@ export class HoldToVoiceChatInChatViewAction extends Action2 {
const handle = disposableTimeout(async () => {
const controller = await VoiceChatSessionControllerFactory.create(accessor, 'view');
if (controller) {
- session = VoiceChatSessions.getInstance(instantiationService).start(controller, context);
+ session = await VoiceChatSessions.getInstance(instantiationService).start(controller, context);
session.setTimeoutDisabled(true);
}
}, VOICE_KEY_HOLD_THRESHOLD);
Expand Down Expand Up @@ -921,7 +921,7 @@ export class KeywordActivationContribution extends Disposable implements IWorkbe
}

private registerListeners(): void {
- this._register(Event.runAndSubscribe(this.speechService.onDidRegisterSpeechProvider, () => {
+ this._register(Event.runAndSubscribe(this.speechService.onDidChangeHasSpeechProvider, () => {
this.updateConfiguration();
this.handleKeywordActivation();
}));
Expand Down
41 changes: 20 additions & 21 deletions src/vs/workbench/contrib/chat/test/common/voiceChat.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ suite('VoiceChat', () => {
class TestSpeechService implements ISpeechService {
_serviceBrand: undefined;

- onDidRegisterSpeechProvider = Event.None;
- onDidUnregisterSpeechProvider = Event.None;
+ onDidChangeHasSpeechProvider = Event.None;

readonly hasSpeechProvider = true;
readonly hasActiveSpeechToTextSession = false;
Expand All @@ -74,7 +73,7 @@ suite('VoiceChat', () => {
onDidStartSpeechToTextSession = Event.None;
onDidEndSpeechToTextSession = Event.None;

- createSpeechToTextSession(token: CancellationToken): ISpeechToTextSession {
+ async createSpeechToTextSession(token: CancellationToken): Promise<ISpeechToTextSession> {
return {
onDidChange: emitter.event
};
Expand All @@ -91,10 +90,10 @@ suite('VoiceChat', () => {
let service: VoiceChatService;
let event: IVoiceChatTextEvent | undefined;

- function createSession(options: IVoiceChatSessionOptions) {
+ async function createSession(options: IVoiceChatSessionOptions) {
const cts = new CancellationTokenSource();
disposables.add(toDisposable(() => cts.dispose(true)));
- const session = service.createVoiceChatSession(cts.token, options);
+ const session = await service.createVoiceChatSession(cts.token, options);
disposables.add(session.onDidChange(e => {
event = e;
}));
Expand All @@ -110,17 +109,17 @@ suite('VoiceChat', () => {
});

test('Agent and slash command detection (useAgents: false)', async () => {
- testAgentsAndSlashCommandsDetection({ usesAgents: false, model: {} as IChatModel });
+ await testAgentsAndSlashCommandsDetection({ usesAgents: false, model: {} as IChatModel });
});

test('Agent and slash command detection (useAgents: true)', async () => {
- testAgentsAndSlashCommandsDetection({ usesAgents: true, model: {} as IChatModel });
+ await testAgentsAndSlashCommandsDetection({ usesAgents: true, model: {} as IChatModel });
});

- function testAgentsAndSlashCommandsDetection(options: IVoiceChatSessionOptions) {
+ async function testAgentsAndSlashCommandsDetection(options: IVoiceChatSessionOptions) {

// Nothing to detect
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Started });
assert.strictEqual(event?.status, SpeechToTextStatus.Started);
Expand All @@ -141,7 +140,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.waitingForInput, undefined);

// Agent
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -168,7 +167,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.waitingForInput, false);

// Agent with punctuation
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At workspace, help' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -180,7 +179,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.text, options.usesAgents ? '@workspace help' : 'At workspace, help');
assert.strictEqual(event?.waitingForInput, false);

- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At Workspace. help' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -193,7 +192,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.waitingForInput, false);

// Slash Command
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'Slash fix' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -206,7 +205,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.waitingForInput, true);

// Agent + Slash Command
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At code slash search help' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -219,7 +218,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.waitingForInput, false);

// Agent + Slash Command with punctuation
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At code, slash search, help' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -231,7 +230,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.text, options.usesAgents ? '@vscode /search help' : 'At code, slash search, help');
assert.strictEqual(event?.waitingForInput, false);

- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At code. slash, search help' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -244,7 +243,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.waitingForInput, false);

// Agent not detected twice
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At workspace, for at workspace' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -258,7 +257,7 @@ suite('VoiceChat', () => {

// Slash command detected after agent recognized
if (options.usesAgents) {
- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognized, text: 'At workspace' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognized);
Expand All @@ -280,7 +279,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event?.text, '/fix');
assert.strictEqual(event?.waitingForInput, true);

- createSession(options);
+ await createSession(options);

emitter.fire({ status: SpeechToTextStatus.Recognized, text: 'At workspace' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognized);
Expand All @@ -297,7 +296,7 @@ suite('VoiceChat', () => {
test('waiting for input', async () => {

// Agent
- createSession({ usesAgents: true, model: {} as IChatModel });
+ await createSession({ usesAgents: true, model: {} as IChatModel });

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At workspace' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand All @@ -310,7 +309,7 @@ suite('VoiceChat', () => {
assert.strictEqual(event.waitingForInput, true);

// Slash Command
- createSession({ usesAgents: true, model: {} as IChatModel });
+ await createSession({ usesAgents: true, model: {} as IChatModel });

emitter.fire({ status: SpeechToTextStatus.Recognizing, text: 'At workspace slash explain' });
assert.strictEqual(event?.status, SpeechToTextStatus.Recognizing);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ export class EditorDictation extends Disposable implements IEditorContribution {
super();
}

- start() {
+ async start(): Promise<void> {
const disposables = new DisposableStore();
this.sessionDisposables.value = disposables;

Expand Down Expand Up @@ -249,7 +249,7 @@ export class EditorDictation extends Disposable implements IEditorContribution {
const cts = new CancellationTokenSource();
disposables.add(toDisposable(() => cts.dispose(true)));

- const session = this.speechService.createSpeechToTextSession(cts.token);
+ const session = await this.speechService.createSpeechToTextSession(cts.token);
disposables.add(session.onDidChange(e => {
if (cts.token.isCancellationRequested) {
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ export class InlineChatQuickVoice implements IEditorContribution {
this._store.dispose();
}

- start() {
+ async start() {

this._finishCallback?.(true);

Expand All @@ -236,7 +236,7 @@ export class InlineChatQuickVoice implements IEditorContribution {

let message: string | undefined;
let preview: string | undefined;
- const session = this._voiceChatService.createVoiceChatSession(cts.token, { usesAgents: false });
+ const session = await this._voiceChatService.createVoiceChatSession(cts.token, { usesAgents: false });
const listener = session.onDidChange(e => {

if (cts.token.isCancellationRequested) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ import { InstantiationType, registerSingleton } from 'vs/platform/instantiation/
import { ISpeechService } from 'vs/workbench/contrib/speech/common/speechService';
import { SpeechService } from 'vs/workbench/contrib/speech/browser/speechService';

- registerSingleton(ISpeechService, SpeechService, InstantiationType.Delayed);
+ registerSingleton(ISpeechService, SpeechService, InstantiationType.Eager /* Reads Extension Points */);