1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -163,6 +163,7 @@ Breaking changes in this release:
- Added core mute/unmute functionality for speech-to-speech via `useRecorder` hook (silent chunks keep the server connection alive), in PR [#5688](https://github.com/microsoft/BotFramework-WebChat/pull/5688), by [@pranavjoshi001](https://github.com/pranavjoshi001)
- 🧪 Added incremental streaming Markdown renderer for livestreaming, in PR [#5799](https://github.com/microsoft/BotFramework-WebChat/pull/5799), by [@OEvgeny](https://github.com/OEvgeny)
- Fixed streaming Markdown renderer to preserve link reference definitions during incremental rendering and recover on error, in PR [#5808](https://github.com/microsoft/BotFramework-WebChat/pull/5808), by [@OEvgeny](https://github.com/OEvgeny)
- Added multi-modal text + voice experience, in PR [#5817](https://github.com/microsoft/BotFramework-WebChat/pull/5817), by [@pranavjoshi001](https://github.com/pranavjoshi001)

### Changed

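For reference, the enablement pattern every test below repeats: the server announces the audio modality (which makes the microphone button render) and the consumer opts into voice mode. A minimal sketch using the same `testHelpers.createDirectLineEmulator` harness the tests use (on real DirectLine, the `enableVoiceMode` flag is the consumer-side counterpart):

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Server side: announce audio so the microphone button shows up.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });

// Consumer side: opt into the multi-modal text + voice experience.
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });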
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/barge.in.html
@@ -40,8 +40,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
59 changes: 51 additions & 8 deletions __tests__/html2/speechToSpeech/basic.sendbox.with.mic.html
@@ -12,6 +12,13 @@
</head>
<body>
<main id="webchat"></main>
<script type="module">
import { setupMockMediaDevices } from '/assets/esm/speechToSpeech/mockMediaDevices.js';
import { setupMockAudioPlayback } from '/assets/esm/speechToSpeech/mockAudioPlayback.js';

setupMockMediaDevices();
setupMockAudioPlayback();
</script>
<script type="text/babel">
run(async function () {
const {
@@ -23,8 +30,9 @@
// GIVEN: Web Chat with Fluent Theme and microphone button enabled
const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
@@ -50,17 +58,52 @@
const keypadButton = document.querySelector(`[data-testid="${testIds.sendBoxTelephoneKeypadToolbarButton}"]`);
expect(keypadButton).toBeTruthy();

// THEN: Text counter should NOT be present
const textCounter = document.querySelector('.sendbox__text-counter');
expect(textCounter).toBeFalsy();

// THEN: Send button should NOT be present
// THEN: Multi-modal design: send button coexists with mic. While idle it is enabled
// so the user can also send text without leaving voice mode.
const sendButton = document.querySelector(`[data-testid="${testIds.sendBoxSendButton}"]`);
expect(sendButton).toBeFalsy();
const textArea = document.querySelector(`[data-testid="${testIds.sendBoxTextBox}"]`);
const isSendDisabled = () => sendButton.getAttribute('aria-disabled') === 'true';
expect(sendButton).toBeTruthy();
expect(isSendDisabled()).toBe(false);
expect(textArea.hasAttribute('readonly')).toBe(false);

// THEN: Should show sendbox with microphone and keypad buttons
// THEN: Should show sendbox with microphone, keypad and send buttons
await host.snapshot('local');

// WHEN: User starts recording
await host.click(micButton);

// First wait for the voice toggle to actually flip on so we know recording started.
await pageConditions.became(
'Recording started',
() => micButton.getAttribute('aria-label')?.includes('Microphone on'),
2000
);

// THEN: Send button is disabled and text input becomes read-only — voice and text
// are mutually exclusive while the mic is open.
await pageConditions.became(
'Send button disabled while recording',
() => isSendDisabled() && textArea.hasAttribute('readonly'),
2000
);

// WHEN: User stops recording
await host.click(micButton);

await pageConditions.became(
'Recording stopped',
() => micButton.getAttribute('aria-label')?.includes('Microphone off'),
2000
);

// THEN: Send button and text input are re-enabled — back to free text entry.
await pageConditions.became(
'Send button re-enabled after stopping recording',
() => !isSendDisabled() && !textArea.hasAttribute('readonly'),
2000
);

// WHEN: Voice configuration is removed from directLine
directLine.setCapability('getVoiceConfiguration', undefined);

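Both mock helpers imported at the top of this test (`setupMockMediaDevices`, `setupMockAudioPlayback`) stub browser media APIs so recording runs headless; their implementations are not part of this diff. A hypothetical sketch of the `getUserMedia` side, assuming a silent `MediaStream` is enough to satisfy the recorder:

// Hypothetical stub (the real helper lives in /assets/esm/speechToSpeech/):
// hand the recorder a silent MediaStream so no permission prompt ever fires.
export function setupMockMediaDevices() {
  const audioContext = new AudioContext();
  const silentDestination = audioContext.createMediaStreamDestination();
  navigator.mediaDevices.getUserMedia = () => Promise.resolve(silentDestination.stream);
}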
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/csp.recording.html
@@ -62,8 +62,9 @@
// GIVEN: Web Chat with Speech-to-Speech enabled and CSP headers
const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/dtmf.input.html
@@ -35,8 +35,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Intercept postActivity to capture outgoing DTMF events
const capturedDtmfEvents = [];
Binary file modified __tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png
Binary file modified __tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png
17 changes: 10 additions & 7 deletions __tests__/html2/speechToSpeech/happy.path.html
@@ -30,8 +30,10 @@
// GIVEN: Web Chat with Speech-to-Speech enabled
const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Server announces the audio modality (so the mic shows up) and the consumer has opted into
// the multi-modal experience: outgoing activities go over the WebSocket without being echoed back.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
@@ -137,20 +139,21 @@
expect(activities[0]).toHaveProperty('textContent', 'What is the weather today?');
expect(activities[1]).toHaveProperty('textContent', 'The weather today is sunny with a high of 75 degrees.');

// THEN: Verify activity status for voice transcripts
// THEN: Verify activity status for voice transcripts.
// New design: every transcript renders just `Just now | <icon>` — no role label,
// bot uses the audio-playing icon, user uses the microphone icon.
const activityStatuses = pageElements.activityStatuses();
expect(activityStatuses.length).toBe(2);

// THEN: User transcript should have timestamp but NO "Agent" label
const userActivityStatus = activityStatuses[0];
expect(userActivityStatus.innerText).not.toContain('Agent');
expect(userActivityStatus.innerText).toContain('Just now');
expect(userActivityStatus.innerText).toContain('|');
expect(userActivityStatus.querySelector('[class*="icon--microphone"]')).toBeTruthy();

// THEN: Bot transcript should have "Agent" label AND timestamp
const botActivityStatus = activityStatuses[1];
expect(botActivityStatus.innerText).toContain('Agent');
expect(botActivityStatus.innerText).toContain('|');
expect(botActivityStatus.innerText).toContain('Just now');
expect(botActivityStatus.innerText).toContain('|');
expect(botActivityStatus.querySelector('[class*="icon--audio-playing"]')).toBeTruthy();

// WHEN: User stops recording by clicking microphone button again
await host.click(micButton);
Binary file modified __tests__/html2/speechToSpeech/happy.path.html.snap-1.png
Binary file modified __tests__/html2/speechToSpeech/happy.path.html.snap-2.png
197 changes: 197 additions & 0 deletions __tests__/html2/speechToSpeech/multimodal.text.with.voice.html
@@ -0,0 +1,197 @@
<!doctype html>
<html lang="en-US">
<head>
<link href="/assets/index.css" rel="stylesheet" type="text/css" />
<script crossorigin="anonymous" src="https://unpkg.com/@babel/standalone@7.8.7/babel.min.js"></script>
<script crossorigin="anonymous" src="https://unpkg.com/react@16.8.6/umd/react.production.min.js"></script>
<script crossorigin="anonymous" src="https://unpkg.com/react-dom@16.8.6/umd/react-dom.production.min.js"></script>
<script crossorigin="anonymous" src="/test-harness.js"></script>
<script crossorigin="anonymous" src="/test-page-object.js"></script>
<script crossorigin="anonymous" src="/__dist__/webchat-es5.js"></script>
<script crossorigin="anonymous" src="/__dist__/botframework-webchat-fluent-theme.production.min.js"></script>
</head>
<body>
<main id="webchat"></main>
<!--
Test: Multi-modal experience — text and voice coexist in the same send box.

Verifies the realistic interleaving:
1. Server announces audio capability + consumer opts into voice mode (`enableVoiceMode`).
2. Text turn: user types → bot replies as text. Both ride the WebSocket fire-and-forget:
   the saga renders the user message optimistically; bot text arrives as a normal incoming activity.
3. Voice turn: user clicks mic → user speaks → bot replies via media.end transcript.
While recording, the text input is read-only and the send button is disabled.
4. Mic toggled off → text turn again (user types → bot replies as text).
5. Snapshot captures the full mixed transcript.
-->
<script type="module">
import { setupMockMediaDevices } from '/assets/esm/speechToSpeech/mockMediaDevices.js';
import { setupMockAudioPlayback } from '/assets/esm/speechToSpeech/mockAudioPlayback.js';

setupMockMediaDevices();
setupMockAudioPlayback();
</script>
<script type="text/babel">
run(async function () {
const {
React,
ReactDOM: { render },
WebChat: { FluentThemeProvider, ReactWebChat, testIds }
} = window;

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Mirror real DirectLine when `enableVoiceMode` is true: the server announces audio,
// and outgoing traffic flows over the WebSocket without being echoed back.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Capture outgoing activities to assert WebSocket-style fire-and-forget delivery.
const outgoingActivities = [];
const originalPostActivity = directLine.postActivity.bind(directLine);
directLine.postActivity = activity => {
outgoingActivities.push(activity);
return originalPostActivity(activity);
};

render(
<FluentThemeProvider variant="fluent">
<ReactWebChat directLine={directLine} store={store} />
</FluentThemeProvider>,
document.getElementById('webchat')
);

await pageConditions.uiConnected();

const micButton = document.querySelector(`[data-testid="${testIds.sendBoxMicrophoneButton}"]`);
const sendButton = document.querySelector(`[data-testid="${testIds.sendBoxSendButton}"]`);
const textArea = document.querySelector(`[data-testid="${testIds.sendBoxTextBox}"]`);
const isSendDisabled = () => sendButton.getAttribute('aria-disabled') === 'true';

// GIVEN: Multi-modal idle — mic, send button and a writable text box all coexist.
expect(micButton).toBeTruthy();
expect(sendButton).toBeTruthy();
expect(isSendDisabled()).toBe(false);
expect(textArea.hasAttribute('readonly')).toBe(false);

// ===== TURN 1: Text in → Text out =====
await pageObjects.sendMessageViaSendBox('What is the weather today?', { waitForSend: false });

await pageConditions.became(
'Outgoing text activity captured',
() => outgoingActivities.some(a => a.type === 'message' && a.text === 'What is the weather today?'),
1000
);

await pageConditions.numActivitiesShown(1);

await directLine.emulateIncomingActivity('The weather today is sunny with a high of 75 degrees.');

await pageConditions.numActivitiesShown(2);

// ===== TURN 2: Voice in → Voice out =====
await host.click(micButton);

await pageConditions.became(
'Recording started',
() => micButton.getAttribute('aria-label')?.includes('Microphone on'),
2000
);

// While recording, text path is locked down.
await pageConditions.became(
'Recording active disables text path',
() => isSendDisabled() && textArea.hasAttribute('readonly'),
2000
);

// User speech is identified, processed, then transcript arrives.
await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'request.update',
from: { role: 'bot' },
value: { state: 'detected', message: 'Your request is identified' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.state'
});

await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'request.update',
from: { role: 'bot' },
value: { state: 'processing', message: 'Your request is being processed' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.state'
});

await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'media.end',
value: { transcription: 'Will it rain tomorrow?', origin: 'user' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.transcript'
});

await pageConditions.numActivitiesShown(3);

// Bot replies as voice (audio chunk + transcript).
await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'media.chunk',
from: { role: 'bot' },
value: { content: 'AAAAAA==', contentType: 'audio/webm' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.chunk'
});

await directLine.emulateIncomingVoiceActivity({
type: 'event',
name: 'media.end',
from: { role: 'bot' },
value: { transcription: 'No rain expected tomorrow.', origin: 'agent' },
valueType: 'application/vnd.microsoft.activity.azure.directline.audio.transcript'
});

await pageConditions.numActivitiesShown(4);

// Toggle mic off — back to idle text mode.
await host.click(micButton);

await pageConditions.became(
'Recording stopped',
() => micButton.getAttribute('aria-label')?.includes('Microphone off'),
2000
);

await pageConditions.became(
'Idle re-enables text path',
() => !isSendDisabled() && !textArea.hasAttribute('readonly'),
2000
);

// ===== TURN 3: Text in → Text out =====
await pageObjects.sendMessageViaSendBox('Thanks!', { waitForSend: false });

await pageConditions.became(
'Second outgoing text captured',
() => outgoingActivities.some(a => a.type === 'message' && a.text === 'Thanks!'),
1000
);

await pageConditions.numActivitiesShown(5);

await directLine.emulateIncomingActivity("You're welcome!");

await pageConditions.numActivitiesShown(6);

// ===== Verify final transcript order =====
const activities = pageElements.activityContents();
expect(activities[0]).toHaveProperty('textContent', 'What is the weather today?');
expect(activities[1]).toHaveProperty('textContent', 'The weather today is sunny with a high of 75 degrees.');
expect(activities[2]).toHaveProperty('textContent', 'Will it rain tomorrow?');
expect(activities[3]).toHaveProperty('textContent', 'No rain expected tomorrow.');
expect(activities[4]).toHaveProperty('textContent', 'Thanks!');
expect(activities[5]).toHaveProperty('textContent', "You're welcome!");

await pageConditions.scrollToBottomCompleted();
await host.snapshot('local');
});
</script>
</body>
</html>
3 changes: 2 additions & 1 deletion __tests__/html2/speechToSpeech/multiple.turns.html
@@ -39,8 +39,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

render(
<FluentThemeProvider variant="fluent">
Binary file modified __tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png
1 change: 1 addition & 0 deletions __tests__/html2/speechToSpeech/mute.unmute.html
@@ -69,6 +69,7 @@
// Setup Web Chat with Speech-to-Speech
const { directLine, store } = testHelpers.createDirectLineEmulator();
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Track voiceState and microphoneMuted changes
store.subscribe(() => {
@@ -41,8 +41,9 @@

const { directLine, store } = testHelpers.createDirectLineEmulator();

// Set voice configuration capability to enable microphone button
// Multi-modal experience: server announces audio, consumer opted into voice mode.
directLine.setCapability('getVoiceConfiguration', { sampleRate: 24000, chunkIntervalMs: 100 }, { emitEvent: false });
directLine.setCapability('getIsVoiceModeEnabled', true, { emitEvent: false });

// Intercept postActivity to capture outgoing voice chunks
const capturedChunks = [];
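The intercept declared here is cut off by the fold. The complete pattern appears in multimodal.text.with.voice.html: wrap `postActivity`, record each outgoing activity, then delegate to the original so delivery is unchanged.

const originalPostActivity = directLine.postActivity.bind(directLine);
directLine.postActivity = activity => {
  capturedChunks.push(activity);
  return originalPostActivity(activity);
};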