diff --git a/README.md b/README.md index f479122..708eb61 100644 --- a/README.md +++ b/README.md @@ -34,31 +34,67 @@ It demonstrates the following features: ## QuickStart -`npm install https://github.com/readium/speech#build` +At the moment, the new alpha version of the library is not published on npm, so you need to clone the repository and build it yourself. ``` -import { voicesSelection} from "readium-speech"; -console.log(voicesSelection); +git clone https://github.com/readium/speech.git -// or with cjs only : -const { getVoices } = require("readium-speech/cjs/voices.js"); -console.log(getVoices); +``` + +``` +cd speech +npm install +npm run build +``` + +You can then link the library to your project, for example using `npm link`. -// or with esm mjs : -import { getVoices } from "readium-speech/mjs/voices.js"; +``` +import { getVoices } from "readium-speech"; console.log(getVoices); -const voices = await voicesSelection.getVoices(); +const voices = await getVoices(); console.log(voices); ``` -## API +### Basic Usage + +Here's how to get started with the Readium Speech library: + +```typescript +import { WebSpeechReadAloudNavigator } from "readium-speech"; + +// Initialize the navigator with default WebSpeech engine +const navigator = new WebSpeechReadAloudNavigator(); + +// Load content to be read +navigator.loadContent([ + { text: "Hello, this is the first sentence.", language: "en-US" }, + { text: "And this is the second sentence.", language: "en-US" } +]); + +// Set up event listeners +navigator.on("start", () => console.log("Playback started")); +navigator.on("end", () => console.log("Playback finished")); + +// Start playback +navigator.play().catch(console.error); + +// Later, you can pause, resume, or stop +// navigator.pause(); +// navigator.stop(); + +// Clean up when done +// navigator.destroy(); +``` + +## Voices API ### Interface ``` -export interface IVoices { +export interface ReadiumSpeechVoices { label: string; voiceURI: string; name: string; @@ -79,24 +115,24 @@ export interface ILanguages { } ``` -#### Parse and Extract IVoices from speechSynthesis WebAPI +#### Parse and Extract ReadiumSpeechVoices from speechSynthesis WebAPI ``` -function getVoices(preferredLanguage?: string[] | string, localization?: string): Promise +function getVoices(preferredLanguage?: string[] | string, localization?: string): Promise ``` -#### List languages from IVoices +#### List languages from ReadiumSpeechVoices ``` -function getLanguages(voices: IVoices[], preferredLanguage?: string[] | string, localization?: string | undefined): ILanguages[] +function getLanguages(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string, localization?: string | undefined): ILanguages[] ``` #### helpers ``` -function listLanguages(voices: IVoices[], localization?: string): ILanguages[] +function listLanguages(voices: ReadiumSpeechVoices[], localization?: string): ILanguages[] -function ListRegions(voices: IVoices[], localization?: string): ILanguages[] +function ListRegions(voices: ReadiumSpeechVoices[], localization?: string): ILanguages[] -function parseSpeechSynthesisVoices(speechSynthesisVoices: SpeechSynthesisVoice[]): IVoices[] +function parseSpeechSynthesisVoices(speechSynthesisVoices: SpeechSynthesisVoice[]): ReadiumSpeechVoices[] function getSpeechSynthesisVoices(): Promise ``` @@ -104,39 +140,118 @@ function getSpeechSynthesisVoices(): Promise #### groupBy ``` -function groupByKindOfVoices(allVoices: IVoices[]): TGroupVoices +function groupByKindOfVoices(allVoices: ReadiumSpeechVoices[]): TGroupVoices -function groupByRegions(voices: IVoices[], language: string, preferredRegions?: string[] | string, localization?: string): TGroupVoices +function groupByRegions(voices: ReadiumSpeechVoices[], language: string, preferredRegions?: string[] | string, localization?: string): TGroupVoices -function groupByLanguage(voices: IVoices[], preferredLanguage?: string[] | string, localization?: string): TGroupVoices +function groupByLanguage(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string, localization?: string): TGroupVoices ``` #### sortBy ``` -function sortByLanguage(voices: IVoices[], preferredLanguage?: string[] | string): IVoices[] +function sortByLanguage(voices: ReadiumSpeechVoices[], preferredLanguage?: string[] | string): ReadiumSpeechVoices[] -function sortByRegion(voices: IVoices[], preferredRegions?: string[] | string, localization?: string | undefined): IVoices[] +function sortByRegion(voices: ReadiumSpeechVoices[], preferredRegions?: string[] | string, localization?: string | undefined): ReadiumSpeechVoices[] -function sortByGender(voices: IVoices[], genderFirst: TGender): IVoices[] +function sortByGender(voices: ReadiumSpeechVoices[], genderFirst: TGender): ReadiumSpeechVoices[] -function sortByName(voices: IVoices[]): IVoices[] +function sortByName(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[] -function sortByQuality(voices: IVoices[]): IVoices[] +function sortByQuality(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[] ``` #### filterOn ``` -function filterOnRecommended(voices: IVoices[], _recommended?: IRecommended[]): TReturnFilterOnRecommended +function filterOnRecommended(voices: ReadiumSpeechVoices[], _recommended?: IRecommended[]): TReturnFilterOnRecommended + +function filterOnVeryLowQuality(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[] -function filterOnVeryLowQuality(voices: IVoices[]): IVoices[] +function filterOnNovelty(voices: ReadiumSpeechVoices[]): ReadiumSpeechVoices[] -function filterOnNovelty(voices: IVoices[]): IVoices[] +function filterOnQuality(voices: ReadiumSpeechVoices[], quality: TQuality | TQuality[]): ReadiumSpeechVoices[] -function filterOnQuality(voices: IVoices[], quality: TQuality | TQuality[]): IVoices[] +function filterOnLanguage(voices: ReadiumSpeechVoices[], language: string | string[]): ReadiumSpeechVoices[] -function filterOnLanguage(voices: IVoices[], language: string | string[]): IVoices[] +function filterOnGender(voices: ReadiumSpeechVoices[], gender: TGender): ReadiumSpeechVoices[] +``` + +## Playback API + +### ReadiumSpeechNavigator + +```typescript +interface ReadiumSpeechNavigator { + // Voice Management + getVoices(): Promise; + setVoice(voice: ReadiumSpeechVoice | string): Promise; + getCurrentVoice(): ReadiumSpeechVoice | null; + + // Content Management + loadContent(content: ReadiumSpeechUtterance | ReadiumSpeechUtterance[]): void; + getCurrentContent(): ReadiumSpeechUtterance | null; + getContentQueue(): ReadiumSpeechUtterance[]; + + // Playback Control + play(): Promise; + pause(): void; + stop(): void; + + // Navigation + next(): Promise; + previous(): Promise; + jumpTo(utteranceIndex: number): void; + + // Playback Parameters + setRate(rate: number): void; + getRate(): number; + setPitch(pitch: number): void; + getPitch(): number; + setVolume(volume: number): void; + getVolume(): number; + + // State + getState(): ReadiumSpeechPlaybackState; + getCurrentUtteranceIndex(): number; + + // Events + on( + event: ReadiumSpeechPlaybackEvent["type"], + listener: (event: ReadiumSpeechPlaybackEvent) => void + ): void; + + // Cleanup + destroy(): void; +} +``` -function filterOnGender(voices: IVoices[], gender: TGender): IVoices[] +### Events + +#### ReadiumSpeechPlaybackEvent + +```typescript +type ReadiumSpeechPlaybackEvent = { + type: + | "start" // Playback started + | "pause" // Playback paused + | "resume" // Playback resumed + | "end" // Playback ended naturally + | "stop" // Playback stopped manually + | "error" // An error occurred + | "boundary" // Reached a word/sentence boundary + | "mark" // Reached a named mark in SSML + | "idle" // No content loaded + | "loading" // Loading content + | "ready" // Ready to play + | "voiceschanged" // Available voices changed + | "positionchanged"; // Playback position changed + detail?: any; // Event-specific data +}; ``` + +#### ReadiumSpeechPlaybackState + +```typescript +type ReadiumSpeechPlaybackState = "playing" | "paused" | "idle" | "loading" | "ready"; +``` \ No newline at end of file diff --git a/demo/navigator/navigator-demo-script.js b/demo/navigator/navigator-demo-script.js index 7a95ac0..3bf0121 100644 --- a/demo/navigator/navigator-demo-script.js +++ b/demo/navigator/navigator-demo-script.js @@ -157,6 +157,17 @@ navigator.on("error", (event) => { viewRender(); }); +navigator.on("positionchanged", (event) => { + // Update the UI when the position changes programmatically + const newPosition = (event.detail?.position ?? 0) + 1; // Convert to 1-based index for display + lastNavigatorPosition = newPosition; + const input = document.getElementById("utterance-index"); + if (input && input !== document.activeElement) { + input.value = newPosition; + } + viewRender(); +}); + navigator.on("boundary", (event) => { // Handle word boundaries for highlighting if (event.detail.name === "word") { diff --git a/package.json b/package.json index 0343db0..be7c562 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "readium-speech", - "version": "2.0.0-alpha.1", + "version": "2.0.0-alpha.2", "description": "Readium Speech is a TypeScript library for implementing a read aloud feature with Web technologies. It follows [best practices](https://github.com/HadrienGardeur/read-aloud-best-practices) gathered through interviews with members of the digital publishing industry.", "main": "build/index.js", "module": "build/index.js", diff --git a/script/fixup.sh b/script/fixup.sh deleted file mode 100644 index de03d71..0000000 --- a/script/fixup.sh +++ /dev/null @@ -1,12 +0,0 @@ -cat >build/cjs/package.json <build/mjs/package.json < { + this.setNavigatorState("idle"); + this.emitEvent({ type: "stop" }); + }); + this.engine.on("error", (event) => { this.setNavigatorState("idle"); - // Only emit error for genuine errors, not interruptions during navigation - if (event.detail.error !== "interrupted" && event.detail.error !== "canceled") { - this.emitEvent(event); - } + this.emitEvent(event); }); this.engine.on("ready", () => { @@ -82,6 +84,10 @@ export class WebSpeechReadAloudNavigator implements ReadiumSpeechNavigator { this.engine.on("mark", (event) => { this.emitEvent(event); }); + + this.engine.on("positionchanged", (event) => { + this.emitEvent(event); + }); this.engine.on("voiceschanged", () => { this.emitEvent({ type: "voiceschanged" }); @@ -160,38 +166,42 @@ export class WebSpeechReadAloudNavigator implements ReadiumSpeechNavigator { this.emitEvent({ type: "stop" }); // Then emit event for UI update } - async togglePlayPause(): Promise { - if (this.navigatorState === "playing") { - this.pause(); - } else { - await this.play(); - } - } - // Navigation - Navigator coordinates with proper state management - async next(): Promise { + async next(forcePlay: boolean = false): Promise { const currentIndex = this.getCurrentUtteranceIndex(); const totalCount = this.engine.getUtteranceCount(); - if (currentIndex < totalCount - 1) { - this.engine.speak(currentIndex + 1); - return true; + if (currentIndex >= totalCount - 1) return false; + + if (this.navigatorState === "paused" && !forcePlay) { + this.engine.setCurrentUtteranceIndex(currentIndex + 1); + } else { + this.setNavigatorState("playing"); + await this.engine.speak(currentIndex + 1); } - return false; + return true; } - async previous(): Promise { + async previous(forcePlay: boolean = false): Promise { const currentIndex = this.getCurrentUtteranceIndex(); + if (currentIndex <= 0) return false; - if (currentIndex > 0) { - this.engine.speak(currentIndex - 1); - return true; + if (this.navigatorState === "paused" && !forcePlay) { + this.engine.setCurrentUtteranceIndex(currentIndex - 1); + } else { + this.setNavigatorState("playing"); + await this.engine.speak(currentIndex - 1); } - return false; + return true; } - jumpTo(utteranceIndex: number): void { - if (utteranceIndex >= 0 && utteranceIndex < this.contentQueue.length) { + jumpTo(utteranceIndex: number, forcePlay: boolean = false): void { + if (utteranceIndex < 0 || utteranceIndex >= this.contentQueue.length) return; + + if (this.navigatorState === "paused" && !forcePlay) { + this.engine.setCurrentUtteranceIndex(utteranceIndex); + } else { + this.setNavigatorState("playing"); this.engine.speak(utteranceIndex); } } @@ -201,14 +211,26 @@ export class WebSpeechReadAloudNavigator implements ReadiumSpeechNavigator { this.engine.setRate(rate); } + getRate(): number { + return this.engine.getRate(); + } + setPitch(pitch: number): void { this.engine.setPitch(pitch); } + getPitch(): number { + return this.engine.getPitch(); + } + setVolume(volume: number): void { this.engine.setVolume(volume); } + getVolume(): number { + return this.engine.getVolume(); + } + // State - Navigator is the single source of truth getState(): ReadiumSpeechPlaybackState { return this.navigatorState; diff --git a/src/WebSpeech/webSpeechEngine.ts b/src/WebSpeech/webSpeechEngine.ts index 1e28ac7..e4fd56d 100644 --- a/src/WebSpeech/webSpeechEngine.ts +++ b/src/WebSpeech/webSpeechEngine.ts @@ -24,8 +24,10 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { // Enhanced properties for cross-browser compatibility private resumeInfinityTimer?: number; - private isPausedInternal: boolean = false; private isSpeakingInternal: boolean = false; + private isPausedInternal: boolean = false; + private isAndroidPaused: boolean = false; // Explicitly tracks Android's paused state + private pausedAtUtteranceIndex: number | null = null; // Tracks which utterance was playing when paused private initialized: boolean = false; private maxLengthExceeded: "error" | "none" | "warn" = "warn"; private utterancesBeingCancelled: boolean = false; // Flag to track if utterances are being cancelled @@ -288,6 +290,11 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { this.setState("playing"); this.emitEvent({ type: "start" }); + // Clear Android paused state when new utterance actually starts + if (this.patches.isAndroid && this.isAndroidPaused) { + this.isAndroidPaused = false; + } + const shouldUseResumeInfinity = this.shouldUseResumeInfinity(); if (shouldUseResumeInfinity) { this.startResumeInfinity(utterance); @@ -320,9 +327,16 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { }; utterance.onerror = (event) => { + // Skip error handling for Android pause operations + if (event.error === "interrupted" && this.patches.isAndroid && this.isAndroidPaused) { + return; + } + + // Common cleanup this.isSpeakingInternal = false; this.isPausedInternal = false; this.stopResumeInfinity(); + this.setState("idle"); // Fatal errors that break playback completely - reset to beginning const fatalErrors = ["synthesis-unavailable", "audio-hardware", "voice-unavailable"]; @@ -331,14 +345,19 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { this.currentUtteranceIndex = 0; } - this.setState("idle"); - this.emitEvent({ - type: "error", - detail: { - error: event.error, // Preserve original error type - message: `Speech synthesis error: ${event.error}` - } - }); + // Handle interrupted/canceled as stop events + if (event.error === "interrupted" || event.error === "canceled") { + this.emitEvent({ type: "stop" }); + } else { + // All other errors + this.emitEvent({ + type: "error", + detail: { + error: event.error, // Preserve original error type + message: `Speech synthesis error: ${event.error}` + } + }); + } }; utterance.onpause = () => { @@ -416,40 +435,58 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { pause(): void { if (this.playbackState === "playing") { - // Android-specific handling: pause causes speech to end but not fire end-event - // so we simply do it manually instead of pausing + // Store the current index when pausing + this.pausedAtUtteranceIndex = this.currentUtteranceIndex; + if (this.patches.isAndroid) { + this.isAndroidPaused = true; this.speechSynthesis.cancel(); - return; + } else { + this.speechSynthesis.pause(); } - - this.speechSynthesis.pause(); - // in some cases, pause does not update the internal state, - // so we need to update it manually using an own state + + // Common state updates this.isPausedInternal = true; this.isSpeakingInternal = false; this.setState("paused"); - // Emit pause event since speechSynthesis.pause() may not trigger utterance.onpause this.emitEvent({ type: "pause" }); } } resume(): void { - if (this.playbackState === "paused") { - this.speechSynthesis.resume(); - // in some cases, resume does not update the internal state, - // so we need to update it manually using an own state + if (this.playbackState === "paused" && (this.currentUtteranceIndex < this.currentUtterances.length)) { + // Common state updates this.isPausedInternal = false; this.isSpeakingInternal = true; this.setState("playing"); - // Emit resume event since speechSynthesis.resume() may not trigger utterance.onresume this.emitEvent({ type: "resume" }); + + // Check if we need to restart or can resume + const shouldRestart = this.patches.isAndroid || + this.pausedAtUtteranceIndex !== this.currentUtteranceIndex; + + if (shouldRestart) { + // If index changed or on Android, start fresh from the new index + this.speak(this.currentUtteranceIndex); + } else { + // Otherwise, resume from where we left off + this.speechSynthesis.resume(); + } + + // Reset the paused index + this.pausedAtUtteranceIndex = null; } } stop(): void { this.speechSynthesis.cancel(); this.currentUtteranceIndex = 0; // Reset to beginning when stopped + + // Reset Android paused state when stopping + if (this.patches.isAndroid) { + this.isAndroidPaused = false; + } + this.setState("idle"); this.emitEvent({ type: "stop" }); // Emit immediately } @@ -459,14 +496,26 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { this.rate = Math.max(0.1, Math.min(10, rate)); } + getRate(): number { + return this.rate; + } + setPitch(pitch: number): void { this.pitch = Math.max(0, Math.min(2, pitch)); } + getPitch(): number { + return this.pitch; + } + setVolume(volume: number): void { this.volume = Math.max(0, Math.min(1, volume)); } + getVolume(): number { + return this.volume; + } + // State getState(): ReadiumSpeechPlaybackState { return this.playbackState; @@ -476,6 +525,32 @@ export class WebSpeechEngine implements ReadiumSpeechPlaybackEngine { return this.currentUtteranceIndex; } + setCurrentUtteranceIndex(index: number): void { + // Validate the new index + if (index < 0 || index >= this.currentUtterances.length) { + throw new Error("Invalid utterance index"); + } + + // If the index isn't changing, do nothing + if (index === this.currentUtteranceIndex) { + return; + } + + // First, handle any ongoing speech + if (!this.isPausedInternal && this.isSpeakingInternal) { + this.cancelCurrentSpeech(); + } + + // Only after ensuring any ongoing speech is cancelled, update the index + this.currentUtteranceIndex = index; + + // Only after all state changes and side effects, emit the event + this.emitEvent({ + type: "positionchanged", + detail: { position: index } + }); + } + getUtteranceCount(): number { return this.currentUtterances.length; } diff --git a/src/engine.ts b/src/engine.ts index dd21df1..b079459 100644 --- a/src/engine.ts +++ b/src/engine.ts @@ -19,12 +19,16 @@ export interface ReadiumSpeechPlaybackEngine { // Playback Parameters setRate(rate: number): void; + getRate(): number; setPitch(pitch: number): void; + getPitch(): number; setVolume(volume: number): void; + getVolume(): number; // State getState(): ReadiumSpeechPlaybackState; getCurrentUtteranceIndex(): number; + setCurrentUtteranceIndex(index: number): void; getUtteranceCount(): number; // Events diff --git a/src/navigator.ts b/src/navigator.ts index 7398f6a..7b152b6 100644 --- a/src/navigator.ts +++ b/src/navigator.ts @@ -5,18 +5,19 @@ export type ReadiumSpeechPlaybackState = "playing" | "paused" | "idle" | "loadin export interface ReadiumSpeechPlaybackEvent { type: - | "start" // Playback started - | "pause" // Playback paused - | "resume" // Playback resumed - | "end" // Playback ended naturally - | "stop" // Playback stopped manually - | "error" // An error occurred - | "boundary" // Reached a word/sentence boundary - | "mark" // Reached a named mark in SSML - | "idle" // No content loaded - | "loading" // Loading content - | "ready" // Ready to play - | "voiceschanged"; // Available voices changed + | "start" // Playback started + | "pause" // Playback paused + | "resume" // Playback resumed + | "end" // Playback ended naturally + | "stop" // Playback stopped manually + | "error" // An error occurred + | "boundary" // Reached a word/sentence boundary + | "mark" // Reached a named mark in SSML + | "idle" // No content loaded + | "loading" // Loading content + | "ready" // Ready to play + | "voiceschanged" // Available voices changed + | "positionchanged"; // Playback position changed detail?: any; // Event-specific data } @@ -37,7 +38,6 @@ export interface ReadiumSpeechNavigator { play(): Promise; pause(): void; stop(): void; - togglePlayPause(): Promise; // Navigation next(): Promise; @@ -46,8 +46,11 @@ export interface ReadiumSpeechNavigator { // Playback Parameters setRate(rate: number): void; + getRate(): number; setPitch(pitch: number): void; + getPitch(): number; setVolume(volume: number): void; + getVolume(): number; // State getState(): ReadiumSpeechPlaybackState; diff --git a/tsconfig-types.json b/tsconfig-types.json deleted file mode 100644 index 1c96a26..0000000 --- a/tsconfig-types.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "extends": "./tsconfig-base.json", - "compilerOptions": { - "declaration": true, - "moduleResolution": "bundler", - "module": "ES2022", - "outDir": "build/types", - "emitDeclarationOnly": true, - "target": "esnext" - } -}