In [12]:
import requests
import pyaudio
import time
import numpy as np
from collections import deque
import base64
import json

def play_tts_audio(server_url, prompt, sample_rate=24000):
    """
    Fetch and play TTS audio from the server in a notebook environment.
    Uses buffering for smoother playback while still collecting all chunks.
    
    Parameters:
    -----------
    server_url : str
        Base URL of the TTS server
    prompt : str
        Text to synthesize
    sample_rate : int
        Sample rate of the audio (should match server's sample rate)
        
    Returns:
    --------
    list
        List of all audio chunks for later use (e.g., saving to a WAV file)
    """
    # Setup PyAudio
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        output=True,
        frames_per_buffer=1024*2
    )
    
    # Store all audio chunks for potential later use
    all_audio_chunks = []
    
    # Create a buffer for smoother playback
    buffer_size = 42 # Increase buffer size for smoother playback
    audio_buffer = bytearray()
    buffer_filled = False
    
    # Construct the URL with the prompt
    url = f"{server_url}/tts-stream"
    #print(f"Streaming audio for: '{prompt}'")
    
    try:
        # Send POST request
        response = requests.post(
            url,
            json={"text": prompt},
            headers={"Content-Type": "application/json"},
            stream=True,
            timeout=30
        )
        
        if response.status_code != 200:
            print(f"Error: Server returned status code {response.status_code}")
            return []
        
        print("Buffering audio...")
        
        # Process streaming response with buffering
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                # Ensure chunk is PCM int16
                chunk = np.frombuffer(chunk, dtype=np.int16).tobytes()
                
                
                # Always collect all chunks regardless of buffering
                all_audio_chunks.append(chunk)
                
                
                audio_buffer.extend(chunk)
                if len(audio_buffer) > 24000*3:
                    print("Playing audio...")
                    stream.write(bytes(audio_buffer))
                    audio_buffer = bytearray()
            
        
        # Ensure the last bit of audio plays completely
        time.sleep(0.5)
        
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
    except Exception as e:
        print(f"Error during playback: {e}")
    finally:
        # Clean up
        stream.stop_stream()
        stream.close()
        p.terminate()
        print("Finished playing audio")
    

In [19]:
story = """Once upon a time in the small coastal town of Bayview, there lived an elderly lighthouse keeper named Samuel. For forty-five years, Samuel had faithfully maintained the lighthouse that stood on the rocky promontory overlooking the treacherous waters of the Atlantic. His weathered face, lined with deep creases from decades of squinting against the salt spray and relentless sun, told the story of his dedication.

Samuel's routine rarely varied. Each morning at precisely 5:30 AM, he would climb the one hundred and eight spiraling steps to the lantern room at the top of the lighthouse. There, he would meticulously clean the glass panels of the Fresnel lens, check the light mechanism, and ensure that the fog horn was in working order. This ritual had become as natural to him as breathing.

The townspeople of Bayview respected Samuel, but few truly knew him. He was a man of few words, preferring the company of seagulls and the rhythmic crashing of waves against the rocks below. His only regular visitor was a twelve-year-old girl named Eliza, the granddaughter of the local bookshop owner.

Eliza had first encountered Samuel on a school field trip to the lighthouse three years prior. While her classmates had been disinterested in the old man's explanations about tide patterns and navigation techniques, Eliza had been captivated. From that day forward, she would visit the lighthouse every Tuesday and Thursday afternoon, bringing along books that Samuel might enjoy or homemade cookies her grandmother had baked.

On one particularly stormy evening in late October, as rain lashed against the lighthouse and thunder rolled across the heavens, Samuel spotted a small fishing vessel struggling against the violent waves. The boat's navigation lights were flickering weakly, suggesting electrical problems. Without hesitation, Samuel activated the emergency protocols, increasing the intensity of the lighthouse beam and sounding the fog horn at regular intervals to guide the struggling vessel.

Hours passed as Samuel kept vigilant watch over the boat's progress. The storm showed no signs of abating, and he feared the worst. Just when it seemed the tiny vessel would be dashed against the rocks, it made a desperate turn, finding the narrow channel that led to the safety of the harbor.

The following morning, as Samuel completed his routine checks, he heard unfamiliar footsteps climbing the lighthouse stairs. A man in his thirties, with salt-crusted hair and tired eyes, appeared in the doorway of the lantern room.

"Are you Samuel?" the man asked, his voice hoarse.
"That I am," Samuel replied cautiously.
"I'm Daniel Fletcher. My father and I were on that fishing boat last night. If it weren't for your light and horn, we wouldn't have made it back."

Samuel nodded silently, uncomfortable with gratitude as always.

"My father told me that thirty years ago, when I was just a baby, you saved him once before, during the great storm of '87," Daniel continued. "He never forgot that, and he made sure I knew your name. Last night, when I saw that light cutting through the darkness, I knew we'd be okay because Samuel was on watch."

For the first time in many years, Samuel felt tears pricking at his eyes. He had never considered that his lonely vigil had created a legacy, that his name had been spoken with reverence in homes he would never visit, by people whose faces he would never know.

In the weeks that followed, Daniel became a regular visitor to the lighthouse, along with Eliza. He brought with him stories of his travels at sea, photographs of distant ports, and eventually, his six-year-old daughter, Lily, who gazed up at the lighthouse with the same wonder that Eliza had shown years before.

As winter turned to spring, Samuel found himself surrounded by something he had never expected in his solitary life: a family of sorts, bound not by blood but by the light that had guided them all home in different ways.

One evening, as they all sat together in Samuel's modest quarters at the base of the lighthouse, enjoying a simple meal of fish stew, Eliza asked the question that had been on her mind for some time.

"Samuel, why did you become a lighthouse keeper?"

The old man was quiet for a long moment, stirring his stew thoughtfully.

"When I was a boy," he finally said, "my father was a fisherman. One night, he went out and never came back. The lighthouse keeper at that time had fallen ill, and there was no light to guide my father home through the storm." He paused, his eyes distant. "I decided then that as long as I was alive, no one would lose their way back to shore if I could help it."

The room fell silent, save for the distant sound of waves and the steady ticking of Samuel's old clock.

"You've kept that promise," Daniel said softly. "And not just for sailors."

Samuel looked around at the faces gathered at his table—Eliza, now fifteen and dreaming of becoming a marine biologist; Daniel, who had found a new job at the local maritime museum; little Lily, who had decorated the lighthouse keeper's quarters with her colorful drawings. In their eyes, he saw something he had never anticipated: his own reflection, not as a solitary figure against the elements, but as a beacon in their lives as well.

As the years passed, Samuel grew older, and the modern world encroached upon his traditional role. Automated systems were installed in the lighthouse, reducing the need for a full-time keeper. But Samuel remained, now officially retired but still climbing those one hundred and eight steps each morning out of habit and devotion.

On his eightieth birthday, the town of Bayview organized a celebration in his honor. Samuel, uncomfortable with the attention but moved by the gesture, stood awkwardly as the mayor presented him with a plaque commemorating his fifty years of service. As he looked out at the gathered crowd, he realized that he recognized nearly every face—fishermen he had guided to safety, children he had allowed to visit the lighthouse on special occasions, tourists who had returned year after year, drawn by his stories of the sea.

And in the front row stood Eliza, now a respected marine researcher; Daniel, gray beginning to touch his temples; and Lily, a college student who had inherited her father's love of the sea and Samuel's devotion to the lighthouse. Beside them was an empty chair, reserved for Eliza's grandmother, who had passed away the previous winter.

Later that evening, as the celebration wound down and Samuel prepared to return to his quarters, Eliza approached him with a wrapped package.

"We wanted to give you something special," she said, as Daniel and Lily joined her. "Something to show you what you've meant to all of us."

Inside the package was a handcrafted ship in a bottle, but unlike any Samuel had seen before. The ship was a perfect miniature of a modern research vessel, the kind Eliza worked on. But behind it, meticulously crafted in glass and wood, stood a tiny replica of the Bayview lighthouse, its beam pointing the way forward.

"It's beautiful," Samuel whispered, his gnarled fingers tracing the contours of the glass.

"Look closer," Lily urged.

Samuel peered into the bottle and saw that on the deck of the ship stood four tiny figures: an old man, a middle-aged man, a young woman, and a girl, all facing the lighthouse together.

That night, after returning to his quarters, Samuel placed the ship in a bottle on his windowsill, where the rising sun would catch it in the morning. As he prepared for bed, he heard the familiar sound of the automated light switching on above, sweeping its beam across the darkened waters.

For the first time in fifty years, Samuel felt a profound sense of peace wash over him. He had kept his promise to the frightened boy he had once been, and in doing so, had illuminated paths he never knew existed—not just across treacherous waters, but through the complex channels of human connection.

As he drifted off to sleep, Samuel smiled, knowing that long after his light went out, the legacy of the Bayview lighthouse keeper would continue to guide others home.

Three years later, when Samuel passed away peacefully in his sleep, the entire town gathered for his funeral. The service was held at the base of the lighthouse, and as Eliza delivered the eulogy, a sudden break in the overcast sky allowed a beam of sunlight to illuminate the white tower, as if in final tribute.

In accordance with his wishes, Samuel's ashes were scattered from the lantern room, carried by the wind out over the waters he had watched so vigilantly for most of his life. And though the lighthouse was now fully automated, the townspeople swore that on particularly stormy nights, when the sea raged against the shore, the light seemed to burn just a little brighter, as if Samuel were still there, keeping watch, guiding everyone safely home."""

In [14]:
import requests
import pyaudio
import time
import numpy as np
from collections import deque
import base64
import json
import webrtcvad

def play_tts_audio(server_url, prompt, sample_rate=24000, aggressiveness=3, frame_duration=30, min_segment_duration=0.1):
    """
    Fetch and play TTS audio from the server with silence-based segmentation.
    """
    vad = webrtcvad.Vad(aggressiveness)
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        output=True,
        frames_per_buffer=1024 * 2
    )

    all_audio_chunks = []
    audio_buffer = bytearray()
    frame_bytes_buffer = bytearray()
    frame_samples = int(sample_rate * frame_duration / 1000)
    bytes_per_sample = 2

    for chunk in stream_tts_audio(server_url, prompt):
        if chunk:
            np_chunk = np.frombuffer(chunk, dtype=np.int16)
            chunk_bytes = np_chunk.tobytes()
            all_audio_chunks.append(chunk_bytes)
            audio_buffer.extend(chunk_bytes)
            frame_bytes_buffer.extend(chunk_bytes)

            # Process in frame size chunks
            for i in range(0, len(audio_buffer) - frame_samples * bytes_per_sample, frame_samples * bytes_per_sample):
                frame = audio_buffer[i:i + frame_samples * bytes_per_sample]
                is_speech = vad.is_speech(frame, sample_rate)

                if not is_speech:
                    # Silence detected, play up to this point
                    segment = audio_buffer[:i]
                    if len(segment) > 0:  # Avoid playing empty segments
                        stream.write(bytes(segment))
                    audio_buffer = audio_buffer[i:]  # Update buffer to next sample
                #if len(audio_buffer) > 24000 * 3: #backup protection threshold
                 #   stream.write(bytes(audio_buffer))
                  #  audio_buffer = bytearray()

    # Ensure the last bit of audio plays completely
    if len(audio_buffer) > 0:
        stream.write(bytes(audio_buffer))
    time.sleep(0.5)

    stream.stop_stream()
    stream.close()
    p.terminate()

def stream_tts_audio(server_url, prompt):
    """Streams TTS audio chunks from the server."""
    url = f"{server_url}/tts-stream"
    try:
        response = requests.post(
            url,
            json={"text": prompt},
            headers={"Content-Type": "application/json"},
            stream=True,
            timeout=30
        )
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                yield np.frombuffer(chunk, dtype=np.int16).tobytes()

    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        yield None  # Indicate error
    except Exception as e:
        print(f"Error during streaming: {e}")
        yield None


ModuleNotFoundError: No module named 'webrtcvad'

In [16]:
import requests
import pyaudio
import time
import numpy as np

def play_tts_audio(server_url, prompt, sample_rate=24000, frame_duration=0.03, silence_threshold_factor=0.5, min_segment_duration=0.1):
    """
    Fetch and play TTS audio, segmenting based on energy and adaptive threshold.

    Args:
        server_url: URL of the TTS server.
        prompt: Text to synthesize.
        sample_rate: Audio sample rate.
        frame_duration: Duration of each frame in seconds.
        silence_threshold_factor: Factor to multiply the moving average energy by to determine the silence threshold.
        min_segment_duration: Minimum segment duration in seconds.
    """

    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        output=True,
        frames_per_buffer=1024
    )

    all_audio_chunks = []
    audio_buffer = bytearray()
    frame_length = int(sample_rate * frame_duration)
    bytes_per_sample = 2  # Assuming int16

    # Initialize variables for adaptive thresholding
    moving_average_energy = 0
    segment_start_time = 0  # Initialize segment start time

    for chunk in stream_tts_audio(server_url, prompt):
        if chunk:
            np_chunk = np.frombuffer(chunk, dtype=np.int16)
            chunk_bytes = np_chunk.tobytes()
            all_audio_chunks.append(chunk_bytes)
            audio_buffer.extend(chunk_bytes)
            num_samples = len(chunk_bytes) // bytes_per_sample
            
            # Process audio in frames (frame_length increments)
            for i in range(0, len(audio_buffer) - frame_length * bytes_per_sample, frame_length * bytes_per_sample):
                frame = audio_buffer[i:i + frame_length * bytes_per_sample]
                frame_np = np.frombuffer(frame, dtype=np.int16)

                # Calculate frame energy
                frame_energy = np.sum(frame_np**2) / frame_length

                # Update moving average energy (simple exponential smoothing)
                moving_average_energy = 0.95 * moving_average_energy + 0.05 * frame_energy

                # Calculate silence threshold
                silence_threshold = silence_threshold_factor * moving_average_energy

                # Check for silence
                if frame_energy < silence_threshold:
                    # Silence detected
                    segment_duration = i / sample_rate - segment_start_time # Time elapsed in second, can be wrong
                    if segment_duration >= min_segment_duration:
                        segment = audio_buffer[:i]

                        if len(segment) > 0:
                            stream.write(bytes(segment))
                        # Update buffer and start time
                        audio_buffer = audio_buffer[i:]
                        segment_start_time = i / sample_rate
                #if len(audio_buffer) > 24000 * 3: #Backup thresshold and also prevent hogging too much data.
                 #   segment = audio_buffer[:i] #Write everything before the threshold to keep the code efficient, avoiding long latency

                  #  stream.write(bytes(segment))
                   # audio_buffer = audio_buffer[i:] #Also do it to the buffers 


    if len(audio_buffer) > 0:
        stream.write(bytes(audio_buffer))

    time.sleep(0.5)
    stream.stop_stream()
    stream.close()
    p.terminate()


def stream_tts_audio(server_url, prompt):
    """Streams TTS audio chunks from the server."""
    url = f"{server_url}/tts-stream"
    try:
        response = requests.post(
            url,
            json={"text": prompt},
            headers={"Content-Type": "application/json"},
            stream=True,
            timeout=30
        )
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                yield np.frombuffer(chunk, dtype=np.int16).tobytes()

    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        yield None
    except Exception as e:
        print(f"Error during streaming: {e}")
        yield None


In [21]:
import requests
import pyaudio
import time
import numpy as np

def play_tts_audio(server_url, prompt, sample_rate=24000, chunk_size=1024, buffer_duration=0.2, crossfade_duration=0.02):
    """Plays TTS audio from the server with buffering and crossfading for smoother transitions."""

    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        output=True,
        frames_per_buffer=chunk_size
    )

    all_audio_chunks = []
    audio_buffer = bytearray()
    buffer_size_bytes = int(buffer_duration * sample_rate * 2)  # buffer_duration seconds * sample_rate * 2 bytes/sample
    crossfade_samples = int(crossfade_duration * sample_rate)
    bytes_per_sample = 2  #Assuming int16

    for chunk in stream_tts_audio(server_url, prompt):
        if chunk:
            np_chunk = np.frombuffer(chunk, dtype=np.int16)
            chunk_bytes = np_chunk.tobytes()
            all_audio_chunks.append(chunk_bytes)
            audio_buffer.extend(chunk_bytes)

            # If the buffer is full enough
            while len(audio_buffer) >= buffer_size_bytes:
                segment = audio_buffer[:buffer_size_bytes] #Write data from the buffer before buffer size bytes.

                # Apply Crossfade
                if len(segment) > crossfade_samples * bytes_per_sample:
                    segment_np = np.frombuffer(segment, dtype=np.int16)
                    fade_in = np.linspace(0, 1, crossfade_samples)
                    fade_out = np.linspace(1, 0, crossfade_samples)

                    # Apply crossfade to last section of audio
                    segment_np[-crossfade_samples:] = segment_np[-crossfade_samples:] * fade_out

                    segment = segment_np.tobytes()


                stream.write(segment) #Write audio

                audio_buffer = audio_buffer[len(segment):]

    # Play any remaining audio
    if len(audio_buffer) > 0:
        stream.write(bytes(audio_buffer)) #write last bits

    # Cleanup
    time.sleep(0.5)  # Let the last bit play
    stream.stop_stream()
    stream.close()
    p.terminate()


def stream_tts_audio(server_url, prompt):
    """Streams TTS audio chunks from the server."""
    url = f"{server_url}/tts-stream"
    try:
        response = requests.post(
            url,
            json={"text": prompt},
            headers={"Content-Type": "application/json"},
            stream=True,
            timeout=30
        )
        response.raise_for_status()
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                yield np.frombuffer(chunk, dtype=np.int16).tobytes()

    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        yield None
    except Exception as e:
        print(f"Error during streaming: {e}")
        yield None


In [22]:
play_tts_audio("https://06ac-35-185-185-231.ngrok-free.app/", "Hi I am hungry")

2025-04-04 14:38:40,610 - urllib3.connectionpool - DEBUG - MainThread - Starting new HTTPS connection (1): 06ac-35-185-185-231.ngrok-free.app:443
2025-04-04 14:38:42,519 - urllib3.connectionpool - DEBUG - MainThread - https://06ac-35-185-185-231.ngrok-free.app:443 "POST /tts-stream HTTP/11" 200 None


In [7]:
import requests
import json
import time
import threading
import queue
import pyaudio
import numpy as np
import logging
from typing import Optional, Set, List
from collections import deque

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(threadName)s - %(message)s',
    handlers=[
        logging.FileHandler("tts_client.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("TTSClient")

class TTSClient:
    def __init__(self, server_url):
        logger.info(f"Initializing TTSClient with server URL: {server_url}")
        if not server_url:
            logger.error("Server URL is missing")
            raise ValueError("Server URL is required")
        
        # Configuration
        self.server_url = server_url
        self.sample_rate = 24000
        self.bytes_per_second = self.sample_rate * 2  # 16-bit audio = 2 bytes per sample
        self.min_write_size = self.bytes_per_second  # 1 second of audio data
        
        # Audio components
        self.audio_queue = queue.Queue()
        self.p = pyaudio.PyAudio()
        self.stream = None
        
        # Text processing components
        self.text_queue = queue.Queue()
        self.current_phrase = ""
        
        # Thread synchronization
        self.lock = threading.RLock()
        self.stop_event = threading.Event()
        self.audio_finished_event = threading.Event()
        
        # Audio tracking
        self.last_write_time = 0
        self.all_audio_chunks = []
        self.total_bytes_queued = 0
        self.total_bytes_played = 0
        
        # Common abbreviations (used to avoid splitting sentences)
        self.abbreviations = {
            'Mr.', 'Mrs.', 'Ms.', 'Dr.', 'Prof.', 'Sr.', 'Jr.', 'Ph.D.', 'M.D.', 
            'B.A.', 'M.A.', 'B.S.', 'M.S.', 'LL.B.', 'LL.M.', 'J.D.', 'Esq.', 
            'Inc.', 'Ltd.', 'Co.', 'Corp.', 'Ave.', 'St.', 'Rd.', 'Blvd.', 'Dr.', 
            'Apt.', 'Ste.', 'No.', 'vs.', 'etc.', 'i.e.', 'e.g.', 'a.m.', 'p.m.',
            'U.S.', 'U.K.', 'N.Y.', 'L.A.', 'D.C.'
        }
        
        # Start processing threads
        self._start_audio_stream()
        
        self.text_thread = threading.Thread(target=self._process_text_queue, daemon=True, name="TextProcessor")
        self.text_thread.start()
        
        self.play_thread = threading.Thread(target=self._play_audio, daemon=True, name="AudioPlayer")
        self.play_thread.start()
        
    
    def _start_audio_stream(self):
        """Initialize or reinitialize the audio playback stream"""
        with self.lock:
            if self.stream:
                try:
                    self.stream.stop_stream()
                    self.stream.close()
                except Exception as e:
                    logger.warning(f"Error closing stream: {e}")
                
            try:
                self.stream = self.p.open(
                    format=self.p.get_format_from_width(2),  # 16-bit audio
                    channels=1,
                    rate=self.sample_rate,
                    output=True
                )
            except Exception as e:
                logger.critical(f"Failed to initialize audio stream: {e}")
                self.stream = None
    
    def _play_audio(self):
        """Play audio chunks from the queue, aggregating into 1-second chunks"""
        logger.info("Audio playback thread started")
        buffer = bytearray()
        total_silence_time = 0
        last_check_time = time.time()
        
        while not self.stop_event.is_set():
            try:
                # Get audio chunk from queue with timeout
                audio_chunk = self.audio_queue.get(timeout=0.2)
                chunk_size = len(audio_chunk)
                self.audio_queue.task_done()
                
                # Add to our buffer
                buffer.extend(audio_chunk)
                buffer_size = len(buffer)
                
                # Check if we have enough data for a 1-second write
                if buffer_size >= self.min_write_size:
                    # Write a complete second (or multiple seconds) to PyAudio
                    self._write_to_stream(buffer)
                    buffer = bytearray()  # Clear the buffer
                    total_silence_time = 0  # Reset silence counter
                
            except queue.Empty:
                # Check if we need to flush a partial buffer
                current_time = time.time()
                elapsed = current_time - last_check_time
                last_check_time = current_time
                
                # Track silence time
                total_silence_time += elapsed
                buffer_size = len(buffer)
                
                # If we've been silent for a while but have data, flush it
                if buffer and total_silence_time > 0.5:
                    self._write_to_stream(buffer)
                    buffer = bytearray()
                    total_silence_time = 0
                
                # Check if we're done playing
                if self.audio_queue.empty() and self.text_queue.empty():
                    # If we have data in buffer, flush it
                    if buffer:
                        self._write_to_stream(buffer)
                        buffer = bytearray()
                    
                    # Check if we should signal completion
                    if (self.audio_queue.empty() and self.text_queue.empty() and time.time() - self.last_write_time > 1.0):  # Last audio chunk had time to play
                        self.audio_finished_event.set()
                        break
            except Exception as e:
                logger.error(f"Error in audio playback: {e}", exc_info=True)
        
        # Final flush of any remaining audio
        if buffer:
            self._write_to_stream(buffer)
        
        # Signal completion before exiting thread
        self.audio_finished_event.set()
    
    def _write_to_stream(self, audio_data):
        """Write a chunk of audio data to the stream"""
        try:
            with self.lock:
                if not self.stream:
                    logger.error("Cannot write to stream: stream is None")
                    return
                    
                if not self.stream.is_active():
                    logger.warning("Stream is not active, attempting to restart")
                    self._start_audio_stream()
                    
                if self.stream and not self.stop_event.is_set():
                    data_size = len(audio_data)
                    seconds = data_size / self.bytes_per_second
                    
                    # Try to write to stream
                    try:
                        self.stream.write(bytes(audio_data))
                        self.last_write_time = time.time()
                        self.all_audio_chunks.append(bytes(audio_data))
                        self.total_bytes_played += data_size
                    except Exception as e:
                        logger.error(f"Failed to write to stream: {e}")
                        # Try to recover
                        self._start_audio_stream()
                        if self.stream:
                            try:
                                self.stream.write(bytes(audio_data))
                                logger.info("Recovery successful - wrote data after stream restart")
                            except Exception as e2:
                                logger.error(f"Recovery failed: {e2}")
        except Exception as e:
            logger.error(f"Error in _write_to_stream: {e}", exc_info=True)
    
    def _is_phrase_complete(self, text):
        """Check if a phrase is complete based on punctuation"""
        text = text.strip()
        if not text:
            return False
            
        # Check for sentence endings
        phrase_endings = ['.', '?', '!']
        
        # Don't split on abbreviations
        is_complete = text.endswith(tuple(phrase_endings)) and not text in self.abbreviations
        return is_complete
    
    def _process_text_queue(self):
        """Process text chunks from the queue, sending complete phrases to TTS"""
        logger.info("Text processing thread started")
        while not self.stop_event.is_set():
            try:
                # Get text chunk with timeout
                text_chunk = self.text_queue.get(timeout=0.5)
                
                with self.lock:
                    prev_phrase = self.current_phrase
                    self.current_phrase += text_chunk
                    
                    # Skip abbreviations
                    if text_chunk.strip() in self.abbreviations:
                        self.text_queue.task_done()
                        continue
                    
                    # Send complete phrases to TTS
                    if self._is_phrase_complete(self.current_phrase):
                        phrase = self.current_phrase.strip()
                        self.current_phrase = ""
                        self._send_to_tts(phrase)
                
                self.text_queue.task_done()
            except queue.Empty:
                # If we have a substantial partial phrase, send it
                with self.lock:
                    if self.current_phrase and len(self.current_phrase.split()) >= 3:
                        phrase = self.current_phrase.strip()
                        self.current_phrase = ""
                        self._send_to_tts(phrase)
            except Exception as e:
                logger.error(f"Error processing text: {e}", exc_info=True)
    
    def _send_to_tts(self, text):
        """Send text to TTS service and process the response"""
        if not text:
            return
        
        try:
            url = f"{self.server_url}/tts-stream"
            
            start_time = time.time()
            response = requests.post(
                url,
                json={"text": text},
                headers={"Content-Type": "application/json"},
                stream=True
            )
            
            if response.status_code == 200:
                chunks_received = 0
                total_bytes = 0
                
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk and not self.stop_event.is_set():
                        # Convert to audio format and queue for playback
                        chunk_size = len(chunk)
                        total_bytes += chunk_size
                        chunks_received += 1
                        
                        # Verify data is valid audio
                        try:
                            audio_data = np.frombuffer(chunk, dtype=np.int16).tobytes()
                            self.audio_queue.put(audio_data)
                            self.total_bytes_queued += len(audio_data)
                        except Exception as e:
                            logger.error(f"Error processing audio chunk: {e}", exc_info=True)
                
                elapsed = time.time() - start_time
                
            else:
                logger.error(f"TTS server error: {response.status_code} - {response.text}")
        except Exception as e:
            logger.error(f"Error sending text to TTS: {e}", exc_info=True)
    
    def stream_text(self, text_chunk):
        """Add text to the processing queue, handling both complete text and individual tokens"""
        if not text_chunk:
            return
            
        # Check if this is likely a single token/word from streaming
        is_token = len(text_chunk.strip().split()) <= 1 and not any(c in text_chunk for c in '.?!,;:')
        
        if is_token:
            # Handle as a streaming token - append space to signal word boundary
            token = text_chunk.strip() + " "
            self.text_queue.put(token)
        else:
            # Handle as a complete chunk of text
            self.text_queue.put(text_chunk)
    
    def wait_for_completion(self, timeout=5):
        """Wait for all audio to finish playing"""
        logger.info(f"wait_for_completion called with timeout={timeout}")
        # Process any remaining text
        with self.lock:
            if self.current_phrase:
                self._send_to_tts(self.current_phrase.strip())
                self.current_phrase = ""
        
        # Wait for the audio_finished_event or timeout
        start_time = time.time()
        
        while time.time() - start_time < timeout * 2:
            text_queue_empty = self.text_queue.empty()
            audio_queue_empty = self.audio_queue.empty()
            
            if self.audio_finished_event.wait(timeout=0.5):
                # Add a small delay to ensure playback completion
                time.sleep(0.5)
                return True
            
            # Both queues are empty - just waiting for last audio to finish
            if text_queue_empty and audio_queue_empty:
                # Give some time for the last audio to play
                time_since_last_write = time.time() - self.last_write_time
                if time_since_last_write > 1.0:
                    return True
        
        logger.warning(f"wait_for_completion timed out after {time.time() - start_time:.2f} seconds")
        return True  # Return true even on timeout
    
    def stop(self):
        """Stop all processing and clean up resources"""
        logger.info("stop() called, shutting down TTSClient")
        self.stop_event.set()
        
        # Wait briefly for threads to notice stop event
        time.sleep(0.2)
        
        with self.lock:
            if self.stream:
                try:
                    self.stream.stop_stream()
                    self.stream.close()
                except Exception as e:
                    logger.error(f"Error closing audio stream: {e}")
            
            try:
                self.p.terminate()
            except Exception as e:
                logger.error(f"Error terminating PyAudio: {e}")
        
        logger.info("TTSClient shutdown complete")

In [8]:
tts_client = TTSClient("https://284e-34-143-233-24.ngrok-free.app/")
tts_client.stream_text("Hi I am hungry")
tts_client.wait_for_completion()


2025-04-04 12:44:07,177 - TTSClient - INFO - MainThread - Initializing TTSClient with server URL: https://284e-34-143-233-24.ngrok-free.app/
2025-04-04 12:44:07,956 - TTSClient - INFO - TextProcessor - Text processing thread started
2025-04-04 12:44:07,961 - TTSClient - INFO - AudioPlayer - Audio playback thread started
2025-04-04 12:44:07,964 - TTSClient - INFO - MainThread - wait_for_completion called with timeout=5
2025-04-04 12:44:07,977 - urllib3.connectionpool - DEBUG - MainThread - Starting new HTTPS connection (1): 284e-34-143-233-24.ngrok-free.app:443
2025-04-04 12:44:09,971 - urllib3.connectionpool - DEBUG - MainThread - https://284e-34-143-233-24.ngrok-free.app:443 "POST /tts-stream HTTP/11" 200 None
2025-04-04 12:44:13,780 - urllib3.connectionpool - DEBUG - TextProcessor - Starting new HTTPS connection (1): 284e-34-143-233-24.ngrok-free.app:443


KeyboardInterrupt: 

2025-04-04 12:49:14,330 - TTSClient - ERROR - TextProcessor - Error sending text to TTS: HTTPSConnectionPool(host='284e-34-143-233-24.ngrok-free.app', port=443): Max retries exceeded with url: /tts-stream (Caused by SSLError(SSLError(1, '[SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC] decryption failed or bad record mac (_ssl.c:2638)')))
urllib3.exceptions.SSLError: [SSL: DECRYPTION_FAILED_OR_BAD_RECORD_MAC] decryption failed or bad record mac (_ssl.c:2638)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\Mino Mecha 54\AppData\Local\Programs\Python\Python313\Lib\site-packages\requests\adapters.py", line 667, in send
    resp = conn.urlopen(
        method=request.method,
    ...<9 lines>...
        chunked=chunked,
    )
  File "c:\Users\Mino Mecha 54\AppData\Local\Programs\Python\Python313\Lib\site-packages\urllib3\connectionpool.py", line 843, in urlopen
    retries = retries.increment(
        method, url, error=ne

In [23]:
import numpy as np
import pyaudio
import time
import threading
import queue
import os
from concurrent.futures import ThreadPoolExecutor

class MockTTSServer:
    """Simulates a TTS server that sends audio in very small chunks"""
    
    def __init__(self, sample_rate=24000, duration=5, chunk_size=128):
        """
        Initialize the mock server
        
        Args:
            sample_rate: Sample rate of audio to generate
            duration: Duration of audio in seconds
            chunk_size: Size of chunks to send (small to simulate challenging conditions)
        """
        self.sample_rate = sample_rate
        self.duration = duration
        self.chunk_size = chunk_size
        
        # Generate a test tone (sine wave)
        t = np.linspace(0, duration, int(sample_rate * duration), False)
        # Generate a 440 Hz sine wave with some variation
        tone = 0.5 * np.sin(2 * np.pi * 440 * t)
        
        # Add some frequency variation to make it more interesting
        tone += 0.3 * np.sin(2 * np.pi * 220 * t)
        tone += 0.2 * np.sin(2 * np.pi * 880 * t)
        
        # Add a slow amplitude modulation
        tone *= 0.5 + 0.5 * np.sin(2 * np.pi * 0.5 * t)
        
        # Normalize and convert to int16
        tone = np.int16(tone / np.max(np.abs(tone)) * 32767)
        
        # Split into very small chunks to simulate challenging streaming
        self.chunks = []
        for i in range(0, len(tone), chunk_size):
            self.chunks.append(tone[i:i+chunk_size].tobytes())
    
    def stream_audio(self, delay_ms=5):
        """
        Generator that yields audio chunks with artificial delay
        
        Args:
            delay_ms: Artificial delay between chunks in milliseconds
        """
        for chunk in self.chunks:
            time.sleep(delay_ms / 1000)  # Simulate network/processing delay
            yield chunk


def play_tts_audio(audio_source, sample_rate=24000, chunk_size=1024, 
                  initial_buffer_seconds=1.0, playback_chunk_size=2048):
    """
    Plays TTS audio with enhanced buffering for smoother playback.
    
    Args:
        audio_source: Generator yielding audio chunks
        sample_rate: Audio sample rate in Hz
        chunk_size: Size of incoming chunks
        initial_buffer_seconds: How many seconds of audio to buffer before starting playback
        playback_chunk_size: Size of chunks to send to audio device
    """
    print(f"Setting up audio stream (sample rate: {sample_rate}Hz)")
    
    # Setup audio playback
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        output=True,
        frames_per_buffer=playback_chunk_size
    )
    
    # Create a queue for buffering audio chunks
    audio_queue = queue.Queue(maxsize=100)
    
    # Flag to signal when streaming is complete
    streaming_complete = threading.Event()
    
    # Store all audio for later analysis
    complete_audio = bytearray()
    
    def stream_producer():
        """Thread function to stream audio and put in queue"""
        nonlocal complete_audio
        try:
            chunk_count = 0
            total_bytes = 0
            start_time = time.time()
            
            for chunk in audio_source:
                if chunk is None or len(chunk) == 0:
                    break
                
                # Store complete audio for validation
                complete_audio.extend(chunk)
                
                audio_queue.put(chunk)
                chunk_count += 1
                total_bytes += len(chunk)
                
            end_time = time.time()
            print(f"Producer finished: {chunk_count} chunks, {total_bytes} bytes in {end_time - start_time:.2f}s")
            streaming_complete.set()  # Signal that streaming is done
            
        except Exception as e:
            print(f"Error in producer thread: {e}")
            streaming_complete.set()
    
    # Start the producer thread
    producer_thread = threading.Thread(target=stream_producer)
    producer_thread.daemon = True
    producer_thread.start()
    
    # Buffer initial audio before playback
    print(f"Buffering initial audio ({initial_buffer_seconds}s)...")
    initial_buffer_bytes = int(initial_buffer_seconds * sample_rate * 2)  # 2 bytes per sample
    buffered_bytes = 0
    buffer_start_time = time.time()
    
    while buffered_bytes < initial_buffer_bytes:
        if streaming_complete.is_set() and audio_queue.empty():
            break
        if not audio_queue.empty():
            chunk = audio_queue.get()
            buffered_bytes += len(chunk)
            audio_queue.put(chunk)  # Put it back for playback
        time.sleep(0.01)
    
    buffer_end_time = time.time()
    print(f"Initial buffering complete: {buffered_bytes} bytes in {buffer_end_time - buffer_start_time:.2f}s")
    
    # Create a numpy buffer to hold audio data
    playback_buffer = np.array([], dtype=np.int16)
    
    # Playback statistics
    underruns = 0
    start_time = time.time()
    total_played = 0
    
    # Main playback loop
    print("Starting playback...")
    while not (streaming_complete.is_set() and audio_queue.empty() and len(playback_buffer) == 0):
        # Add more data to the playback buffer if available
        while not audio_queue.empty() and len(playback_buffer) < playback_chunk_size * 4:
            try:
                chunk = audio_queue.get(block=False)
                np_chunk = np.frombuffer(chunk, dtype=np.int16)
                playback_buffer = np.append(playback_buffer, np_chunk)
            except queue.Empty:
                break
        
        # Check if we need more data but don't have enough
        if len(playback_buffer) < playback_chunk_size and not streaming_complete.is_set():
            print(f"Buffer underrun! Only have {len(playback_buffer)} samples")
            underruns += 1
            time.sleep(0.1)  # Wait for more data
            continue
        
        # Play audio if we have enough data
        if len(playback_buffer) >= playback_chunk_size:
            # Extract the next segment to play
            segment = playback_buffer[:playback_chunk_size]
            stream.write(segment.tobytes())
            total_played += len(segment)
            
            # Remove played segment from buffer
            playback_buffer = playback_buffer[playback_chunk_size:]
        else:
            # Not enough data yet, wait a bit
            time.sleep(0.01)
            
            # If we're waiting for more data but streaming is complete, play what we have
            if streaming_complete.is_set() and len(playback_buffer) > 0:
                stream.write(playback_buffer.tobytes())
                total_played += len(playback_buffer)
                playback_buffer = np.array([], dtype=np.int16)
    
    end_time = time.time()
    
    # Cleanup
    stream.stop_stream()
    stream.close()
    p.terminate()
    
    # Print statistics
    playback_duration = end_time - start_time
    expected_duration = total_played / (sample_rate * 2)  # 2 bytes per sample
    print(f"Playback complete - Statistics:")
    print(f"  Duration: {playback_duration:.2f}s (expected: {expected_duration:.2f}s)")
    print(f"  Bytes played: {total_played}")
    print(f"  Buffer underruns: {underruns}")
    
    return {
        "underruns": underruns,
        "playback_duration": playback_duration,
        "expected_duration": expected_duration,
        "total_played": total_played,
        "complete_audio": complete_audio
    }


def run_test(delay_ms=5, chunk_size=128, buffer_time=1.0, playback_chunk_size=2048):
    """Run a complete test of the TTS player with simulated small chunks"""
    print("\n" + "="*80)
    print(f"TEST: delay={delay_ms}ms, chunk_size={chunk_size}, buffer_time={buffer_time}s")
    print("="*80)
    
    # Create mock server that will stream audio in very small chunks
    mock_server = MockTTSServer(chunk_size=chunk_size)
    
    # Create the audio stream with specified delay
    audio_stream = mock_server.stream_audio(delay_ms=delay_ms)
    
    # Run the player with the specified buffering parameters
    result = play_tts_audio(
        audio_stream,
        initial_buffer_seconds=buffer_time,
        playback_chunk_size=playback_chunk_size
    )
    
    # Report success or failure
    if result["underruns"] == 0:
        print("TEST PASSED: No buffer underruns")
    else:
        print(f"TEST FAILED: {result['underruns']} buffer underruns")
    
    return result


if __name__ == "__main__":
    print("TTS Player Test with Simulated Small-Chunk Streaming")
    print("==================================================")
    
    # Test scenarios
    scenarios = [
        # Very challenging: tiny chunks with delay
        {"delay_ms": 10, "chunk_size": 64, "buffer_time": 0.5, "playback_chunk_size": 1024},
        
        # Small chunks but more buffering
        {"delay_ms": 5, "chunk_size": 128, "buffer_time": 1.0, "playback_chunk_size": 2048},
        
        # Better scenario but still small chunks
        {"delay_ms": 2, "chunk_size": 256, "buffer_time": 0.75, "playback_chunk_size": 2048},
    ]
    
    results = []
    
    # Run all test scenarios
    for i, scenario in enumerate(scenarios):
        print(f"\nRunning test scenario {i+1}/{len(scenarios)}")
        result = run_test(**scenario)
        results.append((scenario, result))
    
    # Print summary
    print("\n" + "="*80)
    print("TEST SUMMARY")
    print("="*80)
    
    for i, (scenario, result) in enumerate(results):
        print(f"\nScenario {i+1}:")
        print(f"  Settings: {scenario}")
        print(f"  Underruns: {result['underruns']}")
        print(f"  Duration: {result['playback_duration']:.2f}s (expected: {result['expected_duration']:.2f}s)")
        print(f"  {'PASSED' if result['underruns'] == 0 else 'FAILED'}")

TTS Player Test with Simulated Small-Chunk Streaming

Running test scenario 1/3

TEST: delay=10ms, chunk_size=64, buffer_time=0.5s
Setting up audio stream (sample rate: 24000Hz)
Buffering initial audio (0.5s)...
Initial buffering complete: 24064 bytes in 1.99s
Starting playback...
Buffer underrun! Only have 896 samples
Buffer underrun! Only have 512 samples
Buffer underrun! Only have 64 samples
Buffer underrun! Only have 640 samples
Buffer underrun! Only have 256 samples
Buffer underrun! Only have 896 samples
Buffer underrun! Only have 448 samples
Buffer underrun! Only have 64 samples
Buffer underrun! Only have 704 samples
Buffer underrun! Only have 256 samples
Buffer underrun! Only have 896 samples
Buffer underrun! Only have 512 samples
Buffer underrun! Only have 768 samples
Buffer underrun! Only have 320 samples
Buffer underrun! Only have 960 samples
Buffer underrun! Only have 576 samples
Buffer underrun! Only have 128 samples
Buffer underrun! Only have 768 samples
Buffer underrun! O

In [24]:
import requests
import pyaudio
import time
import numpy as np
import threading
import queue
import sys

def play_tts_audio(server_url, prompt, sample_rate=24000, 
                  initial_buffer_seconds=0.75, playback_chunk_size=2048,
                  print_stats=True):
    """
    Plays TTS audio from the server with robust buffering for smooth playback.
    
    Args:
        server_url: URL of the TTS server
        prompt: Text to convert to speech
        sample_rate: Audio sample rate in Hz
        initial_buffer_seconds: How many seconds of audio to buffer before playback
        playback_chunk_size: Size of chunks to send to audio device
        print_stats: Whether to print debugging statistics
    """
    if print_stats:
        print(f"Starting TTS streaming with {initial_buffer_seconds}s buffer")
    
    # Setup audio playback
    p = pyaudio.PyAudio()
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        output=True,
        frames_per_buffer=playback_chunk_size
    )
    
    # Create a queue for buffering audio chunks
    audio_queue = queue.Queue(maxsize=100)
    
    # Flag to signal when streaming is complete
    streaming_complete = threading.Event()
    buffer_ready = threading.Event()
    
    # Stats for debugging
    stats = {
        "chunks_received": 0,
        "bytes_received": 0,
        "playback_bytes": 0,
        "underruns": 0,
        "start_time": time.time(),
        "playback_start_time": 0
    }
    
    def stream_producer():
        """Thread function to stream audio from server and put in queue"""
        try:
            stream_start = time.time()
            
            for chunk in stream_tts_audio(server_url, prompt):
                if chunk is None or len(chunk) == 0:
                    break
                
                stats["chunks_received"] += 1
                stats["bytes_received"] += len(chunk)
                
                # Convert bytes to numpy array
                np_chunk = np.frombuffer(chunk, dtype=np.int16)
                audio_queue.put(np_chunk)
                
                # Signal when buffer is ready
                if not buffer_ready.is_set():
                    buffer_size_samples = int(sample_rate * initial_buffer_seconds)
                    if stats["bytes_received"] >= buffer_size_samples * 2:  # 2 bytes per sample
                        buffer_ready.set()
                        if print_stats:
                            print(f"Buffer ready: {stats['bytes_received']} bytes in {time.time() - stream_start:.2f}s")
            
            if print_stats:
                print(f"Streaming complete: {stats['chunks_received']} chunks, "
                      f"{stats['bytes_received']} bytes in {time.time() - stream_start:.2f}s")
            
            streaming_complete.set()
            
        except Exception as e:
            print(f"Error in producer thread: {e}")
            streaming_complete.set()
    
    # Start the producer thread
    producer_thread = threading.Thread(target=stream_producer)
    producer_thread.daemon = True
    producer_thread.start()
    
    # Wait for initial buffer to fill
    if print_stats:
        print(f"Waiting for initial buffer ({initial_buffer_seconds}s of audio)...")
    
    buffer_wait_start = time.time()
    buffer_ready.wait(timeout=5.0)  # Wait up to 5 seconds for buffer
    
    if not buffer_ready.is_set():
        if print_stats:
            print("Warning: Starting playback before full buffer - might stutter")
    elif print_stats:
        print(f"Buffer filled in {time.time() - buffer_wait_start:.2f}s")
    
    # Create a numpy buffer to hold consolidated audio data
    playback_buffer = np.array([], dtype=np.int16)
    stats["playback_start_time"] = time.time()
    
    if print_stats:
        print("Starting playback...")
    
    # Main playback loop
    while not (streaming_complete.is_set() and audio_queue.empty() and len(playback_buffer) == 0):
        # Add more data to the playback buffer if available
        queue_items = 0
        while not audio_queue.empty() and queue_items < 10:  # Don't empty the whole queue at once
            try:
                chunk = audio_queue.get(block=False)
                playback_buffer = np.append(playback_buffer, chunk)
                queue_items += 1
            except queue.Empty:
                break
        
        # Play audio if we have enough data
        if len(playback_buffer) >= playback_chunk_size:
            # Extract the next segment to play
            segment = playback_buffer[:playback_chunk_size]
            stream.write(segment.tobytes())
            stats["playback_bytes"] += len(segment) * 2  # 2 bytes per sample
            
            # Remove played segment from buffer
            playback_buffer = playback_buffer[playback_chunk_size:]
        else:
            # Not enough data yet, wait a bit
            if not streaming_complete.is_set():
                if print_stats and len(playback_buffer) < playback_chunk_size:
                    print(f"Buffer underrun! Only have {len(playback_buffer)} samples")
                    stats["underruns"] += 1
                time.sleep(0.01)
            else:
                # If streaming is complete, play any remaining audio
                if len(playback_buffer) > 0:
                    stream.write(playback_buffer.tobytes())
                    stats["playback_bytes"] += len(playback_buffer) * 2
                    playback_buffer = np.array([], dtype=np.int16)
    
    # Print final statistics
    if print_stats:
        total_time = time.time() - stats["start_time"]
        playback_time = time.time() - stats["playback_start_time"]
        samples_played = stats["playback_bytes"] // 2
        expected_duration = samples_played / sample_rate
        
        print("\nPlayback complete - Statistics:")
        print(f"  Total duration: {total_time:.2f}s")
        print(f"  Playback duration: {playback_time:.2f}s (expected: {expected_duration:.2f}s)")
        print(f"  Received: {stats['chunks_received']} chunks, {stats['bytes_received']} bytes")
        print(f"  Played: {samples_played} samples ({stats['playback_bytes']} bytes)")
        print(f"  Buffer underruns: {stats['underruns']}")
    
    # Cleanup
    stream.stop_stream()
    stream.close()
    p.terminate()
    
    return stats


def stream_tts_audio(server_url, prompt):
    """Streams TTS audio chunks from the server."""
    url = f"{server_url}/tts-stream"
    try:
        response = requests.post(
            url,
            json={"text": prompt},
            headers={"Content-Type": "application/json"},
            stream=True,
            timeout=30
        )
        response.raise_for_status()
        
        # Stream the audio in chunks (no specific chunk size set - accept whatever the server sends)
        for chunk in response.iter_content():
            if chunk:
                yield chunk
    
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        yield None
    except Exception as e:
        print(f"Error during streaming: {e}")
        yield None


def test_player():
    """Test the TTS player with a simulated server"""
    class MockTTSServer:
        """Simulates a TTS server that sends audio in very small chunks"""
        def __init__(self, duration=5, sample_rate=24000, chunk_size=64):
            self.sample_rate = sample_rate
            # Create a test tone
            t = np.linspace(0, duration, int(sample_rate * duration), False)
            tone = 0.5 * np.sin(2 * np.pi * 440 * t)
            tone += 0.3 * np.sin(2 * np.pi * 220 * t)
            tone *= 0.5 + 0.5 * np.sin(2 * np.pi * 0.5 * t)
            self.audio = np.int16(tone / np.max(np.abs(tone)) * 32767)
            self.chunk_size = chunk_size
        
        def generate_chunks(self, delay_ms=5):
            """Generate audio chunks with delay"""
            for i in range(0, len(self.audio), self.chunk_size):
                chunk = self.audio[i:i+self.chunk_size].tobytes()
                time.sleep(delay_ms / 1000)
                yield chunk
    
    class MockServer:
        def __init__(self):
            self.tts_server = MockTTSServer(chunk_size=64)
        
        def post(self, *args, **kwargs):
            class MockResponse:
                def __init__(self, generator):
                    self.generator = generator
                
                def raise_for_status(self):
                    pass
                
                def iter_content(self, chunk_size=None):
                    for chunk in self.generator:
                        yield chunk
            
            return MockResponse(self.tts_server.generate_chunks(delay_ms=5))
    
    # Monkey patch requests to use our mock
    original_post = requests.post
    requests.post = MockServer().post
    
    try:
        print("Testing TTS player with simulated small chunks...")
        stats = play_tts_audio(
            "http://mock-server",
            "This is a test of the TTS streaming system.",
            initial_buffer_seconds=1.0,
            playback_chunk_size=2048
        )
        
        if stats["underruns"] == 0:
            print("\nTEST PASSED: Smooth playback achieved!")
        else:
            print(f"\nTEST PARTIALLY PASSED: {stats['underruns']} buffer underruns occurred")
        
    finally:
        # Restore original requests.post
        requests.post = original_post


if __name__ == "__main__":

    test_player()
    # else:
    #     # Usage with actual server
    #     server_url = input("Enter TTS server URL (e.g. http://localhost:8000): ")
    #     text = input("Enter text to convert to speech: ")
    #     print("\nStarting TTS playback...")
    #     play_tts_audio(server_url, text, initial_buffer_seconds=1.0)

Testing TTS player with simulated small chunks...
Starting TTS streaming with 1.0s buffer
Waiting for initial buffer (1.0s of audio)...
Starting playback...
Buffer underrun! Only have 640 samples
Buffer underrun! Only have 1280 samples
Buffer underrun! Only have 1920 samples
Buffer underrun! Only have 1152 samples
Buffer underrun! Only have 1792 samples
Buffer underrun! Only have 1024 samples
Buffer underrun! Only have 1664 samples
Buffer underrun! Only have 896 samples
Buffer underrun! Only have 1344 samples
Buffer underrun! Only have 1536 samples
Buffer underrun! Only have 1600 samples
Buffer underrun! Only have 1728 samples
Buffer underrun! Only have 1856 samples
Buffer underrun! Only have 1984 samples
Buffer underrun! Only have 64 samples
Buffer underrun! Only have 192 samples
Buffer underrun! Only have 320 samples
Buffer underrun! Only have 448 samples
Buffer underrun! Only have 576 samples
Buffer underrun! Only have 704 samples
Buffer underrun! Only have 832 samples
Buffer underr