In [3]:
# Enhanced Streaming Voice WebSocket Debug - Test conversation history bleeding issue
import asyncio, math, json, time, contextlib, os, ssl, base64
from pathlib import Path
from typing import List, Dict, Any

try:
    import websockets  # type: ignore
except ImportError:
    raise RuntimeError("Install websockets: pip install websockets")

# Server configuration
# HOST is "<address>:<port>"; it is interpolated into both the HTTP status URL
# and the WebSocket URL used by the test below.
HOST = '127.0.0.1:8000'
USE_SECURE = False  # Local server uses HTTP/WS; True selects the wss:// scheme for the WebSocket URL
LANGUAGE = 'en'  # Using English for clearer testing; sent as the ?language= query parameter
FAST = True  # if False, will pace real-time (NOTE(review): not referenced in the visible code - confirm it is still needed)
AUTO_CLOSE_AFTER_FINAL_S = 30.0  # Increased timeout for multi-file test; seconds to keep listening after the last upload

# Test files for conversation simulation
# Files are sent in list order, one WebSocket binary message per file.
WEBM_FILES = [
    Path('/home/lumi/beautyai/voice_tests/input_test_questions/webm/greeting.webm'),
    Path('/home/lumi/beautyai/voice_tests/input_test_questions/webm/botox.webm'),
    Path('/home/lumi/beautyai/voice_tests/input_test_questions/webm/laser_hair.webm')
]

PAUSE_BETWEEN_FILES_S = 6.0  # 6 seconds pause between files to simulate natural conversation

# Startup banner: echo the effective configuration so a captured log is self-describing.
print("🚀 Starting Enhanced Streaming Voice Debug")
print(f"   Host: {HOST}")
print(f"   Language: {LANGUAGE}")
print(f"   Files to test: {len(WEBM_FILES)}")
print(f"   Pause between files: {PAUSE_BETWEEN_FILES_S}s")

# Verify all test files exist (report only; missing files are skipped later by the test itself)
for i, webm_path in enumerate(WEBM_FILES, 1):
    if webm_path.exists():
        print(f"   ✅ File {i}: {webm_path.name} ({webm_path.stat().st_size} bytes)")
    else:
        print(f"   ❌ File {i}: {webm_path.name} - NOT FOUND")

# Verify streaming endpoint is available
import requests
try:
    # Follow USE_SECURE for the HTTP scheme too, so the probe targets the same
    # (TLS or plain) server that the WebSocket test connects to.
    stream_status = requests.get(
        f"{'https' if USE_SECURE else 'http'}://{HOST}/api/v1/ws/streaming-voice/status",
        timeout=5,
    )
    print(f"✅ Streaming voice status: {stream_status.status_code}")
    if stream_status.status_code == 200:
        status_data = stream_status.json()
        print(f"   Enabled: {status_data.get('enabled')}, Active sessions: {status_data.get('active_sessions')}")
    else:
        print(f"❌ Stream status error: {stream_status.text}")

except Exception as e:
    # Best-effort probe: any failure (network, bad JSON, unexpected payload) is
    # reported and the notebook continues to the WebSocket test.
    print(f"❌ Local server connection failed: {e}")

async def test_conversation_bleeding() -> Dict[str, Any]:
    """
    Test for conversation history bleeding by uploading webm files sequentially
    with pauses to simulate real conversation flow.

    Opens a single WebSocket to the streaming-voice endpoint, starts a
    background receiver that logs every incoming message, then sends each
    existing file in ``WEBM_FILES`` as one binary message. It sleeps
    ``PAUSE_BETWEEN_FILES_S`` between files and ``AUTO_CLOSE_AFTER_FINAL_S``
    after the last one before cancelling the receiver.

    Bleeding is flagged when a non-empty final transcript contains
    (case-insensitive substring) a final transcript that was recorded while an
    earlier file was the most recent upload. Events are attributed to a file by
    the number of uploads completed when the event arrives, so attribution is
    approximate if the server responds late.

    Returns:
        The ``test_results`` dict: connection flag, uploaded file info,
        transcription events, assistant responses, bleeding flag and evidence,
        error list, start/end times, total duration, the full event log
        (``all_events``) and the list of final transcripts.
        Connection-level failures are recorded in ``errors`` rather than raised.
    """
    scheme = 'wss' if USE_SECURE else 'ws'
    url = f"{scheme}://{HOST}/api/v1/ws/streaming-voice?language={LANGUAGE}"
    print(f"\n🔗 Connecting to WebSocket: {url}")

    # Global test state
    test_results = {
        'connection_established': False,
        'files_processed': [],
        'transcription_events': [],
        'assistant_responses': [],
        'conversation_bleeding_detected': False,
        'bleeding_evidence': [],
        'total_events': 0,
        'test_start_time': time.time(),
        'errors': []
    }

    events_log = []
    final_transcripts = []  # Track final transcripts for each file

    async def receiver(ws):
        """Handle all incoming WebSocket messages and track conversation bleeding"""
        message_count = 0

        try:
            while True:
                try:
                    msg = await asyncio.wait_for(ws.recv(), timeout=5.0)
                    message_count += 1
                    test_results['total_events'] = message_count
                except asyncio.TimeoutError:
                    print("⏰ [receiver] No message for 5s, continuing...")
                    continue
                except Exception as e:
                    print(f"🔚 [receiver] Connection ended after {message_count} messages: {e}")
                    break

                timestamp = time.time()

                try:
                    data = json.loads(msg)
                except (json.JSONDecodeError, UnicodeDecodeError):
                    # UnicodeDecodeError covers binary frames that are not
                    # valid text; it is not a JSONDecodeError and would
                    # otherwise terminate the receiver.
                    print(f"📄 [receiver] Non-JSON message #{message_count}")
                    continue

                if not isinstance(data, dict):
                    # Valid JSON but not an event object; .get() below needs a dict.
                    print(f"📄 [receiver] Non-object JSON message #{message_count}")
                    continue

                # Log all events for analysis
                event_entry = {
                    'timestamp': timestamp,
                    'message_num': message_count,
                    'event_type': data.get('type', data.get('event', 'unknown')),
                    'data': data,
                    'relative_time_s': round(timestamp - test_results['test_start_time'], 2)
                }
                events_log.append(event_entry)

                etype = data.get('type', data.get('event'))
                content_preview = str(data.get('text', data.get('content', '')))[:100]

                print(f"📨 [#{message_count:03d}] {etype}: {content_preview}")

                # Number of uploads completed so far = file this event is attributed to
                current_file_num = len(test_results['files_processed'])

                # Track transcription events specifically
                if etype in ['partial_transcript', 'final_transcript']:
                    transcription_event = {
                        'type': etype,
                        'text': data.get('text', ''),
                        'utterance_index': data.get('utterance_index'),
                        'timestamp': timestamp,
                        'file_context': current_file_num  # Which file was being processed
                    }
                    test_results['transcription_events'].append(transcription_event)

                    # For final transcripts, check for bleeding
                    if etype == 'final_transcript':
                        # `or ''` guards against an explicit null text field
                        final_text = (data.get('text') or '').strip()
                        if final_text:
                            final_transcripts.append({
                                'text': final_text,
                                'utterance_index': data.get('utterance_index'),
                                'file_number': current_file_num,
                                'timestamp': timestamp
                            })

                            # Check for conversation bleeding
                            if current_file_num > 1:  # Only check after first file
                                final_lower = final_text.lower()
                                # Compare only against transcripts attributed to
                                # earlier files; a repeated final for the same
                                # file is not cross-file bleeding.
                                for prev_transcript in final_transcripts[:-1]:
                                    if prev_transcript['file_number'] >= current_file_num:
                                        continue
                                    if prev_transcript['text'].lower() in final_lower:
                                        bleeding_evidence = {
                                            'current_transcript': final_text,
                                            'previous_transcript': prev_transcript['text'],
                                            'current_file': current_file_num,
                                            'previous_file': prev_transcript['file_number'],
                                            'detection_time': timestamp
                                        }
                                        test_results['bleeding_evidence'].append(bleeding_evidence)
                                        test_results['conversation_bleeding_detected'] = True
                                        print("🚨 BLEEDING DETECTED: Current transcript contains previous content!")
                                        print(f"   Previous: '{prev_transcript['text']}'")
                                        print(f"   Current:  '{final_text}'")

                            print(f"🎤 FINAL TRANSCRIPT #{data.get('utterance_index', '?')}: {final_text}")

                elif etype in ['assistant_response', 'assistant_text']:
                    assistant_text = data.get('text', '')
                    if assistant_text:
                        test_results['assistant_responses'].append({
                            'text': assistant_text,
                            'utterance_index': data.get('utterance_index'),
                            'timestamp': timestamp,
                            'file_context': current_file_num
                        })
                        print(f"🤖 ASSISTANT RESPONSE: {assistant_text[:100]}...")

                elif etype == 'error':
                    error_msg = data.get('message', 'Unknown error')
                    test_results['errors'].append({
                        'message': error_msg,
                        'stage': data.get('stage'),
                        'timestamp': timestamp
                    })
                    print(f"❌ ERROR: {error_msg}")

        except Exception as e:
            # Unexpected failure while handling a message: record it and stop receiving.
            print(f"❌ [receiver] Error: {e}")
            test_results['errors'].append({'message': str(e), 'timestamp': time.time()})

    async def send_webm_file(ws, file_path: Path, file_number: int):
        """Send a webm file to the WebSocket for processing"""
        print(f"\n📤 [File {file_number}] Starting upload: {file_path.name}")

        try:
            webm_data = file_path.read_bytes()
            await ws.send(webm_data)

            file_info = {
                'file_path': str(file_path),
                'file_name': file_path.name,
                'file_number': file_number,
                'file_size_bytes': len(webm_data),
                'upload_timestamp': time.time()
            }
            # Appended only after a successful send; the receiver uses this
            # list's length to attribute incoming events to a file.
            test_results['files_processed'].append(file_info)

            print(f"✅ [File {file_number}] Uploaded {len(webm_data)} bytes")

        except Exception as e:
            print(f"❌ [File {file_number}] Upload failed: {e}")
            test_results['errors'].append({
                'message': f"File upload failed: {e}",
                'file': str(file_path),
                'timestamp': time.time()
            })

    # Main test execution
    try:
        async with websockets.connect(url, ping_interval=30) as ws:
            print("🔗 WebSocket connected successfully!")
            test_results['connection_established'] = True

            # Start receiver task
            receiver_task = asyncio.create_task(receiver(ws))

            try:
                # Send files sequentially with pauses
                for i, webm_file in enumerate(WEBM_FILES, 1):
                    if not webm_file.exists():
                        print(f"⚠️ Skipping missing file: {webm_file}")
                        continue

                    # Upload the file
                    await send_webm_file(ws, webm_file, i)

                    # Wait for processing before next file (except for last file)
                    if i < len(WEBM_FILES):
                        print(f"⏳ Waiting {PAUSE_BETWEEN_FILES_S}s before next file...")
                        await asyncio.sleep(PAUSE_BETWEEN_FILES_S)

                print(f"📤 All files uploaded. Waiting {AUTO_CLOSE_AFTER_FINAL_S}s for final processing...")

                # Wait for final processing to complete. No blanket except here:
                # cancellation and KeyboardInterrupt must propagate.
                await asyncio.sleep(AUTO_CLOSE_AFTER_FINAL_S)
            finally:
                # Always stop the receiver, including when the upload loop is
                # interrupted, so no task is left reading from a closing socket.
                receiver_task.cancel()
                try:
                    await receiver_task
                except asyncio.CancelledError:
                    pass

    except Exception as e:
        print(f"❌ WebSocket connection error: {e}")
        test_results['errors'].append({'message': f"Connection error: {e}", 'timestamp': time.time()})

    # Final analysis and reporting
    test_results['test_end_time'] = time.time()
    test_results['total_duration_s'] = test_results['test_end_time'] - test_results['test_start_time']
    test_results['all_events'] = events_log
    test_results['final_transcripts'] = final_transcripts

    return test_results

# Run the conversation bleeding test
print("\n" + "="*80)
print("🧪 STARTING CONVERSATION BLEEDING TEST")
print("="*80)

test_results = await test_conversation_bleeding()

🚀 Starting Enhanced Streaming Voice Debug
   Host: 127.0.0.1:8000
   Language: en
   Files to test: 3
   Pause between files: 6.0s
   ✅ File 1: greeting.webm (18586 bytes)
   ✅ File 2: botox.webm (21566 bytes)
   ✅ File 3: laser_hair.webm (23981 bytes)
✅ Streaming voice status: 200
   Enabled: True, Active sessions: 0

🧪 STARTING CONVERSATION BLEEDING TEST

🔗 Connecting to WebSocket: ws://127.0.0.1:8000/api/v1/ws/streaming-voice?language=en
🔗 WebSocket connected successfully!

📤 [File 1] Starting upload: greeting.webm
✅ [File 1] Uploaded 18586 bytes
⏳ Waiting 6.0s before next file...
📨 [#001] ready: 


📨 [#002] decoder_started: 
📨 [#003] info: 
📨 [#004] ingest_mode: 
📨 [#005] partial_transcript: Hello, how are you today?
📨 [#006] metrics_snapshot: 
📨 [#007] perf_cycle: 
📨 [#008] perf_cycle: 
📨 [#009] endpoint_event: 
📨 [#010] perf_cycle: 
📨 [#011] perf_cycle: 
📨 [#012] endpoint_event: 
📨 [#013] final_transcript: Hello, how are you today?
🎤 FINAL TRANSCRIPT #0: Hello, how are you today?
📨 [#014] perf_cycle: 
📨 [#015] assistant_pipeline_start: 
📨 [#016] tts_start: 
📨 [#017] endpoint_event: 
📨 [#018] heartbeat: 
📨 [#019] endpoint_event: 

📤 [File 2] Starting upload: botox.webm
✅ [File 2] Uploaded 21566 bytes
⏳ Waiting 6.0s before next file...
📨 [#020] partial_transcript: Hello, how are you today? What is Botox used for?
📨 [#021] metrics_snapshot: 
📨 [#022] endpoint_event: 
📨 [#023] endpoint_event: 
📨 [#024] final_transcript: Hello, how are you today? What is Botox used for?
🚨 BLEEDING DETECTED: Current transcript contains previous content!
   Previous: 'Hello, how are you today?'
   Cur

In [8]:
# Generate comprehensive bug report
def generate_bug_report(test_results: Dict[str, Any]) -> str:
    """Generate a comprehensive bug report from test results

    Builds a Markdown document from the dict returned by
    ``test_conversation_bleeding``. Sections for files, bleeding evidence,
    final transcripts, assistant responses, errors and the key-event timeline
    are emitted only when the corresponding data is present; the summary and
    the technical-analysis sections are always emitted.

    Args:
        test_results: Result dict. ``test_start_time`` must be present whenever
            transcripts, assistant responses or errors are present, because
            relative times are computed from it.

    Returns:
        The report as a single newline-joined string.
    """

    report_lines = [
        "# 🐛 Streaming Voice WebSocket - Conversation History Bleeding Bug Report",
        f"**Generated:** {time.strftime('%Y-%m-%d %H:%M:%S UTC', time.gmtime())}",
        f"**Test Duration:** {test_results.get('total_duration_s', 0):.2f}s",
        f"**Total Events:** {test_results.get('total_events', 0)}",
        "",
        "## 📋 Test Summary",
        f"- **Connection Established:** {'✅ Yes' if test_results.get('connection_established') else '❌ No'}",
        # Denominator follows the configured file list instead of a literal 3
        f"- **Files Processed:** {len(test_results.get('files_processed', []))}/{len(WEBM_FILES)}",
        f"- **Transcription Events:** {len(test_results.get('transcription_events', []))}",
        f"- **Assistant Responses:** {len(test_results.get('assistant_responses', []))}",
        f"- **Errors Encountered:** {len(test_results.get('errors', []))}",
        f"- **Conversation Bleeding Detected:** {'🚨 YES' if test_results.get('conversation_bleeding_detected') else '✅ NO'}",
        ""
    ]

    # Files processed section
    if test_results.get('files_processed'):
        report_lines.extend([
            "## 📁 Files Processed",
            ""
        ])
        for file_info in test_results['files_processed']:
            report_lines.append(f"- **File {file_info['file_number']}:** {file_info['file_name']} ({file_info['file_size_bytes']} bytes)")

    # Bleeding evidence section
    if test_results.get('conversation_bleeding_detected') and test_results.get('bleeding_evidence'):
        report_lines.extend([
            "",
            "## 🚨 Conversation Bleeding Evidence",
            ""
        ])
        for i, evidence in enumerate(test_results['bleeding_evidence'], 1):
            report_lines.extend([
                f"### Evidence #{i}",
                f"- **Current File:** {evidence['current_file']}",
                f"- **Previous File:** {evidence['previous_file']}",
                f"- **Previous Transcript:** `{evidence['previous_transcript']}`",
                f"- **Current Transcript:** `{evidence['current_transcript']}`",
                f"- **Detection Time:** {evidence['detection_time']}",
                ""
            ])

    # Final transcripts section
    if test_results.get('final_transcripts'):
        report_lines.extend([
            "## 🎤 Final Transcripts Chronology",
            ""
        ])
        for i, transcript in enumerate(test_results['final_transcripts'], 1):
            relative_time = transcript['timestamp'] - test_results['test_start_time']
            report_lines.extend([
                f"### Transcript {i} (File {transcript['file_number']}, Utterance {transcript['utterance_index']})",
                f"- **Time:** +{relative_time:.2f}s",
                f"- **Text:** `{transcript['text']}`",
                ""
            ])

    # Assistant responses section
    if test_results.get('assistant_responses'):
        report_lines.extend([
            "## 🤖 Assistant Responses",
            ""
        ])
        for i, response in enumerate(test_results['assistant_responses'], 1):
            relative_time = response['timestamp'] - test_results['test_start_time']
            report_lines.extend([
                f"### Response {i} (File Context {response['file_context']}, Utterance {response['utterance_index']})",
                f"- **Time:** +{relative_time:.2f}s",
                # Long responses are cut to 200 characters with an ellipsis marker
                f"- **Text:** `{response['text'][:200]}{'...' if len(response['text']) > 200 else ''}`",
                ""
            ])

    # Errors section
    if test_results.get('errors'):
        report_lines.extend([
            "## ❌ Errors Encountered",
            ""
        ])
        for i, error in enumerate(test_results['errors'], 1):
            relative_time = error['timestamp'] - test_results['test_start_time']
            report_lines.extend([
                f"### Error {i}",
                f"- **Time:** +{relative_time:.2f}s",
                f"- **Stage:** {error.get('stage', 'Unknown')}",
                f"- **Message:** `{error['message']}`",
                ""
            ])

    # Event timeline section (key events only)
    if test_results.get('all_events'):
        report_lines.extend([
            "## 📊 Key Events Timeline",
            ""
        ])
        # 'assistant_text' is listed alongside 'assistant_response' because the
        # receiver records both event types as assistant responses.
        key_event_types = ['final_transcript', 'assistant_response', 'assistant_text', 'error', 'conversation_reset']
        key_events = [e for e in test_results['all_events'] if e['event_type'] in key_event_types]

        for event in key_events[:20]:  # Limit to first 20 key events
            report_lines.append(f"- **+{event['relative_time_s']:6.2f}s** `{event['event_type']}` - {str(event['data'].get('text', event['data'].get('message', 'N/A')))[:100]}")

    # Technical details section
    report_lines.extend([
        "",
        "## 🔧 Technical Analysis",
        "",
        "### Potential Root Causes",
        "1. **Whisper Model State Persistence:** The Whisper transcription service may be maintaining audio buffer state between utterances",
        "2. **Conversation History Bleeding:** Backend conversation history management may be incorrectly prepending previous transcripts",
        "3. **Session State Management:** WebSocket session state (utterance indices, processed transcripts) may not be properly reset between utterances",
        "4. **Audio Buffer Accumulation:** Audio chunks from previous utterances may be accumulating in transcription pipeline",
        "",
        "### Recommended Investigation Areas",
        "1. **Streaming Voice Endpoint:** Check `SessionState.conversation` and utterance tracking logic",
        "2. **Transcription Service:** Verify Whisper model state isolation between utterances",
        "3. **WebSocket Message Handling:** Examine partial/final transcript processing and deduplication",
        "4. **Conversation Reset Mechanism:** Validate that reset functionality clears all relevant state",
        "",
        "### Environment Context",
        f"- **Server:** {HOST}",
        f"- **Language:** {LANGUAGE}",
        f"- **WebSocket URL:** `{'wss' if USE_SECURE else 'ws'}://{HOST}/api/v1/ws/streaming-voice?language={LANGUAGE}`",
        ""
    ])

    return "\n".join(report_lines)

# Generate and display the bug report
print("\n" + "="*80)
print("📋 GENERATING BUG REPORT")
print("="*80)

bug_report = generate_bug_report(test_results)
print(bug_report)

# Save results for further analysis
results_file = Path('/home/lumi/beautyai/tests/streaming/conversation_bleeding_test_results.json')

try:
    # Directory creation is inside the try so a filesystem failure is reported
    # like a write failure and the final summary line below still prints.
    results_file.parent.mkdir(parents=True, exist_ok=True)
    with open(results_file, 'w', encoding='utf-8') as f:
        # default=str stringifies any value json cannot serialise natively
        json.dump(test_results, f, indent=2, default=str)
    print(f"\n💾 Test results saved to: {results_file}")
except (OSError, TypeError, ValueError) as e:
    print(f"\n❌ Failed to save results: {e}")

print(f"\n🏁 Test completed. Bleeding detected: {'YES' if test_results.get('conversation_bleeding_detected') else 'NO'}")


📋 GENERATING BUG REPORT
# 🐛 Streaming Voice WebSocket - Conversation History Bleeding Bug Report
**Generated:** 2025-08-22 13:20:06 UTC
**Test Duration:** 42.04s
**Total Events:** 53

## 📋 Test Summary
- **Connection Established:** ✅ Yes
- **Files Processed:** 3/3
- **Transcription Events:** 4
- **Assistant Responses:** 2
- **Errors Encountered:** 0
- **Conversation Bleeding Detected:** 🚨 YES

## 📁 Files Processed

- **File 1:** greeting.webm (18586 bytes)
- **File 2:** botox.webm (21566 bytes)
- **File 3:** laser_hair.webm (23981 bytes)

## 🚨 Conversation Bleeding Evidence

### Evidence #1
- **Current File:** 2
- **Previous File:** 1
- **Previous Transcript:** `Hello, how are you today?`
- **Current Transcript:** `Hello, how are you today? What is Botox used for?`
- **Detection Time:** 1755866728.168185

## 🎤 Final Transcripts Chronology

### Transcript 1 (File 1, Utterance 0)
- **Time:** +5.27s
- **Text:** `Hello, how are you today?`

### Transcript 2 (File 2, Utterance 2)
- **Time: