In [9]:
# Rebuild the sentence from its whitespace-separated words.
# Every word (including the last one) is followed by a single space.
chunk = "Hello I am here."
words = chunk.split()
sentence = "".join(f"{word} " for word in words)
print(sentence)

Hello I am here. 


In [1]:
import re

def format_time_for_tts(text):
    """Rewrite 12-hour clock times so a TTS engine reads them naturally.

    Times such as ``11:00 AM`` or ``3:45 pm`` are replaced as follows:

    * on the hour        -> ``"11 o'clock AM"``
    * minutes 01 to 09   -> ``"7 oh 5 AM"`` (the leading zero is spoken as "oh"
      instead of being silently dropped)
    * any other minute   -> ``"3 45 PM"``

    The AM/PM marker is matched case-insensitively and always emitted in
    upper case. Text without a recognisable time is returned unchanged.

    Args:
        text: Input string that may contain zero or more clock times.

    Returns:
        The string with every recognised time replaced.
    """
    # (?<![\d:]) stops us from matching the tail of a longer number or of an
    # "HH:MM:SS" value; the trailing \b stops "9:15 amber" from matching "am".
    pattern = r'(?<![\d:])(\d{1,2}):(\d{2})\s*(AM|PM)\b'

    def replace_time(match):
        hour = int(match.group(1))
        minute = int(match.group(2))
        period = match.group(3).upper()

        if minute == 0:
            return f"{hour} o'clock {period}"
        if minute < 10:
            # "7:05" must not collapse to "7 5".
            return f"{hour} oh {minute} {period}"
        return f"{hour} {minute} {period}"

    # re.sub already returns the input untouched when nothing matches, so no
    # separate re.search pass is needed.
    return re.sub(pattern, replace_time, text, flags=re.IGNORECASE)

# Demo: run the formatter over a few sample sentences and show each one
# before and after conversion, separated by a blank line.
examples = [
    "The meeting is at 11:00 AM and lunch is at 12:30 PM.",
    "Please arrive by 9:15 am for the orientation.",
    "This text has no time information."
]

for example, formatted in zip(examples, map(format_time_for_tts, examples)):
    print(f"Original: {example}\nFormatted: {formatted}\n")

Original: The meeting is at 11:00 AM and lunch is at 12:30 PM.
Formatted: The meeting is at 11 o'clock AM and lunch is at 12 30 PM.

Original: Please arrive by 9:15 am for the orientation.
Formatted: Please arrive by 9 15 AM for the orientation.

Original: This text has no time information.
Formatted: This text has no time information.



In [None]:
import base64
import json
import logging
import mimetypes
import queue
import threading
import time
from typing import Optional

import numpy as np
import pyaudio
import requests
from openai import OpenAI

class StreamingTTSClient:
    """Stream text to a TTS server phrase by phrase and play the returned audio.

    Two daemon threads do the work:

    * the *text thread* (``_process_text_queue``) accumulates streamed text
      chunks into ``current_phrase`` and, whenever a phrase looks complete,
      posts it to ``<server_url>/tts-stream`` and pushes the decoded audio
      into ``audio_queue``;
    * the *play thread* (``_play_audio``) writes queued audio to a PyAudio
      output stream.

    Typical use: ``stream_text(...)`` repeatedly, then ``wait_for_completion()``
    and finally ``stop()``. After ``stop()`` the instance cannot be reused.
    """

    # Output format used when opening the PyAudio stream.
    SAMPLE_WIDTH_BYTES = 2  # 16-bit audio
    CHANNELS = 1
    SAMPLE_RATE = 24000

    # How long each worker blocks on its queue before re-checking its flags.
    TEXT_POLL_SECONDS = 0.1
    AUDIO_POLL_SECONDS = 0.5

    # When no new text arrives, a buffered fragment is spoken anyway once it
    # has at least this many words.
    IDLE_FLUSH_MIN_WORDS = 3

    # (connect, read) timeout in seconds for the TTS HTTP request, so a dead
    # server cannot block the text thread forever.
    REQUEST_TIMEOUT = (10, 60)

    # Grace period after the last chunk was handed to the audio device.
    DRAIN_DELAY_SECONDS = 2

    # Punctuation that marks the end of a speakable phrase.
    PHRASE_ENDINGS = ('.', '!', '?', ':', ';')

    _log = logging.getLogger("StreamingTTSClient")

    def __init__(self, server_url):
        """Open the audio device and start the worker threads.

        Args:
            server_url: Base URL of the TTS server; a trailing slash is
                tolerated.
        """
        self.server_url = server_url

        # Audio playback components
        self.audio_queue = queue.Queue()
        self.is_playing = False
        self.p = pyaudio.PyAudio()
        self.stream = None
        self.stream_lock = threading.Lock()  # guards stream / is_playing
        self.play_thread = None

        # Text processing components
        self.text_queue = queue.Queue()
        self.current_phrase = ""
        # Guards current_phrase. It is also held while a phrase is being
        # synthesised, which serialises requests so audio is queued in text order.
        self.buffer_lock = threading.Lock()

        # Control flags
        self.stop_event = threading.Event()
        self.audio_finished_event = threading.Event()

        # Start processing threads
        self.text_thread = threading.Thread(target=self._process_text_queue, daemon=True)
        self.text_thread.start()

        # Start audio stream
        self.start_audio_stream()

    def start_audio_stream(self):
        """(Re)open the output stream and make sure a play thread is running.

        Must not be called while ``stream_lock`` is held; code that already
        holds the lock uses ``_open_stream_locked`` instead.
        """
        with self.stream_lock:
            self._open_stream_locked()
            self.is_playing = True
            self.audio_finished_event.clear()

            # Start a play thread only if none is alive, so repeated calls do
            # not pile up competing consumers of audio_queue.
            worker = getattr(self, "play_thread", None)
            if worker is None or not worker.is_alive():
                self.play_thread = threading.Thread(target=self._play_audio, daemon=True)
                self.play_thread.start()

    def _open_stream_locked(self):
        """Replace ``self.stream`` with a fresh output stream.

        The caller must already hold ``stream_lock``.
        """
        if self.stream is not None:
            try:
                self.stream.stop_stream()
                self.stream.close()
            except Exception as exc:
                # The old stream is being discarded anyway.
                self._log.debug("Ignoring error while closing old audio stream: %s", exc)
            self.stream = None

        self.stream = self.p.open(
            format=self.p.get_format_from_width(self.SAMPLE_WIDTH_BYTES),
            channels=self.CHANNELS,
            rate=self.SAMPLE_RATE,
            output=True
        )

    def _write_chunk(self, audio_chunk):
        """Write one chunk to the device, reopening the stream once on failure.

        Returns:
            True if the chunk was written, False if it was dropped because the
            client is stopping.

        Raises:
            Exception: whatever the audio backend raises if reopening the
                stream or the single retry fails.
        """
        with self.stream_lock:
            if self.stop_event.is_set() or not self.is_playing:
                return False

            if self.stream is None or not self.stream.is_active():
                self._open_stream_locked()

            try:
                self.stream.write(audio_chunk)
            except Exception as exc:
                # Reopen in place: calling start_audio_stream() here would try
                # to take stream_lock a second time and deadlock.
                self._log.warning("Audio write failed (%s); reopening stream and retrying once", exc)
                self._open_stream_locked()
                self.stream.write(audio_chunk)
            return True

    def _play_audio(self):
        """Play-thread body: write queued audio chunks until stopped."""
        try:
            while self.is_playing and not self.stop_event.is_set():
                try:
                    audio_chunk = self.audio_queue.get(timeout=self.AUDIO_POLL_SECONDS)
                except queue.Empty:
                    # Nothing to play; advertise idleness if no text is pending.
                    if self.audio_queue.empty() and self.text_queue.empty():
                        self.audio_finished_event.set()
                    continue

                try:
                    self._write_chunk(audio_chunk)
                except Exception:
                    self._log.exception("Audio playback failed; dropping chunk")
                finally:
                    # Always balance get() so Queue.join() can complete.
                    self.audio_queue.task_done()
        finally:
            # Signal that this thread will not play anything further.
            self.audio_finished_event.set()

    def _is_phrase_complete(self, text):
        """Return True if ``text`` (ignoring surrounding whitespace) ends with
        one of ``PHRASE_ENDINGS``."""
        return text.strip().endswith(self.PHRASE_ENDINGS)

    def _take_phrase_locked(self):
        """Return the buffered phrase (stripped) and reset the buffer.

        The caller must already hold ``buffer_lock``.
        """
        phrase = self.current_phrase.strip()
        self.current_phrase = ""
        return phrase

    def _process_text_queue(self):
        """Text-thread body: buffer chunks and send complete phrases to TTS."""
        while not self.stop_event.is_set():
            try:
                text_chunk = self.text_queue.get(timeout=self.TEXT_POLL_SECONDS)
            except queue.Empty:
                # No new text: speak a long-enough fragment rather than
                # waiting indefinitely for punctuation.
                try:
                    with self.buffer_lock:
                        if len(self.current_phrase.split()) >= self.IDLE_FLUSH_MIN_WORDS:
                            self._send_to_tts(self._take_phrase_locked())
                except Exception:
                    self._log.exception("Failed to flush buffered text")
                continue

            try:
                with self.buffer_lock:
                    self.current_phrase += text_chunk
                    if self._is_phrase_complete(self.current_phrase):
                        self._send_to_tts(self._take_phrase_locked())
            except Exception:
                self._log.exception("Failed to process text chunk %r", text_chunk)
            finally:
                # Marked done only after synthesis, so joining text_queue means
                # "every streamed chunk has been handled".
                self.text_queue.task_done()

    def _tts_endpoint(self):
        """Return the streaming endpoint URL, tolerating a trailing slash in
        ``server_url`` (which would otherwise produce ``//tts-stream``)."""
        return f"{self.server_url.rstrip('/')}/tts-stream"

    def _send_to_tts(self, text):
        """Synthesise ``text`` and enqueue the returned audio for playback.

        The response body is read line by line; lines are concatenated until
        they parse as one JSON document, and a ``"chunk"`` entry is decoded
        from base64 and put on ``audio_queue``. Best effort: failures are
        logged and never raised.
        """
        try:
            with requests.post(
                self._tts_endpoint(),
                json={"text": text},
                stream=True,
                timeout=self.REQUEST_TIMEOUT,
            ) as response:
                if response.status_code != 200:
                    self._log.warning("TTS server returned HTTP %s", response.status_code)
                    return

                buffer = ""
                for line in response.iter_lines():
                    if not line:
                        continue
                    buffer += line.decode('utf-8')

                    try:
                        payload = json.loads(buffer)
                    except json.JSONDecodeError:
                        # Incomplete JSON, continue adding to buffer
                        continue
                    buffer = ""

                    if isinstance(payload, dict) and 'chunk' in payload:
                        self.audio_queue.put(base64.b64decode(payload['chunk']))
        except Exception:
            self._log.exception("TTS request failed for a %d-character phrase", len(text))

    def stream_text(self, text_chunk):
        """Add a chunk of text to the processing queue.

        Text streamed after ``stop()`` is dropped, because no worker is left
        to consume it.
        """
        if self.stop_event.is_set():
            self._log.warning("stream_text() called after stop(); text dropped")
            return
        self.audio_finished_event.clear()
        self.text_queue.put(text_chunk)

    @staticmethod
    def _join_queue(work_queue, timeout):
        """``Queue.join()`` with a timeout.

        Returns:
            True if every item put on ``work_queue`` was marked done within
            ``timeout`` seconds, otherwise False.
        """
        waiter = threading.Thread(target=work_queue.join, daemon=True)
        waiter.start()
        waiter.join(max(0.0, timeout))
        return not waiter.is_alive()

    def wait_for_completion(self, timeout=30):
        """Block until all streamed text has been spoken.

        Steps: wait for the text thread to handle every queued chunk, send any
        leftover fragment, wait for the play thread to write every queued
        audio chunk, then sleep ``DRAIN_DELAY_SECONDS`` so the device can
        finish the last buffer. Returns immediately once the client is stopped.

        Args:
            timeout: Upper bound in seconds for the two queue waits combined
                (synthesis of the leftover fragment and the final grace period
                are not included).
        """
        if self.stop_event.is_set():
            return

        deadline = time.monotonic() + timeout

        # Drain the text queue *before* flushing, so the leftover fragment
        # really is the tail of the text and nothing is left behind.
        self._join_queue(self.text_queue, deadline - time.monotonic())

        with self.buffer_lock:
            leftover = self._take_phrase_locked()
            if leftover:
                self._send_to_tts(leftover)

        if self._join_queue(self.audio_queue, deadline - time.monotonic()):
            self.audio_finished_event.set()

        # Additional delay to ensure last chunk is fully played
        time.sleep(self.DRAIN_DELAY_SECONDS)

    def stop(self):
        """Finish pending audio, stop the workers and release the audio device.

        Safe to call more than once; later calls do nothing.
        """
        if self.stop_event.is_set():
            return

        # First wait for all audio to finish
        self.wait_for_completion()

        # Now set stop event
        self.stop_event.set()

        # Stop audio
        with self.stream_lock:
            self.is_playing = False
            if self.stream is not None:
                try:
                    self.stream.stop_stream()
                    self.stream.close()
                except Exception as exc:
                    self._log.debug("Ignoring error while closing audio stream: %s", exc)
                self.stream = None

        # Give the workers a moment to notice stop_event before the audio
        # backend is torn down underneath them.
        for worker in (self.text_thread, self.play_thread):
            if worker is not None and worker is not threading.current_thread():
                worker.join(timeout=1.0)

        self.p.terminate()
            
class ConversationManager:
    """Multi-turn chat with an OpenAI-compatible server, spoken aloud via TTS.

    The manager keeps the running ``conversation_history``, forwards
    ``custom_params`` to the server through ``extra_body`` and feeds every
    assistant reply to a ``StreamingTTSClient``.
    """

    # Defaults used when the constructor is not given explicit values.
    DEFAULT_TTS_URL = "https://44dd-34-87-172-26.ngrok-free.app/"
    DEFAULT_MODEL = "workspace/merged-llava-model"

    # Image references with these prefixes are sent to the server verbatim.
    _REMOTE_IMAGE_PREFIXES = ("http://", "https://", "data:")

    def __init__(self, base_url, api_key="None", system_prompt: Optional[str] = None,
                 tts_url: Optional[str] = None, model: Optional[str] = None):
        """Create the chat client and the TTS client.

        Args:
            base_url: Base URL of the OpenAI-compatible chat server.
            api_key: API key passed to the OpenAI client.
            system_prompt: Optional system prompt, sent as the
                ``system_prompt`` custom parameter.
            tts_url: Base URL of the TTS server; defaults to ``DEFAULT_TTS_URL``.
            model: Model name for chat completions; defaults to ``DEFAULT_MODEL``.
        """
        self.client = OpenAI(base_url=base_url, api_key=api_key)
        self.tts_url = tts_url or self.DEFAULT_TTS_URL
        self.model = model or self.DEFAULT_MODEL
        self.tts = StreamingTTSClient(self.tts_url)
        self.conversation_history = []
        self.custom_params = {
            "mode": "inference"
        }
        if system_prompt is not None:
            self.custom_params["system_prompt"] = system_prompt

    def _ensure_tts(self):
        """Return a usable TTS client, creating a new one if the previous one
        has already been stopped (it is stopped after every response)."""
        if self.tts is None or self.tts.stop_event.is_set():
            self.tts = StreamingTTSClient(self.tts_url)
        return self.tts

    @classmethod
    def _image_url(cls, image_path):
        """Turn an image reference into a value for the ``image_url`` field.

        ``http(s)://`` and ``data:`` references are returned unchanged;
        anything else is treated as a local file and embedded as a base64
        data URL (MIME type guessed from the file name, JPEG as fallback).
        """
        image_ref = str(image_path)
        if image_ref.lower().startswith(cls._REMOTE_IMAGE_PREFIXES):
            return image_ref

        with open(image_ref, "rb") as image_file:
            encoded_image = base64.b64encode(image_file.read()).decode("ascii")
        mime_type = mimetypes.guess_type(image_ref)[0] or "image/jpeg"
        return f"data:{mime_type};base64,{encoded_image}"

    def add_user_message(self, text=None, image_path=None):
        """Add a user message to the conversation history.

        Args:
            text: The message text (required).
            image_path: Optional image to attach: a URL, a ``data:`` URL or a
                path to a local image file.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if ``text`` is missing or empty.
            OSError: if ``image_path`` is a local path that cannot be read.
        """
        if not text:
            raise ValueError("Text message is required")

        if image_path:
            content = [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": self._image_url(image_path),
                    },
                },
                {
                    "type": "text",
                    "text": text,
                },
            ]
        else:
            # Text-only message
            content = text

        self.conversation_history.append({
            "role": "user",
            "content": content
        })
        return self

    def get_assistant_response(self, max_tokens=300, temperature=1.0, stream=True):
        """Request the assistant's reply, speak it and record it in the history.

        In streaming mode the reply is printed and sent to TTS piece by piece;
        pressing Ctrl-C stops reading the stream and keeps the text received
        so far. The TTS client is stopped once the reply has been spoken and
        is recreated on the next call.

        Args:
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature.
            stream: Whether to request a streaming completion.

        Returns:
            The assistant's reply text.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=self.conversation_history,
            stream=stream,
            max_tokens=max_tokens,
            temperature=temperature,
            # other parameters supported by the OpenAI client can be added here
            extra_body=self.custom_params
        )

        tts = self._ensure_tts()
        try:
            if stream:
                pieces = []
                print("\nAssistant: ", end="", flush=True)
                try:
                    for chunk in response:
                        # Some chunks carry no choices at all.
                        if not chunk.choices:
                            continue
                        content_piece = chunk.choices[0].delta.content
                        if content_piece is None:
                            continue
                        pieces.append(content_piece)
                        print(content_piece, end="", flush=True)
                        tts.stream_text(content_piece)
                except KeyboardInterrupt:
                    # User interrupted generation: keep the partial reply.
                    pass
                print("\n")
                content = "".join(pieces)
            else:
                content = response.choices[0].message.content or ""
                if content:
                    tts.stream_text(content)
        finally:
            # stop() itself waits for pending audio before releasing the device.
            tts.stop()

        # Add assistant's response to conversation history
        self.conversation_history.append({
            "role": "assistant",
            "content": content
        })
        return content

    def display_conversation(self):
        """Print the whole conversation, one line per message.

        User messages that carry an image are shown as ``[Image]`` followed by
        their text part.
        """
        for message in self.conversation_history:
            role = message["role"]
            content = message["content"]

            if role == "user" and isinstance(content, list):
                # This is a message with an image
                text_content = next((item["text"] for item in content if item["type"] == "text"), "")
                print(f"User: [Image] {text_content}")
            else:
                print(f"{role.capitalize()}: {content}")
                
                
# Base URL of the OpenAI-compatible server that hosts the chat model.
# NOTE: placeholder value; replace it with the address of your own deployment.
# ConversationManager requires it, so constructing the manager without a
# base_url raises TypeError.
LLM_BASE_URL = "http://localhost:8000/v1"

LLM = ConversationManager(
    base_url=LLM_BASE_URL,
    system_prompt="You are Moremi",
)

LLM.add_user_message(text="Tell me a story")

LLM.get_assistant_response()

