In [1]:
import sys
import os
# Show which interpreter and working directory this kernel is using.
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"Current directory: {os.getcwd()}")
# %pip (rather than !pip) runs pip for the environment of the running kernel,
# so the listing reflects the packages this notebook can actually import.
%pip list

Python version: 3.13.2 (tags/v3.13.2:4f8bb39, Feb  4 2025, 15:23:48) [MSC v.1942 64 bit (AMD64)]
Python executable: C:\Program Files\Python313\python.exe
Current directory: C:\Users\Mahir\AppData\Roaming\Python\Python313\Scripts
Package                   Version
------------------------- --------------
annotated-types           0.7.0
anyio                     4.9.0
argon2-cffi               23.1.0
argon2-cffi-bindings      21.2.0
arrow                     1.3.0
asttokens                 3.0.0
async-lru                 2.0.5
attrs                     25.3.0
audioop-lts               0.2.1
babel                     2.17.0
beautifulsoup4            4.13.3
bleach                    6.2.0
certifi                   2025.1.31
cffi                      1.17.1
charset-normalizer        3.4.1
colorama                  0.4.6
comm                      0.2.2
comtypes                  1.4.10
debugpy                   1.8.13
decorator                 5.2.1
defusedxml                0.7.1
distro      

In [2]:
import pyttsx3
# Smoke test: create a pyttsx3 engine and have it speak one fixed sentence.
try:
    engine = pyttsx3.init()
    engine.say("Testing text to speech.")
    engine.runAndWait()
    print("Text-to-speech test passed.")
except Exception as e:
    # Report the failure in the cell output instead of raising.
    print(f"Text-to-speech error: {e}")

Text-to-speech test passed.


In [None]:
import speech_recognition as sr
# Smoke test: list the available microphones, record one phrase from the default
# microphone and print what the Google recognizer heard.
try:
    recognizer = sr.Recognizer()
    print("Microphones:", sr.Microphone.list_microphone_names())
    with sr.Microphone() as source:
        print("Say something...")
        recognizer.adjust_for_ambient_noise(source)
        # timeout bounds the wait for speech to start; phrase_time_limit bounds
        # the recording itself so the cell cannot keep recording indefinitely.
        audio = recognizer.listen(source, timeout=5, phrase_time_limit=5)
        text = recognizer.recognize_google(audio)
        print(f"You said: {text}")
except Exception as e:
    # Report the failure in the cell output instead of raising.
    print(f"Speech recognition error: {e}")

Microphones: ['Microsoft Sound Mapper - Input', 'Microphone (High Definition Aud', 'Microsoft Sound Mapper - Output', 'Speakers (High Definition Audio', 'Primary Sound Capture Driver', 'Microphone (High Definition Audio Device)', 'Primary Sound Driver', 'Speakers (High Definition Audio Device)', 'Speakers (High Definition Audio Device)', 'Microphone (High Definition Audio Device)', 'Microphone (HD Audio Mixed capture)', 'Speakers (HD Audio Speaker)']
Say something...


In [None]:
from transformers import pipeline
# Smoke test: load a question-answering pipeline and ask it one question.
try:
    llm = pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')
    # The context has to contain the answer: a question-answering pipeline
    # returns a span of the context, so the capital must be mentioned in it.
    result = llm(
        question="What is the capital of France?",
        context="France is a country in Europe. Its capital is Paris.",
    )
    print(f"LLM answer: {result['answer']}")
except Exception as e:
    # Report the failure in the cell output instead of raising.
    print(f"LLM error: {e}")

In [None]:
# Install dependencies
# %pip installs into the environment of the running kernel, whereas !pip runs
# whichever pip the shell finds first.
%pip install speechrecognition pyttsx3 transformers torch
%pip install pipwin
# pipwin is its own command-line tool, so it is still invoked through the shell.
!pipwin install pyaudio
%pip install --force-reinstall comtypes

In [None]:
# Install the openai package (run this if not already installed).
# %pip targets the environment of the running kernel.
%pip install openai

In [None]:
# Install or upgrade to the latest openai package.
# %pip targets the environment of the running kernel.
%pip install --upgrade openai

In [None]:
import speech_recognition as sr
import pyttsx3
from openai import OpenAI
import sys
import os
import time
import threading
import queue

# Initialize the Open AI client. The API key is read from the OPENAI_API_KEY
# environment variable so that no secret has to be pasted into the notebook.
# When the variable is unset an empty string is passed, exactly like the
# placeholder this replaces.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
if not os.environ.get("OPENAI_API_KEY"):
    print("Warning: OPENAI_API_KEY is not set; Open AI requests will not be authenticated.")

# Print environment info
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"Current directory: {os.getcwd()}")

# Both components start as None so later code can tell whether setup succeeded.
recognizer = None
tts_engine = None

# Initialize the recognizer and text-to-speech engine; a failure is reported
# and leaves the corresponding variable set to None.
try:
    recognizer = sr.Recognizer()
    print("Speech recognizer initialized.")
except Exception as e:
    print(f"Recognizer error: {e}")

try:
    tts_engine = pyttsx3.init()
    print("Text-to-speech initialized.")
except Exception as e:
    print(f"TTS error: {e}")

# Check microphones
if recognizer:
    print("Microphones:", sr.Microphone.list_microphone_names())

# Shared state used by the worker threads and the main loop
current_answer = queue.Queue()  # To store the current answer from Open AI
interrupt_flag = False  # To signal interruption
speaking_thread = None  # To manage the speaking thread
speaking_in_progress = False  # To track if the system is speaking
processing_in_progress = False  # To track if the system is processing a query

# Convert text to speech with interruption support
def speak(text):
    """Speak ``text`` through the shared pyttsx3 engine and wait until it is done.

    The call is skipped, with a console message, when the engine was never
    initialised or when another utterance is already in progress.

    Args:
        text: The sentence to speak.

    Returns:
        None. Errors raised by the engine are printed, not propagated.
    """
    global speaking_in_progress
    if not tts_engine:
        print("TTS not initialized.")
        return
    # Only one utterance at a time: a second caller backs off instead of queueing.
    if speaking_in_progress:
        print("Speech already in progress. Skipping current speech.")
        return
    speaking_in_progress = True
    try:
        # Stop any ongoing speech before starting the new sentence.
        tts_engine.stop()
        print(f"Spoke: {text}")
        tts_engine.say(text)
        tts_engine.runAndWait()
    except Exception as e:
        print(f"Speak error: {e}")
    finally:
        # Clear the flag even when the engine raised; otherwise every later
        # call would be skipped as "already in progress".
        speaking_in_progress = False

# Thread to speak the answer
def speak_in_thread(text):
    """Start a background thread that speaks ``text`` via ``speak``.

    The new thread is stored in the module-level ``speaking_thread`` and the
    function returns without waiting for the speech to finish.
    """
    global speaking_thread
    worker = threading.Thread(target=speak, args=(text,))
    speaking_thread = worker
    worker.start()

# Capture voice input with retry mechanism
def listen(max_retries=3, timeout=5):
    """Record one phrase from the microphone and return it as text.

    Args:
        max_retries: How many recording attempts to make.
        timeout: Value passed to ``recognizer.listen`` as its ``timeout``.

    Returns:
        The recognized text, or ``None`` when the recognizer is not
        initialised, every attempt failed, or a request/unexpected error
        occurred.
    """
    if not recognizer:
        print("Recognizer not initialized.")
        return None
    for attempt in range(max_retries):
        try:
            with sr.Microphone() as source:
                print(f"Listening... (Attempt {attempt + 1}/{max_retries})")
                recognizer.adjust_for_ambient_noise(source, duration=1)
                audio = recognizer.listen(source, timeout=timeout, phrase_time_limit=5)
                text = recognizer.recognize_google(audio)
                print(f"You said: {text}")
                return text
        except sr.WaitTimeoutError:
            # Silence is not a failure: move on to the next attempt instead of
            # falling into the generic handler, which would abort the retries.
            print("No speech detected before the timeout.")
        except sr.UnknownValueError:
            print("Could not understand audio.")
            if attempt < max_retries - 1:
                speak("I didn't catch that. Please try again.")
            time.sleep(1)
        except sr.RequestError as e:
            print(f"Speech recognition error: {e}")
            speak("There was an error with speech recognition. Please try again.")
            return None
        except Exception as e:
            print(f"Listen error: {e}")
            speak("An error occurred. Please try again.")
            return None
    return None

# Non-blocking listen to detect interruption
def listen_for_interrupt(timeout=1):
    """Make one short recording attempt and return any speech heard as text.

    Args:
        timeout: Value passed to ``recognizer.listen`` as its ``timeout``.

    Returns:
        The recognized text, or ``None`` when nothing usable was captured
        or an error occurred.
    """
    try:
        with sr.Microphone() as mic:
            recognizer.adjust_for_ambient_noise(mic, duration=0.5)
            clip = recognizer.listen(mic, timeout=timeout, phrase_time_limit=2)
            heard = recognizer.recognize_google(clip)
            print(f"Interrupt detected: {heard}")
            return heard
    except sr.UnknownValueError:
        # Audio was captured but could not be transcribed.
        return None
    except sr.RequestError:
        # The recognition service could not be reached.
        return None
    except sr.WaitTimeoutError:
        # Nobody spoke within the timeout.
        return None
    except Exception as e:
        print(f"Interrupt listen error: {e}")
        return None

# Process with Open AI model in a thread
def process_with_llm(question):
    """Send ``question`` to the Open AI chat model and return its reply.

    Args:
        question: The user's question as plain text.

    Returns:
        The stripped answer text (also pushed onto ``current_answer``), ``None``
        when ``interrupt_flag`` is set before or during the request, or a fixed
        apology string when the request fails or yields no text.
    """
    # Nothing to do if an interruption was already signalled.
    if interrupt_flag:
        print("Generation interrupted.")
        return None
    try:
        # The request itself carries the 10 second limit. The earlier version
        # wrapped the call in a timed while-loop that always returned on its
        # first pass, so nothing bounded how long the call could block.
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant for general knowledge questions."},
                {"role": "user", "content": question}
            ],
            max_tokens=150,
            temperature=0.7,
            timeout=10,
        )
        # The flag may have been raised while the request was in flight.
        if interrupt_flag:
            print("Generation interrupted.")
            return None
        content = response.choices[0].message.content
        if content is None:
            print("Open AI error: the response contained no text.")
            return "Sorry, I couldn't process that question."
        answer = content.strip()
        print(f"LLM answer: {answer}")
        current_answer.put(answer)
        return answer
    except Exception as e:
        print(f"Open AI error: {e}")
        return "Sorry, I couldn't process that question."

# Thread to process LLM query
def process_in_thread(question):
    """Answer ``question`` and start speaking the reply unless interrupted.

    Sets ``processing_in_progress`` for the duration of the call.

    Args:
        question: The user's question as plain text.
    """
    global processing_in_progress
    processing_in_progress = True
    try:
        answer = process_with_llm(question)
        # Stay silent when there is no answer or an interruption was signalled.
        if answer and not interrupt_flag:
            speak_in_thread(answer)
    finally:
        # Clear the flag even if answering or starting the speech thread raised.
        processing_in_progress = False

# Main loop with interruption support
def voice_controlled_llm():
    """Run the interactive assistant loop until the user asks to exit.

    Each round listens for a question, answers it on a worker thread and keeps
    polling the microphone so that a new utterance can replace the pending
    answer. Saying "exit", "quit" or "stop", or a KeyboardInterrupt, ends the
    loop. Returns immediately when the recognizer or the speech engine failed
    to initialise.
    """
    # speaking_in_progress is assigned below, so it must be declared global;
    # without the declaration Python treats it as a local variable and reading
    # it raises UnboundLocalError.
    global interrupt_flag, speaking_thread, speaking_in_progress
    if not all([recognizer, tts_engine]):
        print("One or more components failed to initialize. Check errors above.")
        return
    speak("Welcome to the voice-controlled LLM using Open AI. Ask me a question. Say 'exit' to stop.")
    while True:
        try:
            # Reset interrupt flag
            interrupt_flag = False
            # Clear the answer queue
            while not current_answer.empty():
                current_answer.get()
            # Listen for the initial question
            question = listen()
            if question:
                if question.lower() in ['exit', 'quit', 'stop']:
                    speak("Goodbye!")
                    break
                # Start processing the question in a separate thread
                processing_thread = threading.Thread(target=process_in_thread, args=(question,))
                processing_thread.start()

                # Listen for interruption while the answer is being produced or
                # spoken. Thread liveness is used for the speech part because
                # the speaking thread is registered before the worker exits.
                while processing_thread.is_alive() or (
                    speaking_thread is not None and speaking_thread.is_alive()
                ):
                    interrupt_input = listen_for_interrupt(timeout=1)
                    if interrupt_input:
                        print("Interruption detected. Stopping current process.")
                        # Tell the worker that its answer is no longer wanted.
                        interrupt_flag = True
                        # Stop current speech and reset the flag
                        tts_engine.stop()
                        speaking_in_progress = False
                        # Wait for the superseded worker so it cannot speak its
                        # stale answer, then lower the flag for the new query.
                        processing_thread.join()
                        interrupt_flag = False
                        # Process the new query immediately
                        new_answer = process_with_llm(interrupt_input)
                        if new_answer:
                            speak_in_thread(new_answer)
                        break
            else:
                speak("I couldn't understand you after several attempts. Please try again.")
        except KeyboardInterrupt:
            speak("Program interrupted. Goodbye!")
            break
        except Exception as e:
            print(f"Unexpected error: {e}")
            speak("An unexpected error occurred. Please try again.")

# Run: start the interactive assistant loop. The call returns only once
# voice_controlled_llm() exits (an exit word, Ctrl+C, or a failed initialisation).
voice_controlled_llm()


Python version: 3.13.2 (tags/v3.13.2:4f8bb39, Feb  4 2025, 15:23:48) [MSC v.1942 64 bit (AMD64)]
Python executable: C:\Program Files\Python313\python.exe
Current directory: C:\Users\Mahir\AppData\Roaming\Python\Python313\Scripts
Speech recognizer initialized.
Text-to-speech initialized.
Microphones: ['Microsoft Sound Mapper - Input', 'Microphone (High Definition Aud', 'Microsoft Sound Mapper - Output', 'Speakers (High Definition Audio', 'Primary Sound Capture Driver', 'Microphone (High Definition Audio Device)', 'Primary Sound Driver', 'Speakers (High Definition Audio Device)', 'Speakers (High Definition Audio Device)', 'Microphone (High Definition Audio Device)', 'Microphone (HD Audio Mixed capture)', 'Speakers (HD Audio Speaker)']
Spoke: Welcome to the voice-controlled LLM using Open AI. Ask me a question. Say 'exit' to stop.
Listening... (Attempt 1/3)
You said: about iPhone
LLM answer: Sure, how can I help you with information about iPhones?
Spoke: Sure, how can I help you with info

In [None]:
import speech_recognition as sr
import pyttsx3
from openai import OpenAI
import sys
import os
import time
import threading
import queue

# Initialize the Open AI client. The API key is read from the OPENAI_API_KEY
# environment variable so that no secret has to be pasted into the notebook.
# When the variable is unset an empty string is passed, exactly like the
# placeholder this replaces.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", ""))
if not os.environ.get("OPENAI_API_KEY"):
    print("Warning: OPENAI_API_KEY is not set; Open AI requests will not be authenticated.")

# Print environment info
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"Current directory: {os.getcwd()}")

# Both components start as None so later code can tell whether setup succeeded.
recognizer = None
tts_engine = None

# Initialize the recognizer and text-to-speech engine; a failure is reported
# and leaves the corresponding variable set to None.
try:
    recognizer = sr.Recognizer()
    print("Speech recognizer initialized.")
except Exception as e:
    print(f"Recognizer error: {e}")

try:
    tts_engine = pyttsx3.init()
    print("Text-to-speech initialized.")
except Exception as e:
    print(f"TTS error: {e}")

# Check microphones
if recognizer:
    print("Microphones:", sr.Microphone.list_microphone_names())

# Shared state used by the worker threads and the main loop
current_answer = queue.Queue()  # To store the current answer from Open AI
interrupt_flag = False  # To signal interruption
speaking_thread = None  # To manage the speaking thread
speaking_in_progress = False  # To track if the system is speaking
processing_in_progress = False  # To track if the system is processing a query

# Convert text to speech with interruption support
def speak(text):
    """Speak ``text`` through the shared pyttsx3 engine and wait until it is done.

    The call is skipped, with a console message, when the engine was never
    initialised or when another utterance is already in progress.

    Args:
        text: The sentence to speak.

    Returns:
        None. Errors raised by the engine are printed, not propagated.
    """
    global speaking_in_progress
    if not tts_engine:
        print("TTS not initialized.")
        return
    # Only one utterance at a time: a second caller backs off instead of queueing.
    if speaking_in_progress:
        print("Speech already in progress. Skipping current speech.")
        return
    speaking_in_progress = True
    try:
        # Stop any ongoing speech before starting the new sentence.
        tts_engine.stop()
        print(f"Spoke: {text}")
        tts_engine.say(text)
        tts_engine.runAndWait()
    except Exception as e:
        print(f"Speak error: {e}")
    finally:
        # Clear the flag even when the engine raised; otherwise every later
        # call would be skipped as "already in progress".
        speaking_in_progress = False

# Thread to speak the answer
def speak_in_thread(text):
    """Start a background thread that speaks ``text`` via ``speak``.

    The new thread is stored in the module-level ``speaking_thread`` and the
    function returns without waiting for the speech to finish.
    """
    global speaking_thread
    worker = threading.Thread(target=speak, args=(text,))
    speaking_thread = worker
    worker.start()

# Capture voice input with retry mechanism
def listen(max_retries=3, timeout=5):
    """Record one phrase from the microphone and return it as text.

    Args:
        max_retries: How many recording attempts to make.
        timeout: Value passed to ``recognizer.listen`` as its ``timeout``.

    Returns:
        The recognized text, or ``None`` when the recognizer is not
        initialised, every attempt failed, or a request/unexpected error
        occurred.
    """
    if not recognizer:
        print("Recognizer not initialized.")
        return None
    for attempt in range(max_retries):
        try:
            with sr.Microphone() as source:
                print(f"Listening... (Attempt {attempt + 1}/{max_retries})")
                recognizer.adjust_for_ambient_noise(source, duration=1)
                audio = recognizer.listen(source, timeout=timeout, phrase_time_limit=5)
                text = recognizer.recognize_google(audio)
                print(f"You said: {text}")
                return text
        except sr.WaitTimeoutError:
            # Silence is not a failure: move on to the next attempt instead of
            # falling into the generic handler, which would abort the retries.
            print("No speech detected before the timeout.")
        except sr.UnknownValueError:
            print("Could not understand audio.")
            if attempt < max_retries - 1:
                speak("I didn't catch that. Please try again.")
            time.sleep(1)
        except sr.RequestError as e:
            print(f"Speech recognition error: {e}")
            speak("There was an error with speech recognition. Please try again.")
            return None
        except Exception as e:
            print(f"Listen error: {e}")
            speak("An error occurred. Please try again.")
            return None
    return None

# Non-blocking listen to detect interruption
def listen_for_interrupt(timeout=1):
    """Make one short recording attempt and return any speech heard as text.

    Args:
        timeout: Value passed to ``recognizer.listen`` as its ``timeout``.

    Returns:
        The recognized text, or ``None`` when nothing usable was captured
        or an error occurred.
    """
    try:
        with sr.Microphone() as mic:
            recognizer.adjust_for_ambient_noise(mic, duration=0.5)
            clip = recognizer.listen(mic, timeout=timeout, phrase_time_limit=2)
            heard = recognizer.recognize_google(clip)
            print(f"Interrupt detected: {heard}")
            return heard
    except sr.UnknownValueError:
        # Audio was captured but could not be transcribed.
        return None
    except sr.RequestError:
        # The recognition service could not be reached.
        return None
    except sr.WaitTimeoutError:
        # Nobody spoke within the timeout.
        return None
    except Exception as e:
        print(f"Interrupt listen error: {e}")
        return None

# Process with Open AI model in a thread
def process_with_llm(question):
    """Send ``question`` to the Open AI chat model and return its reply.

    Args:
        question: The user's question as plain text.

    Returns:
        The stripped answer text (also pushed onto ``current_answer``), ``None``
        when ``interrupt_flag`` is set before or during the request, or a fixed
        apology string when the request fails or yields no text.
    """
    # Nothing to do if an interruption was already signalled.
    if interrupt_flag:
        print("Generation interrupted.")
        return None
    try:
        # The request itself carries the 10 second limit. The earlier version
        # wrapped the call in a timed while-loop that always returned on its
        # first pass, so nothing bounded how long the call could block.
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant for general knowledge questions."},
                {"role": "user", "content": question}
            ],
            max_tokens=150,
            temperature=0.7,
            timeout=10,
        )
        # The flag may have been raised while the request was in flight.
        if interrupt_flag:
            print("Generation interrupted.")
            return None
        content = response.choices[0].message.content
        if content is None:
            print("Open AI error: the response contained no text.")
            return "Sorry, I couldn't process that question."
        answer = content.strip()
        print(f"LLM answer: {answer}")
        current_answer.put(answer)
        return answer
    except Exception as e:
        print(f"Open AI error: {e}")
        return "Sorry, I couldn't process that question."

# Thread to process LLM query
def process_in_thread(question):
    """Answer ``question`` and start speaking the reply unless interrupted.

    Sets ``processing_in_progress`` for the duration of the call.

    Args:
        question: The user's question as plain text.
    """
    global processing_in_progress
    processing_in_progress = True
    try:
        answer = process_with_llm(question)
        # Stay silent when there is no answer or an interruption was signalled.
        if answer and not interrupt_flag:
            speak_in_thread(answer)
    finally:
        # Clear the flag even if answering or starting the speech thread raised.
        processing_in_progress = False

# Main loop with interruption support
def voice_controlled_llm():
    """Run the interactive assistant loop until the user asks to exit.

    Each round listens for a question, answers it on a worker thread and keeps
    polling the microphone so that a new utterance can replace the pending
    answer. Saying "exit", "quit" or "stop", or a KeyboardInterrupt, ends the
    loop. Returns immediately when the recognizer or the speech engine failed
    to initialise.
    """
    # speaking_in_progress is assigned below, so it must be declared global;
    # without the declaration Python treats it as a local variable and reading
    # it raises UnboundLocalError.
    global interrupt_flag, speaking_thread, speaking_in_progress
    if not all([recognizer, tts_engine]):
        print("One or more components failed to initialize. Check errors above.")
        return
    speak("Welcome to the voice-controlled LLM using Open AI. Ask me a question. Say 'exit' to stop.")
    while True:
        try:
            # Reset interrupt flag
            interrupt_flag = False
            # Clear the answer queue
            while not current_answer.empty():
                current_answer.get()
            # Listen for the initial question
            question = listen()
            if question:
                if question.lower() in ['exit', 'quit', 'stop']:
                    speak("Goodbye!")
                    break
                # Start processing the question in a separate thread
                processing_thread = threading.Thread(target=process_in_thread, args=(question,))
                processing_thread.start()

                # Listen for interruption while the answer is being produced or
                # spoken. Thread liveness is used for the speech part because
                # the speaking thread is registered before the worker exits.
                while processing_thread.is_alive() or (
                    speaking_thread is not None and speaking_thread.is_alive()
                ):
                    interrupt_input = listen_for_interrupt(timeout=1)
                    if interrupt_input:
                        print("Interruption detected. Stopping current process.")
                        # Tell the worker that its answer is no longer wanted.
                        interrupt_flag = True
                        # Stop the ongoing speech immediately
                        tts_engine.stop()
                        speaking_in_progress = False  # Reset the speaking flag
                        # Wait for the superseded worker so it cannot speak its
                        # stale answer, then lower the flag for the new query.
                        processing_thread.join()
                        interrupt_flag = False
                        # Process the new query immediately
                        new_answer = process_with_llm(interrupt_input)
                        if new_answer:
                            speak_in_thread(new_answer)
                        break
            else:
                speak("I couldn't understand you after several attempts. Please try again.")
        except KeyboardInterrupt:
            speak("Program interrupted. Goodbye!")
            break
        except Exception as e:
            print(f"Unexpected error: {e}")
            speak("An unexpected error occurred. Please try again.")

# Run: start the interactive assistant loop. The call returns only once
# voice_controlled_llm() exits (an exit word, Ctrl+C, or a failed initialisation).
voice_controlled_llm()


Python version: 3.13.2 (tags/v3.13.2:4f8bb39, Feb  4 2025, 15:23:48) [MSC v.1942 64 bit (AMD64)]
Python executable: C:\Program Files\Python313\python.exe
Current directory: C:\Users\Mahir\AppData\Roaming\Python\Python313\Scripts
Speech recognizer initialized.
Text-to-speech initialized.
Microphones: ['Microsoft Sound Mapper - Input', 'Microphone (High Definition Aud', 'Microsoft Sound Mapper - Output', 'Speakers (High Definition Audio', 'Primary Sound Capture Driver', 'Microphone (High Definition Audio Device)', 'Primary Sound Driver', 'Speakers (High Definition Audio Device)', 'Speakers (High Definition Audio Device)', 'Microphone (High Definition Audio Device)', 'Microphone (HD Audio Mixed capture)', 'Speakers (HD Audio Speaker)']
Spoke: Welcome to the voice-controlled LLM using Open AI. Ask me a question. Say 'exit' to stop.
Listening... (Attempt 1/3)
You said: build me a diet
LLM answer: I can provide you with some general guidelines for building a healthy and balanced diet. It's i