In [None]:
import os
import speech_recognition as sr
import pyttsx3
import google.generativeai as genai
from dotenv import load_dotenv

# Single-turn voice assistant: prompt the user aloud, record one spoken
# query from the microphone, transcribe it, send the text to Gemini, and
# read the reply back through pyttsx3.

# Load .env file if available
load_dotenv()

# Initialize Text-to-Speech engine
tts = pyttsx3.init()

# Configure Gemini with API key
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    # Fail fast: nothing below can work without a key.
    raise ValueError("GEMINI_API_KEY not found in environment variables or .env file.")
genai.configure(api_key=api_key)

# Load the Gemini model
model = genai.GenerativeModel("gemini-1.5-flash")

# Initialize recognizer
recognizer = sr.Recognizer()

try:
    with sr.Microphone() as source:
        # Sample the room for 2 seconds before listening.
        recognizer.adjust_for_ambient_noise(source, duration=2)
        print("🎤 Listening... Say something after the beep.")
        tts.say("How can I help you?")
        tts.runAndWait()

        # Wait at most 5 s for speech to begin and record at most 10 s of it.
        # If nothing is said in time, listen() raises sr.WaitTimeoutError.
        audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)

    # Convert speech to text (done after the microphone has been released)
    query = recognizer.recognize_google(audio)
    user_msg = f"You said: {query}. Please wait while I generate a response."
    print(user_msg)
    tts.say(user_msg)
    tts.runAndWait()

    # Generate response from Gemini
    response = model.generate_content(query)
    print("💬 Gemini says:", response.text)
    tts.say(response.text)
    tts.runAndWait()

except sr.WaitTimeoutError:
    # The user never started speaking within the listen() timeout. Handle it
    # explicitly so they get spoken feedback instead of a generic error.
    print("❌ No speech detected before the timeout. Please try again.")
    tts.say("Sorry, I didn't hear anything. Please try again.")
    tts.runAndWait()
except sr.UnknownValueError:
    # Audio was captured but could not be transcribed.
    print("❌ Could not understand audio. Please try again.")
    tts.say("Sorry, I didn't catch that. Please try again.")
    tts.runAndWait()
except sr.RequestError as e:
    # The speech recognition request itself failed.
    print(f"❌ Could not request results from Google Speech Recognition; {e}")
except Exception as e:
    # Last-resort catch so the cell reports the problem instead of a traceback.
    print("❌ Error:", e)


🎤 Listening... Say something after the beep.
❌ Could not understand audio. Please try again.


In [39]:
import pyttsx3

# Print one line per voice known to the pyttsx3 engine: its position in the
# list, display name, language tags, and the id string used to select it.
tts = pyttsx3.init()
voices = tts.getProperty('voices')

for position, installed_voice in enumerate(voices):
    summary = (
        f"{position}: {installed_voice.name} "
        f"({installed_voice.languages}) - {installed_voice.id}"
    )
    print(summary)


0: Microsoft David Desktop - English (United States) (['en-US']) - HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_DAVID_11.0
1: Microsoft Zira Desktop - English (United States) (['en-US']) - HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Speech\Voices\Tokens\TTS_MS_EN-US_ZIRA_11.0


In [44]:
import pyttsx3

# Check that pyttsx3 can speak using the Zira voice.
tts = pyttsx3.init()
voices = tts.getProperty('voices')

# Set to Zira (female voice).
# Select by name, not by list position: in the voice listing printed earlier
# in this notebook, index 0 is Microsoft David and Zira is index 1, so
# `voices[0]` picked the wrong voice. A name lookup is also robust to a
# different voice order.
zira_voice = next(
    (voice for voice in voices if "zira" in (voice.name or "").lower()),
    None,
)
if zira_voice is not None:
    tts.setProperty('voice', zira_voice.id)
else:
    # Keep the engine's default voice rather than failing the whole cell.
    print("Zira voice not found; falling back to the default voice.")

# Optional: adjust speech rate or volume
tts.setProperty('rate', 160)   # default ~200 wpm
tts.setProperty('volume', 1.0)  # 0.0 to 1.0

tts.say("Hello! I'm Zira. Text to speech is working.")
tts.runAndWait()


In [41]:
import win32com.client

# Quick check that speech works when driving the Windows SAPI voice directly
# through COM (this cell does not use pyttsx3).
speaker = win32com.client.Dispatch("SAPI.SpVoice")
sapi_test_phrase = "Hello. If you hear this, SAPI is working."
# Left as a bare expression so the notebook displays the call's return value.
speaker.Speak(sapi_test_phrase)


1

In [42]:
import os
import speech_recognition as sr
import win32com.client
import google.generativeai as genai
from dotenv import load_dotenv

# Read settings from a .env file when one is present
load_dotenv()

# Speech output for this cell goes through the Windows SAPI COM voice
SAPI_VOICE_PROG_ID = "SAPI.SpVoice"
speaker = win32com.client.Dispatch(SAPI_VOICE_PROG_ID)

def speak(text):
    """Log ``text`` to stdout, then speak it with the module-level SAPI voice.

    Args:
        text: The message to print and pass to ``speaker.Speak``.

    Returns:
        None. The value returned by ``speaker.Speak`` is discarded.
    """
    log_prefix = "🗣️ Speaking:"
    print(log_prefix, text)
    speaker.Speak(text)

# Single-turn voice assistant using the `speak` helper for all spoken output:
# prompt the user, record one spoken query, transcribe it, ask Gemini, and
# read the reply aloud.

# Configure Gemini with API key
api_key = os.getenv("GEMINI_API_KEY")
if not api_key:
    # Fail fast: nothing below can work without a key.
    raise ValueError("GEMINI_API_KEY not found in environment variables or .env file.")
genai.configure(api_key=api_key)

# Load the Gemini model
model = genai.GenerativeModel("gemini-1.5-flash")

# Initialize recognizer
recognizer = sr.Recognizer()

try:
    with sr.Microphone() as source:
        # Sample the room for 2 seconds before listening.
        recognizer.adjust_for_ambient_noise(source, duration=2)
        print("🎤 Listening... Say something after the beep.")
        speak("How can I help you?")

        # Wait at most 5 s for speech to begin and record at most 10 s of it.
        # If nothing is said in time, listen() raises sr.WaitTimeoutError.
        audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)

    # Convert speech to text (done after the microphone has been released)
    query = recognizer.recognize_google(audio)
    user_msg = f"You said: {query}. Please wait while I generate a response."
    print(user_msg)
    speak(user_msg)

    # Generate response from Gemini
    response = model.generate_content(query)
    # Trim surrounding whitespace before printing and speaking the reply.
    response_text = str(response.text).strip()
    print("💬 Gemini says:", response_text)
    speak(response_text)

except sr.WaitTimeoutError:
    # The user never started speaking within the listen() timeout. Handle it
    # explicitly so they get a specific message instead of the generic one.
    print("❌ No speech detected before the timeout. Please try again.")
    speak("Sorry, I didn't hear anything. Please try again.")
except sr.UnknownValueError:
    # Audio was captured but could not be transcribed.
    print("❌ Could not understand audio. Please try again.")
    speak("Sorry, I didn't catch that. Please try again.")
except sr.RequestError as e:
    # The speech recognition request itself failed.
    print(f"❌ Could not request results from Google Speech Recognition; {e}")
    speak("Speech recognition service failed.")
except Exception as e:
    # Last-resort catch so the cell reports the problem instead of a traceback.
    print("❌ Error:", e)
    speak("An error occurred. Please check the terminal.")


🎤 Listening... Say something after the beep.
🗣️ Speaking: How can I help you?
You said: no I want to understand. Please wait while I generate a response.
🗣️ Speaking: You said: no I want to understand. Please wait while I generate a response.


KeyboardInterrupt: 