In [6]:
pip install google-generativeai speechrecognition pyttsx3 pyaudio python-dotenv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [8]:
import os
import time
import speech_recognition as sr
import pyttsx3
from dotenv import load_dotenv
import google.generativeai as genai

# ------------------ Load environment variables ------------------
# Load variables from a .env file (if any), then read the key from the environment.
load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

# Fail fast: nothing below can work without a key (missing or empty both count).
if not api_key:
    raise ValueError("GEMINI_API_KEY not found in .env file.")

# ------------------ Configure Gemini API ------------------
genai.configure(api_key=api_key)
# `model` is the module-level handle used by get_gemini_response().
model = genai.GenerativeModel(model_name="models/gemini-1.5-flash")

# ------------------ Create folders and log file if not exist ------------------
# exist_ok=True makes the call a no-op when the folder is already there, so the
# cell can be re-run safely without a separate (racy) existence check.
os.makedirs("responses", exist_ok=True)

# The log is only seeded with a header the first time; later runs append to it.
log_file = "chat_log.txt"
if not os.path.exists(log_file):
    with open(log_file, "w", encoding="utf-8") as f:
        f.write("=== Gemini Voice Assistant Log ===\n\n")

# ------------------ Voice Input ------------------
def get_voice_input():
    """Record one utterance from the microphone and transcribe it.

    The ambient-noise calibration runs *before* the "Speak now" prompt is
    printed. Calibration reads audio from the microphone and is meant to run
    on audio without speech, so prompting first would invite the user to talk
    while that audio is being consumed instead of recorded.

    Returns:
        The recognized text converted to lowercase, or ``None`` when the
        speech could not be understood or the recognition request failed
        (a message is printed in both failure cases).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate while the user has not yet been asked to speak.
        recognizer.adjust_for_ambient_noise(source)
        print("\n🎤 Speak now (say 'exit' to stop)...")
        audio = recognizer.listen(source)

    try:
        text = recognizer.recognize_google(audio)
        print("🗣 You said:", text)
        # Lowercased so callers can compare against command words directly.
        return text.lower()
    except sr.UnknownValueError:
        print("❌ Couldn't understand. Please try again.")
        return None
    except sr.RequestError as e:
        print("⚠ Error with speech recognition:", str(e))
        return None

# ------------------ Gemini Reply ------------------
def get_gemini_response(prompt):
    """Ask the module-level Gemini ``model`` for a reply to ``prompt``.

    Args:
        prompt: The text passed to ``model.generate_content``.

    Returns:
        The reply text with surrounding whitespace stripped; a fixed apology
        string when the stripped reply is empty; or a string describing the
        error when generating or reading the reply raises an ``Exception``.
    """
    try:
        answer = model.generate_content(prompt).text.strip()
    except Exception as e:
        # Failures are reported as text so the caller can speak and log them.
        return f"⚠ Gemini API error: {str(e)}"

    if answer:
        return answer
    return "🤔 Sorry, I couldn't think of a good response."

# ------------------ Speak and Save ------------------
def speak_and_save(text, count):
    """Write ``text`` to an audio file via pyttsx3, then speak it aloud.

    Args:
        text: The text to synthesize.
        count: Number used in the output name ``responses/response_<count>.mp3``.

    The file is rendered first and playback happens second; each step is
    flushed with its own ``runAndWait()`` call.
    """
    # NOTE(review): the file is named .mp3; the actual audio encoding is
    # whatever the pyttsx3 driver produces — confirm if the format matters.
    filename = f"responses/response_{count}.mp3"
    tts = pyttsx3.init()

    # Step 1: render to disk.
    tts.save_to_file(text, filename)
    tts.runAndWait()

    # Step 2: show the reply and play it through the speakers.
    print(f"🔊 Gemini says: {text}")
    tts.say(text)
    tts.runAndWait()

    print(f"💾 Response saved to {filename}")

# ------------------ Log Conversation ------------------
def log_conversation(user, gemini):
    """Append one timestamped exchange to the chat log file.

    Args:
        user: What the user said.
        gemini: The reply that was given.

    The entry is a ``[YYYY-MM-DD HH:MM:SS]`` line (local time), a ``You:``
    line, a ``Gemini:`` line and a trailing blank line, appended to the path
    held in the module-level ``log_file``.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{timestamp}]\nYou: {user}\nGemini: {gemini}\n\n"
    with open(log_file, mode="a", encoding="utf-8") as log:
        log.write(entry)

# ------------------ Main Loop ------------------
if __name__ == "__main__":
    # Greet the user once before entering the listen/reply loop.
    print("🤖 Hello! I am your Gemini voice assistant.")
    print("🎙 Just speak to me. Say 'exit' or 'quit' anytime to stop.\n")

    # Numbers the saved audio files; it only advances after a Gemini reply.
    response_count = 1

    while True:
        user_input = get_voice_input()

        # Nothing usable was heard (None or empty text): listen again.
        if not user_input:
            continue

        # The whole utterance must be exactly one of the stop words.
        if user_input in ("exit", "quit"):
            goodbye = "Goodbye! Have a great day."
            print("👋", goodbye)
            speak_and_save(goodbye, response_count)
            break

        # Normal turn: ask Gemini, speak and save the answer, then log it.
        gemini_reply = get_gemini_response(user_input)
        speak_and_save(gemini_reply, response_count)
        log_conversation(user_input, gemini_reply)
        response_count += 1

🤖 Hello! I am your Gemini voice assistant.
🎙 Just speak to me. Say 'exit' or 'quit' anytime to stop.


🎤 Speak now (say 'exit' to stop)...
🗣 You said: introduction to Prompt engineering
🔊 Gemini says: Prompt engineering is the art and science of designing effective prompts to elicit desired outputs from large language models (LLMs) like GPT-3, LaMDA, and others.  It's about crafting the right questions, instructions, or requests to guide the model towards generating the specific text, code, or other content you need.  Think of it as having a conversation with a very powerful, but somewhat literal-minded, assistant.  Your ability to communicate clearly and effectively determines the quality of the results.

Here's a breakdown of key aspects:

**1. The Goal:**  Before crafting a prompt, define what you want the LLM to do.  This might be:

* **Generating creative text:**  Writing stories, poems, scripts, articles, etc.
* **Summarizing information:** Condensing long texts into concise summ