# 🤖🗣️✨ **AI Voice Assistant with Text & Voice Command Interface**

A smart desktop assistant that understands both voice and text commands. Features include AI chat, weather, reminders, app launching, and more.

**Created by Arya Kant Pathak and Saswat Subhang**

## 📚✨ **Import Libraries**

In [12]:
import os
import re
import time
import datetime
import webbrowser
import threading
import subprocess
import tkinter as tk
from tkinter import messagebox, scrolledtext
print("[✔] Tkinter modules imported successfully.")

import wikipedia
print("[✔] Wikipedia module imported.")

import sounddevice as sd
import wavio
print("[✔] Audio input modules (sounddevice, wavio) imported.")

import speech_recognition as sr
print("[✔] SpeechRecognition module imported.")

import pyttsx3
print("[✔] Text-to-speech module (pyttsx3) imported.")

import requests
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)  # Disable SSL warning
print("[✔] Requests and urllib3 imported (SSL warnings disabled).")

from PIL import ImageGrab
print("[✔] PIL.ImageGrab imported for screenshots.")

from openai import OpenAI
print("[✔] OpenAI module imported.")

import random
print("[✔] Random module imported.")

print("\n✅ All Libraries Imported Successfully.\n")

[✔] Tkinter modules imported successfully.
[✔] Wikipedia module imported.
[✔] Audio input modules (sounddevice, wavio) imported.
[✔] SpeechRecognition module imported.
[✔] Text-to-speech module (pyttsx3) imported.
[✔] PIL.ImageGrab imported for screenshots.
[✔] OpenAI module imported.
[✔] Random module imported.

✅ All Libraries Imported Successfully.



## 🔐🗂️ **Load Credentials**

In [13]:
def load_secrets(filename=r"C:\Users\aryak\OneDrive\Desktop\python codes\secret.txt"):
    """Read ``KEY=VALUE`` pairs from a plain-text secrets file.

    Each line is split on its first ``=``; the key and value are stripped of
    surrounding whitespace and the value additionally of surrounding double
    quotes. Blank lines, lines starting with ``#`` and lines without ``=``
    are ignored.

    Args:
        filename: Path of the secrets file.

    Returns:
        dict mapping each key to its string value.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
    """
    print("🔍 Loading secrets...")
    secrets = {}
    try:
        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise become part of the first key. An explicit encoding
        # keeps the result independent of the platform default.
        handle = open(filename, "r", encoding="utf-8-sig")
    except FileNotFoundError:
        print(f"❌ Secrets file '{filename}' not found.")
        raise FileNotFoundError(f"Secrets file '{filename}' not found.") from None
    with handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            # Split on the first "=" only so values may themselves contain "=".
            key, separator, value = line.partition("=")
            if not separator:
                continue
            secrets[key.strip()] = value.strip().strip('"')
    print("✅ Credentials Loaded Successfully")
    return secrets

# Load credentials once when this cell/module runs; load_secrets raises
# FileNotFoundError if the default secrets file is missing.
SECRETS = load_secrets()

🔍 Loading secrets...
✅ Credentials Loaded Successfully


## 🤖🔌 **Initialize OpenRouter Client (via the OpenAI SDK)**

In [14]:
# Shared chat client: the OpenAI SDK pointed at the OpenRouter endpoint,
# authenticated with the key read from the secrets file.
print("🔐 Initializing OpenAI client with custom base URL...")
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=SECRETS["OPENROUTER_API_KEY"],
)
print("✅ OpenAI client initialized successfully.")

🔐 Initializing OpenAI client with custom base URL...
✅ OpenAI client initialized successfully.


## 💬📜 **Fallback Quotes**

In [15]:
# Offline quotes: one is picked at random when the quote web service fails.
FALLBACK_QUOTES = [
    "The best way to get started is to quit talking and begin doing. — Walt Disney",
    "Don’t let yesterday take up too much of today. — Will Rogers",
    "It’s not whether you get knocked down, it’s whether you get up. — Vince Lombardi",
]

print(f"✅ Loaded {len(FALLBACK_QUOTES)} fallback motivational quotes.")

✅ Loaded 3 fallback motivational quotes.


## 🏠🖥️ **Home Screen**

In [18]:
class HomeScreen:
    """Launcher window offering a choice between the two assistant modes.

    Each button destroys the launcher window, opens a fresh Tk root for the
    selected mode and runs that window's event loop.
    """

    def __init__(self, root):
        """Configure the launcher window and add its greeting and buttons.

        Args:
            root: The Tk root window that hosts the launcher.
        """
        self.root = root
        root.title("AI Assistant Launcher")
        root.geometry("400x300")
        root.configure(bg="#2c3e50")

        print("🟦 Home screen initialized.")

        greeting = tk.Label(
            root,
            text="Hello! How may I help you?",
            font=("Arial", 16),
            bg="#2c3e50",
            fg="white",
        )
        greeting.pack(pady=30)

        # (caption, click handler, background colour) for each mode button,
        # listed in the order they appear on screen.
        mode_buttons = (
            ("🎤 Voice Command Mode", self.launch_voice_mode, "#3498db"),
            ("🤖 Launch AI Assistant", self.launch_ai_assistant, "#1abc9c"),
        )
        for caption, handler, colour in mode_buttons:
            button = tk.Button(
                root,
                text=caption,
                command=handler,
                font=("Arial", 14),
                bg=colour,
                fg="white",
                padx=10,
                pady=10,
            )
            button.pack(pady=10)

        print("✅ UI elements (label + buttons) created successfully.")

    def _fresh_window(self):
        """Destroy the launcher window and return a new Tk root."""
        self.root.destroy()
        return tk.Tk()

    def launch_voice_mode(self):
        """Replace the launcher with the voice-command window and run it."""
        print("🎤 Launching Voice Command Mode...")
        window = self._fresh_window()
        SimpleVoiceMode(window)
        print("✅ Voice Command Mode initialized successfully.")
        window.mainloop()

    def launch_ai_assistant(self):
        """Replace the launcher with the AI chat window and run it."""
        print("🤖 Launching Full AI Assistant...")
        window = self._fresh_window()
        AIVoiceAssistantApp(window)
        print("✅ AI Assistant launched successfully.")
        window.mainloop()

print("🧪 HomeScreen class defined successfully and ready for use.")

🧪 HomeScreen class defined successfully and ready for use.


## 🎤🧠 **Voice Command**

In [19]:
class SimpleVoiceMode:
    """Voice-command window: record a short clip, transcribe it, run a command.

    Pressing "Start Listening" records four seconds of audio on a background
    thread, sends it to Google speech recognition and passes the recognised
    text to :meth:`process_command`, which shows and speaks the response.
    """

    # Spoken application names mapped to the command handed to subprocess.Popen.
    _APPS = {
        "calculator": "calc.exe",
        "notepad": "notepad.exe",
        "paint": "mspaint.exe",
        "file explorer": "explorer",
        "task manager": "taskmgr",
        "cmd": "cmd",
    }

    # Phrases that introduce a Wikipedia lookup; also stripped from the query.
    _WIKI_PATTERN = re.compile(r"(tell me about|who is|what is)")

    # Seconds to wait for any web service before giving up.
    _REQUEST_TIMEOUT = 5

    def __init__(self, root):
        """Build the voice-command window inside ``root``.

        Args:
            root: The Tk root window that hosts this screen.
        """
        self.root = root
        self.tts = pyttsx3.init()
        self.root.title("Voice Command Assistant")
        self.root.geometry("600x500")
        self.root.configure(bg="#1e272e")

        print("🟪 Voice Command UI initialized.")

        tk.Label(root, text="🎤 Speak a command", font=("Arial", 18, "bold"), bg="#1e272e", fg="#ffffff").pack(pady=10)
        self.output_label = tk.Label(root, text="Waiting for your command...",
                                     font=("Arial", 12), bg="#1e272e", fg="lightgrey")
        self.output_label.pack(pady=5)

        tk.Button(root, text="🎧 Start Listening", command=self.listen,
                  font=("Arial", 14), bg="#0fbcf9", fg="white", padx=15, pady=5).pack(pady=10)

        # Read-only transcript of assistant responses.
        self.response_box = scrolledtext.ScrolledText(root, width=60, height=10, font=("Arial", 11),
                                                      bg="#2d3436", fg="white", wrap=tk.WORD)
        self.response_box.pack(pady=10)
        self.response_box.insert(tk.END, "📂 Assistant response will appear here.\n")
        self.response_box.config(state=tk.DISABLED)

        # Read-only list of example commands.
        self.instructions_box = scrolledtext.ScrolledText(root, width=60, height=8, font=("Arial", 10),
                                                          bg="#485460", fg="white")
        self.instructions_box.pack(pady=10)
        self.instructions_box.insert(tk.END, self.get_instructions())
        self.instructions_box.config(state=tk.DISABLED)

        print("✅ Voice command screen fully loaded.")

    def get_instructions(self):
        """Return the help text listing example voice commands."""
        return (
            "📝 Try saying:\n"
            "- Open calculator / notepad / paint / task manager\n"
            "- Open google.com or youtube\n"
            "- What time is it / What's the date\n"
            "- What's the weather in [city]\n"
            "- Remind me to drink water\n"
            "- Tell me about {Topic}\n"
            "- Say a quote / give me a quote\n"
            "- Say a fact\n"
            "- Take screenshot\n"
            "- Stop"
        )

    def speak_text(self, text):
        """Speak ``text`` through the text-to-speech engine, blocking until done."""
        print(f"🗣 Speaking: {text}")
        self.tts.say(text)
        self.tts.runAndWait()

    def listen(self):
        """Start one record-and-recognise cycle on a daemon thread."""
        print("🎧 Listening started...")
        threading.Thread(target=self.capture_command, daemon=True).start()

    def capture_command(self):
        """Record four seconds of audio, transcribe it and process the command.

        Runs on the thread started by :meth:`listen`. Any failure is shown in
        the status label and an error dialog. The temporary WAV file is removed
        afterwards, retrying briefly while it is still locked.
        """
        filename = "temp.wav"
        try:
            print("🎙 Recording audio...")
            self.output_label.config(text="Listening...", fg="yellow")
            self.speak_text("Listening...")
            recording = sd.rec(int(4 * 44100), samplerate=44100, channels=1, dtype='int16')
            sd.wait()
            wavio.write(filename, recording, 44100, sampwidth=2)

            recognizer = sr.Recognizer()
            with sr.AudioFile(filename) as source:
                audio = recognizer.record(source)
            text = recognizer.recognize_google(audio).lower()
            print(f"📝 Recognized command: {text}")
            self.output_label.config(text=f"You said: {text}", fg="lightgreen")
            self.process_command(text)

        except Exception as e:
            print(f"❌ Voice recognition error: {e}")
            self.output_label.config(text="Error: Try again.", fg="red")
            messagebox.showerror("Error", f"Could not understand. {e}")
        finally:
            # The file can still be locked for a moment; retry a few times.
            for _ in range(5):
                try:
                    if os.path.exists(filename):
                        os.remove(filename)
                        print("🧹 Temp file deleted.")
                    break
                except PermissionError:
                    time.sleep(0.1)

    def show_response(self, text):
        """Append ``text`` to the response box and speak it."""
        print(f"📣 Response: {text}")
        self.response_box.config(state=tk.NORMAL)
        self.response_box.insert(tk.END, f"\n📣 {text}")
        self.response_box.yview(tk.END)
        self.response_box.config(state=tk.DISABLED)
        self.speak_text(text)

    def process_command(self, cmd):
        """Run a recognised command and present the result.

        Args:
            cmd: The command text; it is lower-cased before matching.

        A stop/exit command quits the Tk main loop without a response; every
        other command's response is shown and spoken via :meth:`show_response`.
        """
        cmd = cmd.lower()
        print(f"⚙️ Processing command: {cmd}")
        response = self._build_response(cmd)
        if response is None:
            print("🛑 Exiting voice assistant.")
            self.root.quit()
            return
        self.show_response(response)

    def _build_response(self, cmd):
        """Return the response text for lower-cased ``cmd``, or None for stop/exit.

        Reminders are matched first so that the free text of a reminder
        ("remind me to update ...", "remind me to open ...") is not mistaken
        for another command. "time" and "date" must appear as whole words so
        that words such as "update" or "sometimes" do not trigger them.
        """
        if "remind me" in cmd:
            reminder = cmd.split("remind me", 1)[-1].strip()
            return f"Reminder noted: {reminder}"
        if "open" in cmd:
            return self._open_target(cmd.split("open", 1)[-1].strip())
        if self._has_word(cmd, "time"):
            now = datetime.datetime.now().strftime("%I:%M %p")
            return f"The time is {now}."
        if self._has_word(cmd, "date"):
            today = datetime.datetime.now().strftime("%A, %d %B %Y")
            return f"Today is {today}."
        if "weather" in cmd:
            return self._weather_report(cmd)
        if self._WIKI_PATTERN.search(cmd):
            return self._wiki_summary(cmd)
        if "quote" in cmd:
            return self._random_quote()
        if "fact" in cmd:
            return self._random_fact()
        if "screenshot" in cmd:
            return self._take_screenshot()
        if "stop" in cmd or "exit" in cmd:
            return None
        return "I didn't understand that command."

    @staticmethod
    def _has_word(text, word):
        """Return True if ``word`` occurs in ``text`` as a whole word."""
        return re.search(r"\b" + re.escape(word) + r"\b", text) is not None

    def _open_target(self, app):
        """Launch a known application or open ``app`` as a website."""
        if not app:
            return "What would you like me to open?"
        if app in self._APPS:
            try:
                subprocess.Popen(self._APPS[app])
            except OSError as e:
                # e.g. the executable does not exist on this system.
                print(f"⚠️ Could not launch {app}: {e}")
                return f"Sorry, I couldn't open {app}."
            return f"Opening {app}..."
        if ".com" in app or ".in" in app:
            url = app if app.startswith("http") else "https://" + app
            webbrowser.open(url)
            return f"Opening {url}..."
        # Treat anything else as a site name; drop spaces so a multi-word name
        # ("stack overflow") still forms a valid host.
        webbrowser.open(f"https://{app.replace(' ', '')}.com")
        return f"Opening {app}..."

    def _weather_report(self, cmd):
        """Fetch a one-line weather summary for the city named after "in"."""
        # Require "in" as a separate word so the letters inside words such as
        # "raining" are not taken as the start of the city name.
        match = re.search(r"\bin\s+(.+)$", cmd)
        # NOTE(review): with no city the location is left empty; wttr.in is
        # expected to choose one from the caller's IP address, confirm.
        city = match.group(1).strip() if match else ""
        try:
            res = requests.get(f"https://wttr.in/{city}?format=3", timeout=self._REQUEST_TIMEOUT)
            res.raise_for_status()
            return res.text.strip()
        except Exception as e:
            print("⚠️ Weather API error:", e)
            return "Sorry, I couldn't fetch the weather."

    def _wiki_summary(self, cmd):
        """Return a two-sentence Wikipedia summary for the topic in ``cmd``."""
        keyword = self._WIKI_PATTERN.sub("", cmd).strip()
        try:
            return wikipedia.summary(keyword, sentences=2, auto_suggest=False, redirect=True)
        except Exception as e:
            print("⚠️ Wikipedia lookup error:", e)
            return f"I couldn't find any information about {keyword}."

    def _random_quote(self):
        """Return a quote from the web service, or a local fallback on failure."""
        try:
            headers = {'Accept': 'application/json'}
            # NOTE(review): verify=False disables TLS certificate checks for
            # this request; kept as in the original, revisit before relying on it.
            res = requests.get("https://api.quotable.io/random", headers=headers,
                               timeout=self._REQUEST_TIMEOUT, verify=False)
            if res.status_code != 200:
                raise ValueError(f"Bad response (status {res.status_code})")
            data = res.json()
            return f"Here's a quote: \"{data['content']}\" — {data['author']}"
        except Exception as e:
            print("⚠️ Quote API error:", e)
            return random.choice(FALLBACK_QUOTES)

    def _random_fact(self):
        """Return a random fact from the web service, or an apology on failure."""
        try:
            res = requests.get("https://uselessfacts.jsph.pl/random.json?language=en",
                               timeout=self._REQUEST_TIMEOUT)
            res.raise_for_status()
            return res.json()["text"]
        except Exception as e:
            print("⚠️ Fact API error:", e)
            return "Couldn't get a fact now."

    def _take_screenshot(self):
        """Save a screenshot as ``screenshot.png`` in the working directory."""
        try:
            img = ImageGrab.grab()
            path = os.path.join(os.getcwd(), "screenshot.png")
            img.save(path)
            return f"Screenshot saved to: {path}"
        except Exception as e:
            print("⚠️ Screenshot error:", e)
            return "Could not take screenshot."
print("🧪 SimpleVoiceMode class defined successfully.")

🧪 SimpleVoiceMode class defined successfully.


## 🗣️🤖 **Voice Assistant**

In [20]:
class AIVoiceAssistantApp:
    """Chat window that sends typed or spoken input to the AI model.

    The conversation is kept in ``chat_history`` (seeded with the system
    prompt from ``SECRETS``) and sent in full with every request. Replies are
    written to the conversation log and spoken aloud.
    """

    def __init__(self, root):
        """Set up conversation state, the speech engine and the GUI.

        Args:
            root: The Tk root window that hosts the chat screen.
        """
        print("🟩 Initializing AI Voice Assistant App...")
        self.root = root
        # Messages sent to the model on every request, oldest first.
        self.chat_history = [{"role": "system", "content": SECRETS["SYSTEM_PROMPT"]}]
        # Assistant replies only, in the order they were received.
        self.answer_history = []
        wikipedia.set_lang("en")
        self.tts_engine = pyttsx3.init()
        self.setup_gui()
        print("✅ AI Voice Assistant App initialized successfully.")

    def setup_gui(self):
        """Create the conversation log, the text entry and the two buttons."""
        print("🎨 Setting up GUI...")
        self.root.title("AI Assistant")
        self.root.geometry("600x700")
        self.root.configure(bg="#2c3e50")

        # Read-only log; update_log enables it briefly to append text.
        self.conversation_log = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, bg="#ecf0f1", fg="#2c3e50",
                                                          font=("Helvetica", 10), state=tk.DISABLED)
        self.conversation_log.pack(padx=10, pady=10, fill="both", expand=True)

        self.entry_frame = tk.Frame(self.root, bg="#2c3e50")
        self.entry_frame.pack(pady=5)

        self.input_entry = tk.Entry(self.entry_frame, font=("Arial", 12), width=50)
        self.input_entry.pack(side="left", padx=5)
        # Pressing Enter behaves like clicking "Send".
        self.input_entry.bind("<Return>", lambda e: self.process_text_input())

        tk.Button(self.entry_frame, text="Send", command=self.process_text_input, bg="#3498db", fg="white").pack(side="left", padx=5)
        tk.Button(self.entry_frame, text="🎤 Voice", command=self.speak_input, bg="#1abc9c", fg="white").pack(side="left", padx=5)
        print("✅ GUI setup complete.")

    def update_log(self, speaker, message):
        """Append ``speaker: message`` to the conversation log and scroll to it."""
        self.conversation_log.config(state=tk.NORMAL)
        self.conversation_log.insert(tk.END, f"{speaker}: {message}\n\n")
        self.conversation_log.config(state=tk.DISABLED)
        self.conversation_log.yview(tk.END)

    def speak(self, text):
        """Log ``text`` as an assistant message and speak it, blocking until done."""
        print(f"🗣 Assistant speaking: {text}")
        self.update_log("Assistant", text)
        self.tts_engine.say(text)
        self.tts_engine.runAndWait()

    def process_text_input(self):
        """Send the entry box's text to the AI; empty input is ignored."""
        text = self.input_entry.get().strip()
        if not text:
            print("⚠️ Empty input. Ignoring.")
            return
        print(f"📨 Text input received: {text}")
        self.input_entry.delete(0, tk.END)
        self.update_log("You", text)
        self.ask_ai(text)

    def speak_input(self):
        """Start one voice capture on a daemon thread."""
        print("🎤 Capturing voice input...")
        threading.Thread(target=self.capture_and_process_voice, daemon=True).start()

    def capture_and_process_voice(self):
        """Record five seconds of audio, transcribe it and send it to the AI.

        Runs on the thread started by :meth:`speak_input`. On any failure the
        assistant says it could not understand. The temporary WAV file is
        removed afterwards, retrying briefly while it is still locked.
        """
        filename = "temp_ai_voice.wav"
        try:
            print("🎙 Recording voice...")
            recording = sd.rec(int(5 * 44100), samplerate=44100, channels=1, dtype='int16')
            sd.wait()
            wavio.write(filename, recording, 44100, sampwidth=2)

            recognizer = sr.Recognizer()
            with sr.AudioFile(filename) as source:
                audio = recognizer.record(source)
            text = recognizer.recognize_google(audio)
            print(f"📝 Recognized voice input: {text}")
            self.update_log("You (voice)", text)
            self.ask_ai(text)
        except Exception as e:
            print(f"❌ Voice recognition failed: {e}")
            self.speak("Sorry, I could not understand.")
        finally:
            # The file can still be locked for a moment; retry a few times.
            for _ in range(5):
                try:
                    if os.path.exists(filename):
                        os.remove(filename)
                        print("🧹 Temp voice file deleted.")
                    break
                except PermissionError:
                    time.sleep(0.1)

    @staticmethod
    def _extract_reply(response):
        """Return the stripped text of the first choice in ``response``.

        Raises:
            ValueError: If the first choice has no text (``None`` or blank).
        """
        content = response.choices[0].message.content
        if not content or not content.strip():
            raise ValueError("model returned an empty reply")
        return content.strip()

    def ask_ai(self, user_input):
        """Send ``user_input`` with the conversation so far and speak the reply.

        On success the reply is recorded in ``chat_history`` and
        ``answer_history`` and then spoken. If the request fails or yields no
        text, the unanswered user turn is removed from ``chat_history`` and an
        apology is spoken instead.

        Args:
            user_input: The user's message text.
        """
        print(f"🤖 Sending input to AI: {user_input}")
        user_turn = {"role": "user", "content": user_input}
        self.chat_history.append(user_turn)
        # Only the request and reply extraction are guarded, so a speech-engine
        # failure is not misreported as an API error.
        try:
            response = client.chat.completions.create(
                model=SECRETS["MODEL"],
                messages=self.chat_history,
                max_tokens=500,
                temperature=0.7
            )
            reply = self._extract_reply(response)
        except Exception as e:
            print("❌ API Error:", e)
            # Drop the unanswered turn so a retry does not resend it twice.
            if self.chat_history and self.chat_history[-1] is user_turn:
                self.chat_history.pop()
            self.speak("I'm having trouble thinking right now.")
            return
        print(f"✅ AI Response received: {reply}")
        # Record the reply before speaking so it is kept even if speech fails.
        self.chat_history.append({"role": "assistant", "content": reply})
        self.answer_history.append(reply)
        self.speak(reply)

# ✅ Class defined test print
print("🧪 AIVoiceAssistantApp class defined successfully.")

🧪 AIVoiceAssistantApp class defined successfully.


## 🧩🚀 **Main Function**

In [21]:
if __name__ == "__main__":
    # Start on the launcher window; its buttons destroy it and open the
    # voice-command or AI assistant window instead.
    root = tk.Tk()
    HomeScreen(root)
    root.mainloop()  # runs the Tk event loop for the launcher

🟦 Home screen initialized.
✅ UI elements (label + buttons) created successfully.
