In [None]:
import os
import pyttsx3
import speech_recognition as sr
import datetime
import time
import pywhatkit
import wikipedia
import webbrowser
import pyautogui
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

class Assistant:
    def __init__(self):
        self.engine = pyttsx3.init("sapi5")
        self.voices = self.engine.getProperty("voices")
        self.current_voice = "female"  # Default voice set to female
        self.set_voice(voice_gender=self.current_voice)  # Set the default voice to female
        self.set_rate(170)

    def set_voice(self, voice_gender="male"):
        """Set Assistant Voice male or female"""
        if voice_gender.lower() == "male":
            self.engine.setProperty("voice", self.voices[0].id)
            self.current_voice = "male"
        elif voice_gender.lower() == "female":
            if len(self.voices) > 1:
                self.engine.setProperty("voice", self.voices[1].id)
                self.current_voice = "female"
            else:
                print("Female voice not available. Using default male voice.")
        else:
            print("Invalid gender. Please choose 'male' or 'female'.")

    def toggle_voice(self):
        """Toggle the voice between male and female"""
        if self.current_voice == "male":
            self.set_voice("female")
            self.speak("Voice changed to female.")
        else:
            self.set_voice("male")
            self.speak("Voice changed to male.")

    def set_rate(self, rate=170):
        """Set the rate of speech."""
        self.engine.setProperty("rate", rate)

    def speak(self, audio):
        """Convert text to speech."""
        self.engine.say(audio)
        self.engine.runAndWait()

    def take_command(self):
        """Capture and process user voice input."""
        recognizer = sr.Recognizer()
        with sr.Microphone() as source:
            print("Listening...")
            recognizer.pause_threshold = 1
            recognizer.energy_threshold = 300
            try:
                audio = recognizer.listen(source, timeout=4, phrase_time_limit=4)
                print("Processing...")
                query = recognizer.recognize_google(audio, language='en-bn')
                print(f"You said: {query}")
                return query.lower()
            except sr.UnknownValueError:
                print("Could not understand your voice. Please try again.")
            except sr.RequestError as e:
                print(f"Could not request results; {e}")
            except Exception as e:
                print(f"An error occurred: {e}")
            return "None"


class SearchingFromWeb:
    def __init__(self, assistant):
        self.assistant = assistant

    def search_google(self, command):
        """Search Google or Wikipedia based on the user's command."""
        if "google" in command:
            self.assistant.speak("Searching on Google...")
            command = command.replace("google", "").strip()
            try:
                pywhatkit.search(command)
                result = wikipedia.summary(command, sentences=1)
                self.assistant.speak(result)
            except Exception:
                self.assistant.speak("I couldn't find any relevant results.")

    def search_youtube(self, command):
        """Search YouTube based on the user's command."""
        if "youtube" in command:
            self.assistant.speak("Searching on YouTube...")
            command = command.replace("youtube", "").strip()
            pywhatkit.playonyt(command)
            self.assistant.speak("Playing your selection.")

    def search_wikipedia(self, command):
        """Search Wikipedia based on the user's command."""
        if "wikipedia" in command:
            self.assistant.speak("Searching on Wikipedia...")
            command = command.replace("wikipedia", "").strip()
            try:
                result = wikipedia.summary(command, sentences=2)
                self.assistant.speak(f"According to Wikipedia: {result}")
            except wikipedia.exceptions.DisambiguationError:
                self.assistant.speak("There are multiple results for your query.")
            except wikipedia.exceptions.PageError:
                self.assistant.speak("I couldn't find any results.")
            except Exception as e:
                self.assistant.speak(f"An error occurred: {e}")


class ControlAppWeb:
    """Control apps and web browsers."""

    def __init__(self, assistant):
        self.assistant = assistant
        self.dict_app = {
            "command prompt": "cmd",
            "paint": "mspaint",
            "word": "winword",
            "excel": "excel",
            "chrome": "chrome",
            "vscode": "code",
            "powerpoint": "powerpnt",
        }

    def open_webapp(self, command):
        """Open a website or application."""
        self.assistant.speak("Opening, sir.")
        if ".com" in command or ".org" in command:
            webbrowser.open(f"https://www.{command.replace('open', '').strip()}")
        else:
            for app, process in self.dict_app.items():
                if app in command:
                    os.system(f"start {process}")

    def close_app_web(self, command):
        """Close tabs or applications."""
        if "tab" in command:
            count = int(command.split(" ")[0]) if command.split(" ")[0].isdigit() else 1
            for _ in range(count):
                pyautogui.hotkey("ctrl", "w")
                time.sleep(0.2)
            self.assistant.speak(f"Closed {count} tab(s).")
        else:
            for app, process in self.dict_app.items():
                if app in command:
                    os.system(f"taskkill /f /im {process}.exe")

    def open_new_tab(self):
        """Open a new browser tab."""
        self.assistant.speak("Opening a new tab, sir.")
        pyautogui.hotkey("ctrl", "t")

    def navigate_tabs(self, direction="next"):
        """Navigate to the next or previous tab."""
        if direction == "next":
            self.assistant.speak("Navigating to the next tab, sir.")
            pyautogui.hotkey("ctrl", "tab")
        elif direction == "previous":
            self.assistant.speak("Navigating to the previous tab, sir.")
            pyautogui.hotkey("ctrl", "shift", "tab")
        else:
            self.assistant.speak("Invalid command. Please say 'next' or 'previous', sir.")


def wish_me(assistant):
    """Greet the user based on the time of day."""
    hour = int(datetime.datetime.now().hour)
    if 6 <= hour < 12:
        assistant.speak("Good morning, sir.")
    elif 12 <= hour < 18:
        assistant.speak("Good afternoon, sir.")
    else:
        assistant.speak("Good evening, sir.")


class Chatbot:
    def __init__(self, model_name="microsoft/DialoGPT-small"):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.chat_history_ids = None
        self.training_data = []

    def generate_response(self, user_input):
        new_user_input_ids = self.tokenizer.encode(user_input + self.tokenizer.eos_token, return_tensors="pt")
        bot_input_ids = torch.cat([self.chat_history_ids, new_user_input_ids], dim=-1) if self.chat_history_ids is not None else new_user_input_ids
        self.chat_history_ids = self.model.generate(bot_input_ids, max_length=1000, pad_token_id=self.tokenizer.eos_token_id, temperature=0.7, top_k=50, top_p=0.9)
        response = self.tokenizer.decode(self.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
        self.training_data.append({"input": user_input, "response": response})
        self.auto_train()
        return response

    def auto_train(self):
        if len(self.training_data) > 10:
            print("Training model with new data...")
            inputs = self.tokenizer([data["input"] for data in self.training_data], return_tensors="pt", padding=True, truncation=True)
            labels = self.tokenizer([data["response"] for data in self.training_data], return_tensors="pt", padding=True, truncation=True)["input_ids"]
            self.model.train()
            with torch.no_grad():
                self.model(**inputs, labels=labels)
            self.training_data = []
            print("Model fine-tuned with new conversations.")


if __name__ == "__main__":
    assistant = Assistant()
    web_search = SearchingFromWeb(assistant)
    control_app_web = ControlAppWeb(assistant)
    chatbot = Chatbot()

    while True:
        command = assistant.take_command()
        if "wake up" in command:
            wish_me(assistant)
            assistant.speak("Hello sir, how can I assist you?")

            while True:
                command = assistant.take_command()
                if "sleep" in command:
                    assistant.speak("Goodbye!")
                    break
                elif "change voice" in command:
                    assistant.toggle_voice()
                elif "google" in command:
                    web_search.search_google(command)
                elif "youtube" in command:
                    web_search.search_youtube(command)
                elif "wikipedia" in command:
                    web_search.search_wikipedia(command)
                elif "open new tab" in command:
                    control_app_web.open_new_tab()
                elif "next tab" in command:
                    control_app_web.navigate_tabs(direction="next")
                elif "previous tab" in command:
                    control_app_web.navigate_tabs(direction="previous")
                elif "open" in command:
                    control_app_web.open_webapp(command)
                elif "close" in command:
                    control_app_web.close_app_web(command)
                elif "the time" in command:
                    strTime = datetime.datetime.now().strftime("%H:%M")
                    assistant.speak(f"Sir, the time is {strTime}")
                elif command != "None":
                    response = chatbot.generate_response(command)
                    assistant.speak(response)


Listening...
Processing...
You said: wake up
Listening...
Processing...
You said: how


The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Listening...
Processing...
You said: what are you doing now
Listening...
Processing...
You said: play song on YouTube
Listening...
Processing...
Could not understand your voice. Please try again.
Listening...


In [1]:
! pip install transformers datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-18.1.0-cp312-cp312-win_amd64.whl.metadata (3.4 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp312-cp312-win_amd64.whl.metadata (13 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py312-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
Downloading dill-0.3.8-py3-none-any.whl (116 kB)
Downloading fsspec-2024.9.0-py3-none-any.whl (179 kB)
Downloading multiprocess-0.70.16-py312-none-any.whl (146 kB)
Downloading pyarrow-18.1.0-cp312-cp312-win_amd64.whl (25.1 MB)
   -----------------------------------