# EE782 - Programming Assignment 2: AI Guard Agent

**Group Members:** Your Name, Your Partner's Name

This notebook presents a complete implementation of the AI Guard Agent. The system uses vision, speech, and language models to monitor a room, recognize trusted individuals, and interact with unrecognized persons.

### Activation and Basic Input

#### 1. Trusted User Enrollment

In [7]:
import face_recognition
import pickle
import os
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
import numpy as np

# --- Configuration ---
TRUSTED_FACES_DIR = "D:/AI_Guard_Agent/trusted_faces"
MODEL_FILE = "known_faces_model.pkl"

print("[STRETCH GOAL] Starting face enrollment and classifier training...")
known_encodings = []
known_names = []

if not os.path.exists(TRUSTED_FACES_DIR):
    os.makedirs(TRUSTED_FACES_DIR)
    print(f"[WARNING] Created '{TRUSTED_FACES_DIR}' directory. Please add images and run again.")
else:
    for filename in os.listdir(TRUSTED_FACES_DIR):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            try:
                image_path = os.path.join(TRUSTED_FACES_DIR, filename)
                
                # --- NEW LOGIC TO PARSE NAMES ---
                # Extracts the name from 'name_1.jpg' as 'name'
                name = os.path.splitext(filename)[0].split('_')[0]
                # --------------------------------

                image = face_recognition.load_image_file(image_path)
                face_encodings = face_recognition.face_encodings(image)
                
                if face_encodings:
                    known_encodings.append(face_encodings[0])
                    known_names.append(name)
                    print(f"[SUCCESS] Processed {filename} for person: {name}")
                else:
                    print(f"[WARNING] No face found in {filename}. Skipping.")
            except Exception as e:
                print(f"[ERROR] Could not process {filename}: {e}")

    if len(np.unique(known_names)) < 2:
        print("\n[ERROR] Classifier training requires at least two different people.")
        print("Please add images for at least one more person and run again.")
    elif known_encodings:
        print(f"\n[INFO] Found {len(known_encodings)} faces for {len(np.unique(known_names))} people. Training SVM classifier...")
        
        label_encoder = LabelEncoder()
        labels = label_encoder.fit_transform(known_names)
        
        # Train the SVM classifier
        classifier = SVC(gamma="scale", probability=True)
        classifier.fit(known_encodings, labels)
        print("[INFO] Classifier training complete.")
        
        # Save the trained model
        with open(MODEL_FILE, "wb") as f:
            pickle.dump({"classifier": classifier, "label_encoder": label_encoder}, f)
        print(f"[INFO] Saved trained model to {MODEL_FILE}")
    else:
        print("[INFO] No faces were enrolled.")



[STRETCH GOAL] Starting face enrollment and classifier training...
[SUCCESS] Processed kshitiz_yadav.png for person: kshitiz
[SUCCESS] Processed naresh_1.png for person: naresh
[SUCCESS] Processed naresh_10.png for person: naresh
[SUCCESS] Processed naresh_11.png for person: naresh
[SUCCESS] Processed naresh_12.png for person: naresh
[SUCCESS] Processed naresh_2.png for person: naresh
[SUCCESS] Processed naresh_3.png for person: naresh
[SUCCESS] Processed naresh_4.png for person: naresh
[SUCCESS] Processed naresh_5.png for person: naresh
[SUCCESS] Processed naresh_6.png for person: naresh
[SUCCESS] Processed naresh_7.png for person: naresh
[SUCCESS] Processed naresh_8.png for person: naresh
[SUCCESS] Processed naresh_9.png for person: naresh
[SUCCESS] Processed saarthak_1.png for person: saarthak
[SUCCESS] Processed saarthak_2.png for person: saarthak

[INFO] Found 15 faces for 3 people. Training SVM classifier...
[INFO] Classifier training complete.
[INFO] Saved trained model to known

#### Text-to-Speech Module (Testing Speaker Output)

In [8]:
from gtts import gTTS
from playsound import playsound
import os
import tempfile

def test_speak(text):
    """A standalone function to test the Text-to-Speech (TTS) module."""
    print(f"[TTS DEMO]: Attempting to say: '{text}'")
    try:
        tts = gTTS(text=text, lang='en')
        # Use the system's temporary directory to avoid permission errors
        temp_dir = tempfile.gettempdir()
        audio_file = os.path.join(temp_dir, "tts_test.mp3")
        
        tts.save(audio_file)
        playsound(audio_file)
        os.remove(audio_file)
        print("[SUCCESS] TTS function executed successfully.")
    except Exception as e:
        print(f"[ERROR] TTS failed: {e}")

if __name__ == "__main__":
    test_speak("This is a test of the text to speech system.")

[TTS DEMO]: Attempting to say: 'This is a test of the text to speech system.'
[SUCCESS] TTS function executed successfully.


#### 2. Speech Recognition (ASR) + Text-to-Speech (TTS):

In [11]:
import speech_recognition as sr
from gtts import gTTS
from playsound import playsound
import os
import time
import tempfile

# --- Configuration ---
ACTIVATION_COMMAND = "guard my room"
DEACTIVATION_COMMAND = "stand down"

class AI_Guard:
    def __init__(self):
        """Initializes the AI Guard System."""
        self.guard_mode_active = False
        self.recognizer = sr.Recognizer()
        self.microphone = sr.Microphone()
        
        print("[INFO] AI Guard System Initialized. Calibrating microphone...")
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source, duration=1)
        print("[INFO] Microphone calibrated. Say 'guard my room' to activate.")

    def speak(self, text):
        """Converts text to speech using gTTS and plays it."""
        print(f"[GUARD SAYS]: {text}")
        try:
            tts = gTTS(text=text, lang='en')
            # Use the system's temporary directory to avoid permission errors
            temp_dir = tempfile.gettempdir()
            audio_file = os.path.join(temp_dir, "response.mp3")
            tts.save(audio_file)
            playsound(audio_file)
            os.remove(audio_file)
        except Exception as e:
            print(f"[ERROR] Could not speak due to an error: {e}")

    def listen_for_command(self):
        """Listens for a command from the user and converts it to text."""
        command = ""
        try:
            with self.microphone as source:
                print("[INFO] Listening for command...")
                audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=3)
            command = self.recognizer.recognize_google(audio).lower()
            print(f"[USER SAID]: {command}")
        except sr.WaitTimeoutError:
            pass 
        except sr.UnknownValueError:
            print("[INFO] Could not understand the audio.")
        except sr.RequestError as e:
            print(f"[ERROR] Could not request results from Google Speech Recognition service; {e}")
        return command

    def run(self):
        """Main loop for the AI Guard with graceful shutdown."""
        try:
            while True:
                command = self.listen_for_command()

                if self.guard_mode_active:
                    if DEACTIVATION_COMMAND in command:
                        self.guard_mode_active = False
                        self.speak("Guard mode deactivated.")
                    else:
                        print("[STATUS] Guard mode is active. Monitoring...")
                else:
                    if ACTIVATION_COMMAND in command:
                        self.guard_mode_active = True
                        self.speak("Guard mode activated. I will protect this room.")

                time.sleep(1) # Small delay to prevent high CPU usage
        except KeyboardInterrupt:
            print("\n[INFO] Program interrupted by user. Shutting down.")

if __name__ == "__main__":
    guard = AI_Guard()
    guard.run()


[INFO] AI Guard System Initialized. Calibrating microphone...
[INFO] Microphone calibrated. Say 'guard my room' to activate.
[INFO] Listening for command...
[USER SAID]: guard my room
[GUARD SAYS]: Guard mode activated. I will protect this room.
[INFO] Listening for command...
[USER SAID]: stand down you fucking idiot
[GUARD SAYS]: Guard mode deactivated.
[INFO] Listening for command...
[INFO] Could not understand the audio.
[INFO] Listening for command...

[INFO] Program interrupted by user. Shutting down.


### Face Recognition and Verify Trusted User Enrollment

In [None]:
import speech_recognition as sr
from gtts import gTTS
import os
import cv2
import face_recognition
import numpy as np
import pickle
import time
import tempfile
import threading
import pygame # Import the pygame library

# --- Configuration ---
ACTIVATION_COMMAND = "guard my room"
DEACTIVATION_COMMAND = "stand down"
MODEL_FILE = "known_faces_model.pkl"
RECOGNITION_THRESHOLD = 0.75


# --- Base Class Definition (UPDATED with Pygame) ---
class AI_Guard:
    def __init__(self):
        self.guard_mode_active = False
        self.recognizer = sr.Recognizer()
        self.microphone = sr.Microphone()
        
        # --- NEW: Initialize the Pygame mixer for audio playback ---
        pygame.mixer.init()
        # --------------------------------------------------------

        print("[INFO] AI Guard System Initialized. Calibrating microphone...")
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source, duration=1)
        print("[INFO] Microphone calibrated.")

    def speak(self, text):
        """Converts text to speech using gTTS and plays it with Pygame."""
        print(f"[GUARD SAYS]: {text}")
        try:
            tts = gTTS(text=text, lang='en')
            temp_dir = tempfile.gettempdir()
            audio_file = os.path.join(temp_dir, "response.mp3")
            tts.save(audio_file)

            # --- NEW: Use Pygame to load and play the audio ---
            pygame.mixer.music.load(audio_file)
            pygame.mixer.music.play()
            while pygame.mixer.music.get_busy():
                time.sleep(0.1)
            pygame.mixer.music.unload() # Unload the file to allow deletion
            # ------------------------------------------------

            os.remove(audio_file)
        except Exception as e:
            print(f"[ERROR] Could not speak due to an error: {e}")

    def listen_for_command(self):
        command = ""
        try:
            with self.microphone as source:
                print("[INFO] Listening for a command...")
                audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=4)
            command = self.recognizer.recognize_google(audio).lower()
            print(f"[USER SAID]: {command}")
        except (sr.WaitTimeoutError, sr.UnknownValueError, sr.RequestError):
            pass
        return command


# --- Vision Class (No changes needed in this class) ---
class AI_Guard_Vision(AI_Guard):
    def __init__(self):
        super().__init__()
        try:
            with open(MODEL_FILE, "rb") as f:
                self.model_data = pickle.load(f)
            print("[INFO] Loaded trained face recognition model.")
        except FileNotFoundError:
            self.speak(f"Error: Model file '{MODEL_FILE}' not found. Please run the enrollment script first.")
            exit()
            
        self.video_capture = cv2.VideoCapture(0)
        if not self.video_capture.isOpened():
            self.speak("Error: Cannot open webcam.")
            exit()
            
        self.last_seen_trusted_time = 0
        self.last_unrecognized_alert_time = 0
        self.cooldown_period = 10
        self.stop_event = threading.Event()
        self.vision_window_active = False

    def process_vision(self):
        ret, frame = self.video_capture.read()
        if not ret: return

        rgb_small_frame = cv2.cvtColor(cv2.resize(frame, (0, 0), fx=0.25, fy=0.25), cv2.COLOR_BGR2RGB)
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        is_any_person_present = len(face_encodings) > 0
        found_trusted_person = False

        classifier = self.model_data["classifier"]
        label_encoder = self.model_data["label_encoder"]

        for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
            probabilities = classifier.predict_proba([face_encoding])[0]
            best_match_index = np.argmax(probabilities)
            predicted_name = label_encoder.classes_[best_match_index]
            confidence = probabilities[best_match_index]
            
            print(f"[DEBUG] Predicted: {predicted_name}, Confidence: {confidence:.2f}")
            
            display_name = "Unrecognized"
            if confidence > RECOGNITION_THRESHOLD:
                found_trusted_person = True
                display_name = predicted_name.replace('_', ' ')
                
                current_time = time.time()
                if current_time - self.last_seen_trusted_time > self.cooldown_period:
                    self.speak(f"Welcome, {display_name}. Glad to see you.")
                    self.last_seen_trusted_time = current_time

            top *= 4; right *= 4; bottom *= 4; left *= 4
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            label = f"{display_name} ({confidence:.2f})"
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 255, 0), cv2.FILLED)
            cv2.putText(frame, label, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)

        if not found_trusted_person and is_any_person_present:
            current_time = time.time()
            if current_time - self.last_unrecognized_alert_time > self.cooldown_period:
                self.speak("Warning. An unrecognized person has been detected.")
                self.last_unrecognized_alert_time = current_time
        
        cv2.imshow('AI Guard Vision', frame)
        self.vision_window_active = True

    def _threaded_listener(self):
        while not self.stop_event.is_set():
            command = self.listen_for_command()
            if ACTIVATION_COMMAND in command and not self.guard_mode_active:
                self.guard_mode_active = True
                self.speak("Guard mode activated. Vision system online.")
            elif DEACTIVATION_COMMAND in command and self.guard_mode_active:
                self.guard_mode_active = False
                self.speak("Guard mode deactivated. Vision system offline.")

    def run(self):
        self.stop_event.clear()
        listener_thread = threading.Thread(target=self._threaded_listener, daemon=True)
        listener_thread.start()
        
        try:
            print(f"\n[INFO] AI Guard is running. System is now listening for activation commands.")
            while not self.stop_event.is_set():
                if self.guard_mode_active:
                    self.process_vision()
                else:
                    if self.vision_window_active:
                        cv2.destroyWindow('AI Guard Vision')
                        self.vision_window_active = False
                
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print("\n[INFO] 'q' key pressed. Shutting down.")
                    break
                
                time.sleep(0.05 if self.guard_mode_active else 0.5)

        except KeyboardInterrupt:
            print("\n[INFO] Program interrupted by user. Shutting down.")
        finally:
            self.stop_event.set()
            listener_thread.join(timeout=1.0)
            self.video_capture.release()
            cv2.destroyAllWindows()
            print("[INFO] Webcam released and all windows closed.")

if __name__ == "__main__":
    guard = AI_Guard_Vision()
    guard.run()


[INFO] AI Guard System Initialized. Calibrating microphone...
[INFO] Microphone calibrated.
[INFO] Loaded trained face recognition model.

[INFO] AI Guard is running. System is now listening for activation commands.
[INFO] Listening for a command...
[USER SAID]: guard my room
[GUARD SAYS]: Guard mode activated. Vision system online.
[DEBUG] Predicted: naresh, Confidence: 0.82
[ERROR] Could not speak due to an error: No such file or directory: 'C:\Users\nares\AppData\Local\Temp\response.mp3'.
[INFO] Listening for a command...
[DEBUG] Predicted: naresh, Confidence: 0.77
[DEBUG] Predicted: naresh, Confidence: 0.80

[INFO] Listening for a command...[DEBUG] Predicted: naresh, Confidence: 0.83

[DEBUG] Predicted: naresh, Confidence: 0.81
[DEBUG] Predicted: naresh, Confidence: 0.82
[DEBUG] Predicted: naresh, Confidence: 0.83
[DEBUG] Predicted: naresh, Confidence: 0.82
[DEBUG] Predicted: naresh, Confidence: 0.80
[DEBUG] Predicted: naresh, Confidence: 0.80
[DEBUG] Predicted: naresh, Confidence:

### Escalation Dialogue and Full System Integration

In [None]:
# Import necessary libraries for all system functionalities
import speech_recognition as sr  # For converting speech to text (ASR)
from gtts import gTTS             # Google Text-to-Speech for generating audio from text
import os                         # For interacting with the operating system (e.g., file deletion)
import cv2                        # OpenCV for camera access and image processing
import face_recognition           # For finding and encoding faces in images
import numpy as np                # For numerical operations, especially with face recognition arrays
import pickle                     # For loading the pre-trained face recognition model
import time                       # For handling delays and timing (e.g., cooldowns, pauses)
import tempfile                   # For creating temporary files in the system's temp directory
import threading                  # For running tasks in parallel (e.g., listening for voice commands while processing video)
import pygame                     # For reliable audio playback, replacing less stable libraries

# --- System Configuration ---
# These are global constants that can be easily tuned

ACTIVATION_COMMAND = "guard my room"          # Voice command to activate the guard mode
DEACTIVATION_COMMAND = "stand down"           # Voice command to deactivate the guard mode
MODEL_FILE = "known_faces_model.pkl"          # Filename for the saved face recognition model

# Confidence threshold for face recognition. A face is considered a match only if the
# classifier's confidence is above this value. This is a key parameter to tune.
RECOGNITION_THRESHOLD = 0.75

# --- IMPORTANT: PASTE YOUR GROQ API KEY HERE ---
# The API key is required to use the Groq service for the LLM.
GROQ_API_KEY = "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" # Groq API KEY

# --- Global Client Initialization for the LLM ---
# Initialize the 'client' variable to None. It will be configured if an API key is provided.
client = None
try:
    # Check if a valid API key has been provided (and is not the placeholder)
    if GROQ_API_KEY != "YOUR_GROQ_API_KEY" and GROQ_API_KEY:
        from groq import Groq                                   # Import the Groq library only if needed
        client = Groq(api_key=GROQ_API_KEY)                     # Create the client object to communicate with the Groq API
        print("[INFO] Groq client configured successfully.")
    else:
        print("[WARNING] Groq API Key is not set. LLM features will be disabled.")
except Exception as e:
    print(f"[ERROR] Failed to configure Groq client: {e}")

# --- Base AI Guard Class ---
# This class handles the core audio input (ASR) and output (TTS) functionalities.
class AI_Guard:
    def __init__(self):
        self.guard_mode_active = False        # State variable to track if the guard mode is active
        self.recognizer = sr.Recognizer()     # Initialize the speech recognizer
        self.microphone = sr.Microphone()     # Initialize the microphone
        pygame.mixer.init()                   # Initialize the Pygame mixer for reliable audio playback

        # Create a threading lock to prevent multiple parts of the program
        # from trying to speak at the exact same time, which can cause file access errors.
        self.speak_lock = threading.Lock()

        print("[INFO] AI Guard System Initialized. Calibrating microphone...")
        # Listen for 1 second to adjust the recognizer for ambient noise levels
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source, duration=1)
        print("[INFO] Microphone calibrated.")

    def speak(self, text):
        """Converts text to speech using gTTS and plays it with Pygame."""
        # Acquire the lock. This ensures that if another thread tries to call speak(),
        # it will wait until the current speech is finished.
        with self.speak_lock:
            print(f"[GUARD SAYS]: {text}")
            try:
                tts = gTTS(text=text, lang='en')                              # Create the gTTS object with the text to be spoken
                temp_dir = tempfile.gettempdir()                              # Get the system's temporary directory path to avoid permission errors
                audio_file = os.path.join(temp_dir, "response.mp3")            # Define the full path for the temporary audio file
                tts.save(audio_file)                                           # Save the generated speech to the mp3 file

                # Use Pygame's music mixer to play the audio file
                pygame.mixer.music.load(audio_file)
                pygame.mixer.music.play()
                
                # Wait in a loop until the audio has finished playing
                while pygame.mixer.music.get_busy():
                    time.sleep(0.1)
                
                pygame.mixer.music.unload()   # Unload the file so it can be safely deleted
                os.remove(audio_file)        # Remove the temporary audio file
            except Exception as e:
                print(f"[ERROR] Could not speak due to an error: {e}")
        # The lock is automatically released here

    def listen_for_command(self):
        """Listens for a command via the microphone and uses Google Web Speech API."""
        command = ""
        try:
            with self.microphone as source:
                print("[INFO] Listening for a command...")
                # Listen for up to 5 seconds, stopping after 4 seconds of speech
                audio = self.recognizer.listen(source, timeout=5, phrase_time_limit=4)
            
            print("[INFO] Transcribing with Google Speech Recognition...")
            # Use Google's online service to convert the audio to text
            command = self.recognizer.recognize_google(audio).lower()
            print(f"[USER SAID]: {command}")

        # Handle common exceptions for speech recognition
        except sr.WaitTimeoutError:
            pass # This is expected if no one speaks
        except sr.UnknownValueError:
            print("[INFO] Google Speech Recognition could not understand audio.")
        except sr.RequestError as e:
            print(f"[ERROR] Could not request results from Google service; {e}")
        return command

# --- Full System Class ---
# This class inherits from AI_Guard and adds vision, LLM, and state management.
class AI_Guard_Full(AI_Guard):
    def __init__(self):
        # Initialize the parent AI_Guard class (ASR, TTS, etc.)
        super().__init__()
        try:
            # Load the pre-trained SVM classifier and label encoder
            with open(MODEL_FILE, "rb") as f:
                self.model_data = pickle.load(f)
            print("[INFO] Loaded trained face recognition model.")
        except FileNotFoundError:
            self.speak(f"Error: Model file '{MODEL_FILE}' not found. Please run enroll_faces.py first.")
            exit()
        
        # Initialize the webcam (device 0 is usually the built-in one)
        self.video_capture = cv2.VideoCapture(0)
        if not self.video_capture.isOpened():
            self.speak("Error: Cannot open webcam.")
            exit()
        
        # Timestamps to prevent spamming welcome/warning messages
        self.last_seen_trusted_time = 0
        self.cooldown_period = 10  # 10-second cooldown
        # Event to signal the background listener thread to stop
        self.stop_event = threading.Event()
        # Flag to track if the OpenCV window is currently open
        self.vision_window_active = False
        # Dictionary to manage the state of an intruder encounter
        self.intruder_state = {"detected": False, "start_time": None, "escalation_level": 0, "last_warning_time": 0}
        # Time intervals (in seconds) for escalating warnings
        self.escalation_intervals = {1: 0, 2: 15, 3: 30}

    def generate_response(self, level):
        """Generates a spoken response from the LLM based on the escalation level."""
        if not client: return "Language model not available."
        
        # Context-specific prompts for a college hostel environment
        system_prompts = {
            1: "You are a friendly AI assistant guarding a college hostel room. In one short, casual sentence, politely ask who they are or if they are looking for the room's resident.",
            2: "The unrecognized person has not left. Now, adopt a firmer tone. In one short sentence, state that this is a private hostel room and they need to leave.",
            3: "The intruder is still here. Be very stern and issue a final warning. In one short sentence, state that they are trespassing and that the hostel warden or campus security will be alerted immediately if they don't leave."
        }
        
        # Get the appropriate prompt for the current level
        prompt_text = system_prompts.get(level, "An error occurred in security logic.")
        
        try:
            print(f"[INFO] Generating Groq response for escalation level {level}...")
            # Send the prompt to the Groq API using the Llama 3.1 model
            chat_completion = client.chat.completions.create(
                messages=[{"role": "system", "content": prompt_text}],
                model="llama-3.1-8b-instant",
                temperature=0.7, max_tokens=50
            )
            # Extract and return the generated text
            return chat_completion.choices[0].message.content.strip()
        except Exception as e:
            print(f"[ERROR] Groq generation failed: {e}")
            return "There seems to be an issue with my response circuits."

    def handle_unrecognized_person(self):
        """Manages the state and logic for an escalating encounter with an intruder."""
        current_time = time.time()
        # If this is the first time seeing an intruder
        if not self.intruder_state["detected"]:
            print("[ALERT] Intruder detected for the first time.")
            # Update the state to start the encounter
            self.intruder_state.update({"detected": True, "start_time": current_time, "escalation_level": 1, "last_warning_time": current_time})
            # Generate and speak the Level 1 warning
            response = self.generate_response(1)
            self.speak(response)
            return
        
        # Calculate how long the intruder has been present
        time_since_detection = current_time - self.intruder_state["start_time"]
        new_level = 0
        # Determine if it's time to escalate to the next warning level
        if time_since_detection > self.escalation_intervals[3] and self.intruder_state["escalation_level"] < 3:
            new_level = 3
        elif time_since_detection > self.escalation_intervals[2] and self.intruder_state["escalation_level"] < 2:
            new_level = 2
        
        # If a new escalation level has been reached
        if new_level > self.intruder_state["escalation_level"]:
            print(f"[ALERT] Escalating to level {new_level}.")
            self.intruder_state.update({"escalation_level": new_level, "last_warning_time": current_time})
            response = self.generate_response(new_level)
            self.speak(response)

    def reset_intruder_state(self):
        """Resets the intruder encounter state back to default."""
        if self.intruder_state["detected"]:
            print("[INFO] Threat cleared. Resetting intruder state.")
            self.intruder_state = {"detected": False, "start_time": None, "escalation_level": 0, "last_warning_time": 0}

    def process_vision(self):
        """The main computer vision loop: captures frame, finds faces, and identifies them."""
        ret, frame = self.video_capture.read()
        if not ret: return
        
        # Create a smaller version of the frame for faster face recognition
        rgb_small_frame = cv2.cvtColor(cv2.resize(frame, (0, 0), fx=0.25, fy=0.25), cv2.COLOR_BGR2RGB)
        
        # Find all faces and their encodings in the small frame
        face_locations = face_recognition.face_locations(rgb_small_frame)
        face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
        
        is_trusted_person_present = False
        is_any_person_present = len(face_encodings) > 0
        classifier = self.model_data["classifier"]
        label_encoder = self.model_data["label_encoder"]

        # Loop through each detected face
        for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
            # Use the trained SVM classifier to get prediction probabilities
            probabilities = classifier.predict_proba([face_encoding])[0]
            best_match_index = np.argmax(probabilities)
            predicted_name = label_encoder.classes_[best_match_index]
            confidence = probabilities[best_match_index]
            
            print(f"[DEBUG] Predicted: {predicted_name}, Confidence: {confidence:.2f}")

            display_name = "Unrecognized"
            # If the confidence is above our threshold, we have a match
            if confidence > RECOGNITION_THRESHOLD:
                is_trusted_person_present = True
                display_name = predicted_name.replace('_', ' ')
                self.reset_intruder_state() # A trusted person is here, so the threat is cleared
                
                current_time = time.time()
                # Greet the trusted person if enough time has passed since the last greeting
                if current_time - self.last_seen_trusted_time > self.cooldown_period:
                    self.speak(f"Welcome back, {display_name}.")
                    self.last_seen_trusted_time = current_time
            
            # --- Visual Feedback on the Pop-up Window ---
            top *= 4; right *= 4; bottom *= 4; left *= 4                                 # Scale face locations back up to the original frame size
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)              # Draw a green box around the face
            label = f"{display_name} ({confidence:.2f})"                                        # Create the text label with the name and confidence score
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 255, 0), cv2.FILLED)     # Draw a filled rectangle as a background for the label
            cv2.putText(frame, label, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 0.8, (255, 255, 255), 1)     # Draw the text label on the frame

        # Logic to handle an unrecognized person
        if not is_trusted_person_present and is_any_person_present:
            self.handle_unrecognized_person()
        # Logic to reset the intruder state if everyone leaves
        elif not is_any_person_present:
            if self.intruder_state["detected"]:
                self.reset_intruder_state()
        
        # Display the resulting frame in a pop-up window
        cv2.imshow('AI Guard System', frame)
        self.vision_window_active = True

    def _threaded_listener(self):
        """This function runs in a separate thread, dedicated to listening for voice commands."""
        # This loop runs continuously in the background
        while not self.stop_event.is_set():
            # Listen for a single command
            command = self.listen_for_command()

            # Process the command
            if ACTIVATION_COMMAND in command and not self.guard_mode_active:
                self.guard_mode_active = True
                self.speak("Guard mode activated. Monitoring the room.")
            elif DEACTIVATION_COMMAND in command and self.guard_mode_active:
                self.guard_mode_active = False
                self.speak("Guard mode deactivated.")
                self.reset_intruder_state()

    def run(self):
        """The main application entry point."""
        self.stop_event.clear()
        # Create and start the background listener thread
        listener_thread = threading.Thread(target=self._threaded_listener, daemon=True)
        listener_thread.start()
        
        try:
            print(f"\n[INFO] AI Guard is running. Say '{ACTIVATION_COMMAND}' to activate.")
            # The main loop now primarily handles vision processing and window management
            while not self.stop_event.is_set():
                if self.guard_mode_active:
                    self.process_vision()
                else:
                    # If idle, ensure the vision window is closed
                    if self.vision_window_active:
                        cv2.destroyWindow('AI Guard System')
                        self.vision_window_active = False
                
                # Check if the 'q' key was pressed in the OpenCV window to quit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    print("\n[INFO] 'q' key pressed. Shutting down.")
                    break
                
                # Sleep to manage CPU usage. Short sleep when active, longer when idle.
                time.sleep(0.05 if self.guard_mode_active else 0.5)

        except KeyboardInterrupt:
            print("\n[INFO] Program interrupted by user. Shutting down.")
        finally:
            # --- Graceful Shutdown ---
            self.stop_event.set()   # Signal all threads to stop
            listener_thread.join(timeout=1.0)    # Wait for the listener thread to finish
            self.video_capture.release()       # Release the webcam
            cv2.destroyAllWindows()       # Close all OpenCV windows
            pygame.mixer.quit()
            print("[INFO] System resources released. Shutdown complete.")

if __name__ == "__main__":
    guard = AI_Guard_Full()
    guard.run()



[INFO] Groq client configured successfully.
[INFO] AI Guard System Initialized. Calibrating microphone...
[INFO] Microphone calibrated.
[INFO] Loaded trained face recognition model.

[INFO] AI Guard is running. Say 'guard my room' to activate.
[INFO] Listening for a command...
[INFO] Transcribing with Google Speech Recognition...
[USER SAID]: guard my room
[GUARD SAYS]: Guard mode activated. Monitoring the room.
[DEBUG] Predicted: naresh, Confidence: 0.84
[GUARD SAYS]: Welcome back, naresh.
[INFO] Listening for a command...
[DEBUG] Predicted: naresh, Confidence: 0.73
[ALERT] Intruder detected for the first time.
[INFO] Generating Groq response for escalation level 1...
[INFO] Transcribing with Google Speech Recognition...
[GUARD SAYS]: Hi there, can I help you - are you a resident of this room or looking for someone in particular?
[INFO] Google Speech Recognition could not understand audio.
[INFO] Listening for a command...
[INFO] Transcribing with Google Speech Recognition...
[USER SA