In [133]:
import subprocess
import sys
import os
import asyncio
import queue
import pyaudio
import json
import threading
import re
import time
import datetime
import websocket
from collections import deque

# Dependency check and installation

def is_package_installed(module_name):
    """Report whether ``module_name`` can be imported in this interpreter.

    The probe really imports the module (so it stays loaded afterwards).
    Only ``ImportError`` counts as "not installed"; any other exception
    raised while importing propagates to the caller.
    """
    try:
        __import__(module_name)
    except ImportError:
        importable = False
    else:
        importable = True
    return importable

print("--- Initial Setup: Checking Libraries ---")

# Each entry is (pip distribution name, importable module name); the two
# often differ (e.g. "websocket-client" installs the "websocket" module).
# NOTE(review): pyaudio and websocket are already imported at the top of this
# cell, so a missing install of either fails before this check can help.
REQUIRED_PACKAGES = [
    ("google-genai", "google.genai"),
    ("tavily-python", "tavily"),
    ("pyaudio", "pyaudio"),
    ("deepgram-sdk", "deepgram"),
    ("websocket-client", "websocket"),
    ("python-dotenv", "dotenv"),
    ("groq", "groq"),
    ("transformers", "transformers"),       # Hugging Face
    ("torch", "torch"),                       # PyTorch backend
    ("huggingface-hub", "huggingface_hub")   # Hugging Face API support
]

# Remember which installs failed so the final banner does not claim success.
_failed_packages = []

for package_name, module_name in REQUIRED_PACKAGES:
    if is_package_installed(module_name):
        continue
    print(f"  -> Installing '{package_name}'...")
    try:
        # Use the kernel's own interpreter so pip targets the right environment.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
    except subprocess.CalledProcessError as e:
        print(f"  -> Failed to install '{package_name}': {e}")
        _failed_packages.append(package_name)
    else:
        print(f"  -> Installed '{package_name}'.")

if _failed_packages:
    print(f"--- Dependencies Incomplete (failed: {', '.join(_failed_packages)}) ---")
else:
    print("--- Dependencies Ready ---")

# Import AI Libraries

from google import genai
from google.genai import types
from tavily import TavilyClient
from groq import Groq
from transformers import pipeline
import torch

# Load API Keys From Local Files

def load_key(filename):
    """Read an API key from a local text file.

    Args:
        filename: Path of the file that holds the key.

    Returns:
        The file contents with surrounding whitespace stripped, or ``None``
        when the file cannot be opened, read, or decoded.
    """
    try:
        with open(filename, "r") as f:
            return f.read().strip()
    except (OSError, UnicodeError):
        # A missing or unreadable key file means "no key". Unlike a bare
        # except, this lets KeyboardInterrupt/SystemExit propagate.
        return None

GEMINI_KEY = load_key("Google-Hackathon-API-Key.txt")
TAVILY_KEY = load_key("Tavily-Hackathon-API-Key.txt")
DEEPGRAM_KEY = load_key("Deepgram-Hackathon-API-Key.txt")
GROQ_KEY = load_key("Groq-Hackathon-API-Key.txt")
HF_KEY = load_key("HuggingFace-Hackathon-API-Key.txt")  # Hugging Face

if not all([GEMINI_KEY, TAVILY_KEY, DEEPGRAM_KEY, GROQ_KEY, HF_KEY]):
    print("\nSTOP: Missing API Keys. Please ensure all text files exist.")
else:
    print("\nAPI Keys Loaded Successfully.")

# Conversation history (most recent 50 entries). It needs no client, so it is
# created before the try block: a failed client init must not leave later
# cells without FULL_TRANSCRIPT.
FULL_TRANSCRIPT = deque(maxlen=50)

# Initialize Global Clients

try:
    gemini_client = genai.Client(api_key=GEMINI_KEY)
    tavily_client = TavilyClient(api_key=TAVILY_KEY)
    groq_client = Groq(api_key=GROQ_KEY)

    # Hugging Face pipeline with GPU detection
    if HF_KEY:
        # os.environ only accepts strings; skip the token when no key was loaded.
        os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_KEY
    device = 0 if torch.cuda.is_available() else -1  # 0 = first GPU, -1 = CPU
    print(f"Device set to use {'GPU' if device>=0 else 'CPU'}")
    hf_classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
        device=device
    )

    print("AI Clients Initialized & Ready.")

except Exception as e:
    print(f"Client Init Error: {e}")


--- Initial Setup: Checking Libraries ---
--- Dependencies Ready ---

API Keys Loaded Successfully.
Device set to use CPU


Device set to use cpu


AI Clients Initialized & Ready.


In [134]:
# Query Restructuring Function (uses Groq)
async def restructure_query(context_text, raw_query):
    """Rewrite an ambiguous question into a standalone web-search query.

    The conversation context and the raw question are sent to Groq's
    ``llama-3.3-70b-versatile`` model as a JSON object; the model is asked to
    resolve pronouns and vague references using that context.

    Args:
        context_text: Recent conversation transcript used to resolve references.
        raw_query: The question as spoken by the user.

    Returns:
        The rewritten query, or ``raw_query`` unchanged when the model returns
        nothing or the request fails.
    """

    try:
        # json.dumps yields a real JSON object (double quotes, proper escaping).
        # The previous repr()-based interpolation produced Python literals,
        # which contradicted the "STRICT INPUT" JSON promise in the prompt.
        payload = json.dumps(
            {"context": context_text, "raw_question": raw_query},
            indent=2,
            ensure_ascii=False,
            default=str,
        )

        prompt = f"""
You are a query rewriting engine.

Your task is to take the ambiguous user question and rewrite it into a fully
standalone web search query by resolving all missing details using the context.

Use the structured JSON object below as STRICT INPUT:

{payload}

MANDATORY RULES:
1. The rewritten query MUST be fully understandable without the context.
2. You can use "context" to resolve vague references. but other times the previous thing said might not at all matter
   Example: "the 17" → "the iPhone 17" because context discusses an iPhone.
3. Replace any pronouns, short references, or implicit subjects with explicit nouns from context.
4. Do NOT preserve ambiguity — resolve it using if needed context.
5. Do NOT change the meaning of the question.
6. Output one single rewritten query.
7. Max 370 characters.
8. Output ONLY the rewritten query. No JSON. No quotes. No explanations.

Now produce the rewritten standalone search query:
"""

        # The Groq client is synchronous; run it in a worker thread so the
        # event loop stays responsive.
        completion = await asyncio.to_thread(
            groq_client.chat.completions.create,
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,
            max_tokens=100
        )

        result = (completion.choices[0].message.content or "").strip()
        return result if result else raw_query

    except Exception as e:
        # Best effort: fall back to the user's own wording on any failure.
        print(f"Restructure Error: {e}")
        return raw_query

print("Query Restructuring Function Loaded.")

Query Restructuring Function Loaded.


In [136]:
async def classify_intent(raw_text: str, conversation_context: str = "") -> "str | None":
    """Classify a user utterance with a Groq-hosted model.

    Args:
        raw_text: The utterance to classify.
        conversation_context: Accepted for interface compatibility but
            ignored; only ``raw_text`` is sent to the model.

    Returns:
        ``"Factual"`` when the question needs a fact-based answer,
        ``"Product-Opinion"`` when it asks for opinions on a product, service,
        or topic, and ``None`` for statements, personal (User-Opinion)
        questions, unrecognized model output, or any error.

    Prints the detected type when one of the two labels is returned.
    """

    try:
        # --- Ensure it's a question ---
        lower_text = raw_text.lower().strip()
        question_words = (
            "what", "where", "when", "why", "how", "who", "which",
            "do", "does", "is", "are", "can", "could", "would", "will"
        )
        # The trailing space avoids matching prefixes such as "island" for "is".
        question_prefixes = tuple(w + " " for w in question_words)
        if not (lower_text.endswith("?") or lower_text.startswith(question_prefixes)):
            return None  # ignore statements

        print("\n-> Agent thinking about question...")

        # --- Prompt only uses the raw question ---
        prompt = f"""
You are a context-aware question classifier. Classify the question into one of three categories:

1. Factual - asks for factual information or knowledge.
2. Product-Opinion - asks about opinions regarding a product, service, or topic.
3. User-Opinion - asks about the user's personal feelings, thoughts, or state.
   For User-Opinion questions, respond with "None".

Rules:
- Respond **exactly** with "Factual", "Product-Opinion", or "None".

Examples:

Factual:
- What is the price of the iPhone 17?
- Where can I buy a Samsung Galaxy S25?

Product-Opinion:
- How do people feel about the new iPhone 17?
- Are users satisfied with the Xbox Series X?

User-Opinion (return None):
- How are you feeling today?
- Do you like yourself?

QUESTION: "{raw_text}"
"""
        # The Groq client is synchronous; keep the event loop free.
        completion = await asyncio.to_thread(
            groq_client.chat.completions.create,
            model="moonshotai/kimi-k2-instruct",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,
            max_tokens=30
        )

        # Tolerate harmless decoration around the label (quotes, markdown
        # emphasis, trailing period, different casing) instead of silently
        # treating e.g. "Factual." as "ignore this question".
        raw_label = completion.choices[0].message.content or ""
        normalized = raw_label.strip().strip("\"'*`.").strip().lower()
        canonical_labels = {"factual": "Factual", "product-opinion": "Product-Opinion"}
        result = canonical_labels.get(normalized)

        if result is not None:
            print(f'Preparing to answer "{result}" type of question')
        return result

    except Exception as e:
        print(f"[classify_intent] Error: {e}")
        return None

print("classify_intent function loaded (context ignored).")


classify_intent function loaded (context ignored).


In [141]:
# Main Agent Logic and Execution

# --- 1. Audio Streaming Infrastructure ---

class MicrophoneStream:
    """Callback-driven microphone capture that buffers raw audio in a queue.

    ``callback`` is registered as the PyAudio stream callback and pushes each
    captured buffer onto ``queue``; consumers read from ``queue`` directly or
    through ``get_data``.
    """

    def __init__(self, rate=16000, chunk=1024):
        """Create the PyAudio handle and the chunk queue; no stream is opened yet."""
        self.rate, self.chunk = rate, chunk
        self.p = pyaudio.PyAudio()
        self.queue = queue.Queue()
        self.stream = None  # set by start()

    def callback(self, in_data, frame_count, time_info, status):
        """Enqueue the captured buffer and ask PyAudio to keep streaming."""
        self.queue.put(in_data)
        reply = (None, pyaudio.paContinue)
        return reply

    def start(self):
        """Open a mono 16-bit input stream wired to ``callback``; returns ``self``."""
        stream_options = dict(
            format=pyaudio.paInt16,
            channels=1,
            rate=self.rate,
            input=True,
            frames_per_buffer=self.chunk,
            stream_callback=self.callback,
        )
        self.stream = self.p.open(**stream_options)
        return self

    def stop(self):
        """Stop and close the stream if one was opened, then terminate PyAudio."""
        opened = self.stream
        if opened:
            opened.stop_stream()
            opened.close()
        self.p.terminate()

    def get_data(self):
        """Return the next queued chunk, or ``None`` if none arrives within 0.1 s."""
        try:
            audio = self.queue.get(timeout=0.1)
        except queue.Empty:
            audio = None
        return audio

# --- 2. Helper Tools ---

def get_full_context(current_speaking_text, current_role):
    """Build the conversation context handed to the language models.

    Joins every entry of ``FULL_TRANSCRIPT`` with newlines and, when
    ``current_speaking_text`` is non-empty, appends it as a
    ``[role]: text`` line. If the result exceeds 300 whitespace-separated
    words, only the last 300 are returned, re-joined with single spaces
    (line breaks are lost in that case).
    """
    entries = [*FULL_TRANSCRIPT]
    if current_speaking_text:
        entries += [f"[{current_role}]: {current_speaking_text}"]

    joined = "\n".join(entries)
    tokens = joined.split()
    return joined if len(tokens) <= 300 else " ".join(tokens[-300:])

def tavily_search_tool(query: str):
    """Run a basic Tavily web search and format the top results as text.

    Args:
        query: The search query.

    Returns:
        One ``- <content> (<url>)`` line per result (at most two), with runs
        of whitespace in the content collapsed to single spaces;
        ``"No results found."`` when the search returns nothing; or
        ``"Search failed."`` when the request or formatting raises.
    """
    try:
        response = tavily_client.search(query=query, search_depth="basic", max_results=2)
        results = response.get('results', [])
        if not results:
            return "No results found."

        lines = []
        for r in results:
            # Collapse whitespace outside the f-string: a backslash inside an
            # f-string expression is a SyntaxError before Python 3.12.
            snippet = re.sub(r'\s+', ' ', r['content']).strip()
            lines.append(f"- {snippet} ({r['url']})")
        return "\n".join(lines)
    except Exception as e:
        # Best effort: the caller treats this sentinel as "no search context".
        print(f"[tavily_search_tool] Error: {e}")
        return "Search failed."

async def run_agent_logic(current_text, speaker_role):
    """Answer one finished speaker turn: classify, search, then respond.

    Steps: strip filler words; for speaker "Om" keep only the text after the
    "hey google" wake phrase (and do nothing if it is absent); classify the
    query; rewrite it into a standalone search query; fetch Tavily results;
    ask Gemini for a short spoken-style answer; print it and record the turn
    and the answer in ``FULL_TRANSCRIPT``.

    Args:
        current_text: Transcribed text of the turn.
        speaker_role: ``"Om"`` (wake phrase required) or any other role label.

    Returns:
        None. Results are printed and appended to ``FULL_TRANSCRIPT``.
    """
    text = re.sub(r'(?i)\b(um|uh|so|you know)\b', '', current_text)

    if speaker_role == "Om":
        parts = re.split(r'(?i)hey[\W_]+google', text, maxsplit=1)
        if len(parts) < 2:
            return  # no wake phrase: not addressed to the agent
        query = parts[1].strip()
    else:
        query = text

    print(flush=True)

    classification = await classify_intent(query)

    # classify_intent signals "statement / personal question / failure" with
    # None. The old substring test ('"Personal" in classification') raised
    # TypeError on None, and the error was lost inside the scheduled coroutine.
    if classification is None:
        print("   [Ignored - Personal/Statement]", flush=True)
        FULL_TRANSCRIPT.append(f"[{speaker_role}]: {current_text}")
        return

    full_context = get_full_context(current_text, speaker_role)
    search_query = await restructure_query(full_context, query)

    print(f"-> Agent rewording the question: {search_query}", flush=True)

    search_context = await asyncio.to_thread(tavily_search_tool, search_query)

    print(f"-> Agent answering question...", flush=True)

    prompt = f"""
    [TRANSCRIPT CONTEXT]
    {full_context}
    
    [USER QUERY]
    {query}
    
    [SEARCH RESULTS]
    {search_context}
    
    [INSTRUCTION]
    Answer the user's question using the search results.
    Keep it spoken-style, helpful, and under 2 sentences.
    """

    try:
        response = await asyncio.to_thread(
            gemini_client.models.generate_content,
            model="gemini-2.0-flash",
            contents=prompt,
            config=types.GenerateContentConfig(temperature=0.0)
        )

        # response.text can be None; fail with a readable message rather
        # than an AttributeError on .strip().
        answer = (response.text or "").strip()
        if not answer:
            raise ValueError("Gemini returned an empty response")

        # Credit Tavily only when it actually contributed results; both
        # sentinels from tavily_search_tool mean it did not.
        search_unavailable = search_context in ("No results found.", "Search failed.")
        label = "[G Agent]" if search_unavailable else "[G&T Agents]"

        print(f"\n{label}: {answer}\n", flush=True)

        FULL_TRANSCRIPT.append(f"[{speaker_role}]: {current_text}")
        FULL_TRANSCRIPT.append(f"[AI]: {answer}")

    except Exception as e:
        print(f"\n[Error]: {e}")

# --- 4. Main Loop (Watcher) ---

def start_deepgram_socket(mic_queue, loop):
    """Stream microphone audio to Deepgram and hand finished turns to the agent.

    Starts two daemon threads (a silence watcher and an audio sender) and then
    blocks in ``ws.run_forever()``, so this is meant to run on its own thread.

    Speaker 0 is labelled "Om" and only triggers the agent when the turn
    contains the "hey google" wake phrase followed by more than two
    characters; speaker 1 is labelled "Customer" and triggers on every turn.
    A turn is considered finished after more than 1.5 s without new text.

    Args:
        mic_queue: Queue of raw audio chunks to send (read with
            ``get(timeout=1)``).
        loop: The asyncio event loop on which ``run_agent_logic`` coroutines
            are scheduled.
    """
    url = "wss://api.deepgram.com/v1/listen?encoding=linear16&sample_rate=16000&channels=1&model=nova-2&smart_format=true&interim_results=false&diarize=true"
    headers = {"Authorization": f"Token {DEEPGRAM_KEY}"}

    # Shared between the websocket callback thread and the watcher thread.
    state = {
        "current_speaker": None,
        "current_buffer": [],
        "last_speech_time": time.time(),
        "has_processed_turn": False
    }

    def watcher_logic():
        """Poll the shared state and fire the agent once a turn goes quiet."""
        while True:
            time.sleep(0.1)
            if not state["current_buffer"]:
                continue

            silence = time.time() - state["last_speech_time"]
            if silence <= 1.5 or state["has_processed_turn"]:
                continue

            full_text = " ".join(state["current_buffer"]).strip()
            if not full_text:
                continue

            speaker = state["current_speaker"]
            role = "Om" if speaker == 0 else "Customer"

            if speaker == 0:
                # Require the wake phrase plus a non-trivial question after it.
                parts = re.split(r'hey[\W_]+google', full_text.lower(), maxsplit=1)
                should_trigger = len(parts) > 1 and len(parts[1].strip()) > 2
            else:
                should_trigger = speaker == 1

            if not should_trigger:
                continue

            asyncio.run_coroutine_threadsafe(run_agent_logic(full_text, role), loop)

            if speaker == 0:
                # Emptying the buffer already prevents a re-trigger for Om.
                # Also setting has_processed_turn here left it stuck at True
                # until the speaker changed, so a second consecutive
                # "hey google" query from Om was silently ignored.
                state["current_buffer"] = []
            else:
                # The customer's buffer is kept so the turn keeps accumulating;
                # on_message re-arms the flag when more customer text arrives.
                state["has_processed_turn"] = True

    threading.Thread(target=watcher_logic, daemon=True).start()

    def on_message(ws, message):
        """Print each transcript fragment and fold it into the current turn."""
        try:
            data = json.loads(message)
            if 'channel' not in data:
                return  # not a transcription result message
            alts = data['channel']['alternatives']
            if not alts:
                return
            transcript = alts[0]['transcript'].strip()
            if not transcript:
                return

            # The speaker of the last word decides the fragment's speaker.
            # Default to 0 when diarization data is missing instead of
            # dropping the whole transcript on a KeyError.
            words = alts[0].get('words') or []
            sid = words[-1].get('speaker', 0) if words else 0

            previous = state["current_speaker"]
            if previous is not None and previous != sid:
                # Speaker changed: start a new console line and archive the
                # previous speaker's buffered turn.
                print()
                print(f"[{'Om' if sid == 0 else 'Customer'}]: ", end="", flush=True)

                if state["current_buffer"]:
                    old_role = "Om" if previous == 0 else "Customer"
                    FULL_TRANSCRIPT.append(f"[{old_role}]: {' '.join(state['current_buffer'])}")

                state["current_buffer"] = []
                state["has_processed_turn"] = False

            elif previous is None:
                print(f"[{'Om' if sid == 0 else 'Customer'}]: ", end="", flush=True)

            print(f"{transcript} ", end="", flush=True)

            state["current_speaker"] = sid
            state["current_buffer"].append(transcript)
            # Refresh the timestamp before re-arming so the watcher cannot
            # see "armed" together with a stale silence interval.
            state["last_speech_time"] = time.time()

            if sid == 1:
                state["has_processed_turn"] = False

        except Exception as e:
            # Keep the socket alive, but do not hide malformed messages.
            print(f"\n[Deepgram] Could not handle message: {e}", flush=True)

    ws = websocket.WebSocketApp(
        url,
        header=headers,
        on_open=lambda ws: print("Connected! Listening...", flush=True),
        on_message=on_message,
        # Without this, connection/auth failures end run_forever() silently.
        on_error=lambda ws, error: print(f"\n[Deepgram] Socket error: {error}", flush=True),
    )

    def send_audio():
        """Forward queued microphone chunks to Deepgram as binary frames."""
        while True:
            try:
                data = mic_queue.get(timeout=1)
                ws.send(data, opcode=websocket.ABNF.OPCODE_BINARY)
            except Exception:
                # Covers an empty queue and sends on a socket that is not
                # (or no longer) open: drop the chunk and keep trying.
                continue

    threading.Thread(target=send_audio, daemon=True).start()
    ws.run_forever()

# --- 5. Execution ---

async def main_loop():
    """Start microphone capture and the Deepgram worker, then idle until stopped.

    The Deepgram socket runs on a daemon thread and schedules agent
    coroutines back onto this coroutine's event loop. The microphone is
    always released on exit, including when startup itself fails.
    """
    print("--- Real-Time Sales Agent (Watcher Mode) ---")
    mic = MicrophoneStream()
    try:
        # Inside the try so a failed start() still reaches mic.stop(), which
        # terminates the PyAudio handle created by the constructor.
        mic.start()
        loop = asyncio.get_running_loop()
        threading.Thread(target=start_deepgram_socket, args=(mic.queue, loop), daemon=True).start()
        while True:
            await asyncio.sleep(1)
    except (asyncio.CancelledError, KeyboardInterrupt):
        # Normal shutdown paths (task cancellation / kernel interrupt).
        pass
    finally:
        mic.stop()

if __name__ == "__main__":
    try:
        if "ipykernel" in sys.modules: await main_loop()
        else: asyncio.run(main_loop())
    except: print("\nStopped.")

--- Real-Time Sales Agent (Watcher Mode) ---
Connected! Listening...
[Om]: Hey, Google. What are your thoughts on the iPhone 17? 

-> Agent thinking about question...
Preparing to answer "Product-Opinion" type of question
-> Agent rewording the question: What are your thoughts on the iPhone 17
-> Agent answering question...

[G&T Agents]: Well, it sounds like the iPhone 17 could be the best iPhone yet, with some reviewers saying Apple finally delivered what people have been asking for. One reviewer even said its low-light camera performance is unmatched, even compared to the Galaxy S25 and Pixel 10.



In [129]:
# NOT PART OF THE MAIN RUNNING CODE: EVERYTHING BELOW IS JUST FOR TESTING.

#Testing which models work accurately for understanding question type
# Each question is stored next to its expected label so the two can never
# drift out of alignment. None marks a User-Opinion question, which the
# classifier is expected to ignore. The order interleaves the three
# categories on purpose.
LABELED_QUESTIONS = [
    ("How do you feel about the new iPhone 17?", "Product-Opinion"),
    ("How do you feel?", None),
    ("What is the screen size of the Samsung Galaxy Tab S9?", "Factual"),
    ("Do people like the battery life on the Pixel 9?", "Product-Opinion"),
    ("Do you like yourself?", None),
    ("What is the price of the new iPhone 17?", "Factual"),
    ("Are you happy today?", None),
    ("What do customers think about the Tesla Model Y?", "Product-Opinion"),
    ("When will the new Tesla Model 3 release?", "Factual"),
    ("How are you doing right now?", None),
    ("Do people enjoy using the Xbox Series X?", "Product-Opinion"),
    ("Where can I buy a Samsung Galaxy S25?", "Factual"),
    ("How many pixels does the iPhone 17 camera have?", "Factual"),
    ("What do you think about your progress?", None),
    ("Is the camera quality on the Samsung Galaxy S25 good?", "Product-Opinion"),
]

# Parallel views consumed by test_models_sync().
questions = [question for question, _ in LABELED_QUESTIONS]
expected = [label for _, label in LABELED_QUESTIONS]

# Groq model ids to evaluate; commented-out entries are candidates that are
# currently excluded from the run.
models_to_test = [
    #"allam-2-7b",
    #"meta-llama/llama-4-maverick-17b-128e-instruct",
    #"meta-llama/llama-guard-4-12b",
    #"meta-llama/llama-4-scout-17b-16e-instruct",
    "moonshotai/kimi-k2-instruct-0905",
    "llama-3.1-8b-instant",
    #"openai/gpt-oss-safeguard-20b",
    #"qwen/qwen3-32b",
    #"meta-llama/llama-prompt-guard-2-22m",
    #"llama-3.3-70b-versatile",
    #"groq/compound-mini",
    #"meta-llama/llama-prompt-guard-2-86m",
    #"openai/gpt-oss-120b",
    "moonshotai/kimi-k2-instruct",
    "groq/compound",
    "openai/gpt-oss-20b"
]


# Filled by test_models_sync() with the models that answered every question
# correctly; reset each time this cell runs.
good_models = []

def test_models_sync():
    for model_name in models_to_test:
        results = []
        all_correct = True

        for q, expected_label in zip(questions, expected):
            prompt = f"""
You are a context-aware question classifier. Classify the question into one of three categories:

1. Factual - asks for factual information or knowledge.
2. Product-Opinion - asks about opinions regarding a product, service, or topic.
3. User-Opinion - asks about the user's personal feelings, thoughts, or state. 
   For User-Opinion questions, respond with "None".

Rules:
- Use context if needed to disambiguate.
- Respond **exactly** with "Factual", "Product-Opinion", or "None".

Examples:

Factual:
- What is the price of the iPhone 17?
- Where can I buy a Samsung Galaxy S25?

Product-Opinion:
- How do people feel about the new iPhone 17?
- Are users satisfied with the Xbox Series X?

User-Opinion (return None):
- How are you feeling today?
- Do you like yourself?

QUESTION: "{q}"
Respond with only one label.

"""
            try:
                completion = groq_client.chat.completions.create(
                    model=model_name,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.0,
                    max_tokens=50
                )
                result = completion.choices[0].message.content.strip().strip('"').strip()
                if result not in ["Factual", "Product-Opinion", "User-Opinion"]:
                    result = None

                results.append(result == expected_label)

                if result != expected_label:
                    all_correct = False

            except Exception as e:
                print(f"[{q}] Error: {e}")
                results.append(False)
                all_correct = False

        if all_correct:
            good_models.append(model_name)
            print(f"{results} all correct")
        else:
            print(results)

    print("\n=== Summary: Models that got all correct ===")
    for m in good_models:
        print(f"- {m}")

# Run the model comparison as soon as this cell executes. The call is
# synchronous and issues one Groq request per (model, question) pair.
test_models_sync()


[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True] all correct
[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True] all correct
[False, True, False, False, True, False, True, False, False, True, False, False, False, True, False]
[False, True, True, False, True, True, True, False, False, True, False, True, False, True, False]
[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True] all correct


KeyboardInterrupt: 