In [1]:
SYNONYMS = {"ag": "Ahmed Group", "ahmed reit": "AGPT", "ag trust": "AGPT", "ag property trust": "AGPT"}

def normalize_text_by_synonyms(text: str) -> str:
    if not isinstance(text, str):
        return text
    low = text.lower()
    for k in sorted(SYNONYMS.keys(), key=len, reverse=True):
        if k in low:
            low = low.replace(k, SYNONYMS[k].lower())
    return low

APPROVED = {
    "company_overview_short": "Ahmed Group is a Canadian family office and real-estate firm (est. 1966) focused on discovering and developing purpose-built rental and community-enhancing projects. We manage the full stack: development, asset management, property management, and joint ventures.",
    "agpt": "AGPT is a private real estate investment trust launched Aug 28, 2025, now open to accredited investors.",
    "dundas_project": "A 568-unit mixed-use, purpose-built rental development at 1000 & 1024 Dundas St. E., Mississauga advanced after a May 21, 2024 settlement with Mother Parkers.",
    "contact_hq": "Corporate Head Office: 1-1024 Dundas St. E., Mississauga, ON L4Y 2B8 | (905) 949-0999 | contact@ahmed.group."
}

CMS_QA = {
    "about": APPROVED["company_overview_short"],
    "location": APPROVED["contact_hq"],
    "dundas": APPROVED["dundas_project"],
    "vendor": "Review our Vendor Guidelines and Tenders/Bidding pages; I can also collect your details for Procurement.",
    "agpt": "I can connect you to our Investor Relations team and share an AGPT overview. Please confirm your accreditation and best email."
}

INVESTMENT_DISCLAIMER = "The information provided about AGPT is for informational purposes only and does not constitute an offer to sell or a solicitation to buy any securities. Offers are made only by official offering documents to qualified (accredited) investors and are subject to applicable securities laws. Past performance is not indicative of future results."

In [2]:
import os, time, subprocess, re
import numpy as np
import sounddevice as sd
import webrtcvad
import torch
from dotenv import load_dotenv
from collections import deque
from faster_whisper import WhisperModel

load_dotenv()

openai_api_key = os.environ.get("OPENAI_API_KEY")
USE_GPU_WHISPER = os.environ.get("USE_GPU_WHISPER", "0") == "1"
GPU_COMPUTE_TYPE = os.environ.get("GPU_COMPUTE_TYPE", "float16")
PIPER_VOICE = os.environ.get("PIPER_VOICE", "./en_US-amy-medium.onnx")
PIPER_BIN = os.environ.get("PIPER_BIN", "./piper/piper")

SAMPLE_RATE = 16000
FRAME_MS = 10
UTTER_TIMEOUT = 0.4
MIN_UTTER_SEC = 0.5
MAX_AUDIO_BUFFER_SEC = 10

SYSTEM_PROMPT = "You are Ahmed Group Concierge. Be professional and brief (1-2 sentences). Only provide contact details (phone, email, address) if the user explicitly asks for contact information or how to reach Ahmed Group. Otherwise, do not include contact info."

SEED_SNIPPETS = [
    {"id": "about-001", "title": "Company Overview", "text": "Ahmed Group is a Canadian real-estate firm and family office (est. 1966) focused on purpose-built rental and community-enhancing projects."},
    {"id": "globe-2025-08-28", "title": "AG Property Trust", "text": "AGPT is a private real estate investment trust launched Aug 28, 2025, now open to accredited investors."},
    {"id": "dundas-2024-05-21", "title": "Dundas Settlement", "text": "A 568-unit mixed-use rental at 1000 & 1024 Dundas St. E., Mississauga."},
    {"id": "contact-001", "title": "Contact", "text": "1-1024 Dundas St. E., Mississauga, ON | (905) 949-0999 | contact@ahmed.group."}
]

CONTACT_KEYWORDS = ["contact", "phone", "email", "reach", "call", "address", "location", "where are you", "hq", "head office"]
FAREWELL_KEYWORDS = ["thank you that's all", "thanks that's all", "that's all thank you", "that's all thanks", "that's everything", "that'll be all", "that will be all", "i'm good thank you", "i'm all set", "that's it thank you", "that's it thanks"]

EMAIL_RE = re.compile(r"\b[\w\.-]+@[\w\.-]+\.[A-Za-z]{2,}\b")
PHONE_RE = re.compile(r"\+?\d[\d\s\-()]{7,}\d")
ADDRESS_HINTS = ["dundas st", "l4y", "mississauga", "1-1024 dundas"]

def contact_intent(text: str) -> bool:
    t = (text or "").lower()
    return any(k in t for k in CONTACT_KEYWORDS)

def farewell_intent(text: str) -> bool:
    t = (text or "").lower().strip()
    if any(phrase in t for phrase in FAREWELL_KEYWORDS):
        return True
    if len(t.split()) <= 6 and "thank" in t and "all" in t:
        return True
    return False

def contains_contact_info(s: str) -> bool:
    ss = s.lower()
    return EMAIL_RE.search(s) or PHONE_RE.search(s) or any(h in ss for h in ADDRESS_HINTS)

class RecentDeduper:
    def __init__(self, max_items: int = 64):
        self.buf = deque()
        self.max_items = max_items

    def _norm(self, s: str) -> str:
        return normalize_for_dedupe(s)

    def seen(self, s: str, within: float = 8.0) -> bool:
        now = time.time()
        key = self._norm(s)
        while self.buf and (now - self.buf[0][1]) > within:
            self.buf.popleft()
        for txt, ts in self.buf:
            if txt == key:
                return True
        self.buf.append((key, now))
        if len(self.buf) > self.max_items:
            self.buf.popleft()
        return False

def normalize_for_dedupe(s: str) -> str:
    if not isinstance(s, str):
        return ""
    t = s.strip().lower()
    t = t.replace("\u2013", "-").replace("\u2014", "-")
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"[^a-z0-9 ]+", "", t)
    return t.strip()

class VAD:
    def __init__(self):
        self.vad = webrtcvad.Vad(1)
        self.sr = SAMPLE_RATE
        self.noise_gate = 300

    def is_speech(self, pcm16: np.ndarray) -> bool:
        if np.abs(pcm16).max() < self.noise_gate:
            return False
        try:
            return self.vad.is_speech(pcm16.tobytes(), self.sr)
        except:
            return False

def kw_rag(query: str, k: int = 2):
    q = query.lower().split()
    allow_contact = contact_intent(query)
    docs = list(SEED_SNIPPETS)
    if "APPROVED" in globals():
        docs.extend([
            {"id": "approved-about", "title": "Company Overview (Approved)", "text": APPROVED.get("company_overview_short", "")},
            {"id": "approved-agpt", "title": "AGPT (Approved)", "text": APPROVED.get("agpt", "")},
            {"id": "approved-dundas", "title": "Dundas Project (Approved)", "text": APPROVED.get("dundas_project", "")},
            {"id": "approved-contact", "title": "Contact (Approved)", "text": APPROVED.get("contact_hq", "")}
        ])
    if not allow_contact:
        docs = [d for d in docs if not ("contact" in d.get("title", "").lower() or "contact" in str(d.get("id", "")).lower())]
    scored = [(sum(w in d["text"].lower() for w in q), i, d) for i, d in enumerate(docs)]
    scored.sort(reverse=True, key=lambda x: x[0])
    return [{"title": d["title"], "text": d["text"][:200], "docId": d.get("id", str(i))} for _, i, d in scored[:k]]

def cms_match(user_text: str):
    if "CMS_QA" not in globals():
        return None
    text = user_text if isinstance(user_text, str) else ""
    norm = text.lower()
    if "normalize_text_by_synonyms" in globals():
        try:
            norm = normalize_text_by_synonyms(text)
        except:
            norm = text.lower()

    def any_in(words):
        return any(w in norm for w in words)

    if farewell_intent(text):
        return "Thank you for contacting Ahmed Group. Feel free to reach out anytime—we're here to help!"
    if any_in(["agpt", "accredited investor", "accredited", "ag property trust", "ag trust", "ahmed reit"]):
        base = CMS_QA.get("agpt")
        if not base:
            return None
        if "INVESTMENT_DISCLAIMER" in globals():
            return base + " " + INVESTMENT_DISCLAIMER
        return base
    if any_in(["dundas", "1000", "1024", "mother parkers", "dundas st. e."]):
        return CMS_QA.get("dundas")
    if any_in(["vendor", "procure", "procurement", "tender", "bid", "bidding"]):
        return CMS_QA.get("vendor")
    if any_in(CONTACT_KEYWORDS):
        return CMS_QA.get("location")
    if any_in(["what do you do", "about", "company", "what is ahmed", "who are you", "overview"]):
        return CMS_QA.get("about")
    return None

ASR = None

def init_asr():
    global ASR
    if USE_GPU_WHISPER and torch.cuda.is_available():
        try:
            print(f"Loading Whisper tiny.en on GPU ({GPU_COMPUTE_TYPE})...")
            ASR = WhisperModel("tiny.en", device="cuda", compute_type=GPU_COMPUTE_TYPE, num_workers=1)
            print("✓ ASR ready on GPU")
            return
        except Exception as e:
            print(f"GPU init failed ({e}); falling back to CPU int8")
    print("Loading Whisper tiny.en on CPU (int8)...")
    ASR = WhisperModel("tiny.en", device="cpu", compute_type="int8", num_workers=2)

def transcribe(pcm16: np.ndarray) -> str:
    audio = pcm16.astype(np.float32) / 32768.0
    segments, _ = ASR.transcribe(audio, language="en", beam_size=1)
    return " ".join(s.text.strip() for s in segments).strip()

def llm_generate(user_text: str, snippets) -> str:
    from openai import OpenAI
    client = OpenAI()
    ctx = "\n".join(f"- {s['title']}: {s['text']}" for s in snippets)
    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nUse the following context to answer the user's question:\n{ctx}"},
        {"role": "user", "content": user_text}
    ]
    response = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages, temperature=0.3, max_tokens=150)
    return response.choices[0].message.content.strip()

def llm_stream_sentences(user_text: str, snippets):
    from openai import OpenAI
    client = OpenAI()
    ctx = "\n".join(f"- {s['title']}: {s['text']}" for s in snippets)
    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nUse the following context to answer the user's question:\n{ctx}"},
        {"role": "user", "content": user_text}
    ]
    buffer = ""
    try:
        stream = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages, temperature=0.3, max_tokens=150, stream=True)
        for chunk in stream:
            try:
                token = getattr(getattr(chunk.choices[0], "delta", None), "content", None)
            except:
                token = None
            if not token:
                continue
            buffer += token
            parts = re.split(r"(?<=[.!?])\s+", buffer)
            for sent in parts[:-1]:
                s = sent.strip()
                if s:
                    yield s
            buffer = parts[-1]
        if buffer.strip():
            yield buffer.strip()
    except:
        yield llm_generate(user_text, snippets)

def piper_tts(text: str):
    if not os.path.exists(PIPER_BIN) or not os.path.exists(PIPER_VOICE):
        return np.zeros(0, dtype=np.int16), SAMPLE_RATE
    env = os.environ.copy()
    env['LD_LIBRARY_PATH'] = f"{os.path.dirname(PIPER_BIN)}:{env.get('LD_LIBRARY_PATH', '')}"
    proc = subprocess.Popen([PIPER_BIN, "--model", PIPER_VOICE, "--sentence_silence", "0.08", "--length_scale", "0.8", "--output-raw"],
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, env=env)
    pcm_bytes, _ = proc.communicate(input=text.encode("utf-8"), timeout=20)
    pcm = np.frombuffer(pcm_bytes, dtype=np.int16)
    return pcm, 22050

def sentence_split(text: str):
    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text or "") if s.strip()]

def speak_sentences(sentences, allow_contact: bool, dedupe_ai: RecentDeduper, ioa):
    spoken_keys = set()
    output_sentences = []
    for sent in sentences:
        if not sent:
            continue
        if not allow_contact and contains_contact_info(sent):
            continue
        key = normalize_for_dedupe(sent)
        if not key or key in spoken_keys:
            continue
        if dedupe_ai.seen(sent, within=10.0):
            continue
        spoken_keys.add(key)
        output_sentences.append(sent)
    if output_sentences:
        print(f"AI: {' '.join(output_sentences)}")
        for sent in output_sentences:
            audio, sr = piper_tts(sent)
            ioa.play(audio, sr)

def process_text(text: str, ioa, dedupe_ai: RecentDeduper):
    allow_contact = contact_intent(text)
    cms = cms_match(text)
    if cms:
        return speak_sentences(sentence_split(cms), allow_contact, dedupe_ai, ioa)
    snippets = kw_rag(text)
    return speak_sentences(llm_stream_sentences(text, snippets), allow_contact, dedupe_ai, ioa)

class AudioIO:
    def __init__(self):
        self.sr = SAMPLE_RATE
    
    def mic_frames(self):
        frames = int(self.sr * FRAME_MS / 1000)
        with sd.InputStream(samplerate=self.sr, channels=1, dtype='int16', blocksize=frames) as stream:
            while True:
                data, _ = stream.read(frames)
                yield data.flatten()
    
    def play(self, pcm16: np.ndarray, sr: int | None = None):
        if pcm16.size == 0:
            return
        sd.play(pcm16, sr if sr else self.sr)
        sd.wait()

def main():
    init_asr()
    ioa = AudioIO()
    vad = VAD()
    dedupe_user = RecentDeduper()
    dedupe_ai = RecentDeduper()
    
    greeting = "Welcome to Ahmed Group. How can I assist you today?"
    print(f"AI: {greeting}")
    audio, sr = piper_tts(greeting)
    ioa.play(audio, sr)
    
    buf = []
    last_speech = time.time()

    try:
        for frame in ioa.mic_frames():
            if vad.is_speech(frame):
                buf.append(frame)
                last_speech = time.time()
                continue
            
            if buf and (time.time() - last_speech) > UTTER_TIMEOUT:
                pcm = np.concatenate(buf)
                buf = []
                duration = len(pcm) / SAMPLE_RATE
                if duration < MIN_UTTER_SEC or duration > MAX_AUDIO_BUFFER_SEC:
                    continue
                text = transcribe(pcm)
                if not text:
                    continue
                if dedupe_user.seen(text, within=6.0):
                    continue
                print(f"User: {text}")
                process_text(text, ioa, dedupe_ai)
    except KeyboardInterrupt:
        print("\nStopped")

if globals().get("_VOICE_CONCIERGE_RUNNING"):
    print("(Already running — ignoring duplicate start)")
else:
    globals()["_VOICE_CONCIERGE_RUNNING"] = True
    try:
        main()
    finally:
        globals()["_VOICE_CONCIERGE_RUNNING"] = False

KeyboardInterrupt: 

In [None]:
import os, io, time, subprocess, re, math, wave, struct, pathlib, json
import numpy as np
import sounddevice as sd
import webrtcvad
import torch
from dotenv import load_dotenv
from collections import deque
from faster_whisper import WhisperModel

# --------- Setup & Env ----------
load_dotenv()

openai_api_key = os.environ.get("OPENAI_API_KEY")
USE_GPU_WHISPER = os.environ.get("USE_GPU_WHISPER", "0") == "1"
GPU_COMPUTE_TYPE = os.environ.get("GPU_COMPUTE_TYPE", "float16")

OPENAI_TTS_MODEL = os.environ.get("OPENAI_TTS_MODEL", "gpt-4o-mini-tts")
VOICE_NAME = os.environ.get("VOICE_NAME", "alloy")

EMBED_MODEL = os.environ.get("EMBED_MODEL", "text-embedding-3-small")
INDEX_PATH = os.environ.get("RAG_INDEX_PATH", "./rag_index.faiss")
INDEX_META_PATH = os.environ.get("RAG_META_PATH", "./rag_meta.json")

SAMPLE_RATE = 16000
FRAME_MS = 10
UTTER_TIMEOUT = 0.40
MIN_UTTER_SEC = 0.50
MAX_AUDIO_BUFFER_SEC = 10.0

SYSTEM_PROMPT = (
    "You are Ahmed Group Concierge. Be professional and brief (1-2 sentences). "
    "Only provide contact details (phone, email, address) if the user explicitly "
    "asks for contact information or how to reach Ahmed Group. Otherwise, do not include contact info."
)

SEED_SNIPPETS = [
    {"id": "about-001", "title": "Company Overview", "text": "Ahmed Group is a Canadian real-estate firm and family office (est. 1966) focused on purpose-built rental and community-enhancing projects."},
    {"id": "globe-2025-08-28", "title": "AG Property Trust", "text": "AGPT is a private real estate investment trust launched Aug 28, 2025, now open to accredited investors."},
    {"id": "dundas-2024-05-21", "title": "Dundas Settlement", "text": "A 568-unit mixed-use rental at 1000 & 1024 Dundas St. E., Mississauga."},
    {"id": "contact-001", "title": "Contact", "text": "1-1024 Dundas St. E., Mississauga, ON | (905) 949-0999 | contact@ahmed.group."}
]

CONTACT_KEYWORDS = ["contact", "phone", "email", "reach", "call", "address", "location", "where are you", "hq", "head office"]
FAREWELL_KEYWORDS = [
    "thank you that's all","thanks that's all","that's all thank you","that's all thanks",
    "that's everything","that'll be all","that will be all","i'm good thank you",
    "i'm all set","that's it thank you","that's it thanks"
]

EMAIL_RE = re.compile(r"\b[\w\.-]+@[\w\.-]+\.[A-Za-z]{2,}\b")
PHONE_RE = re.compile(r"\+?\d[\d\s\-()]{7,}\d")
ADDRESS_HINTS = ["dundas st", "l4y", "mississauga", "1-1024 dundas"]

# --------- OpenAI client helper ----------
def get_openai():
    from openai import OpenAI
    key = os.environ.get("OPENAI_API_KEY") or openai_api_key
    if not key:
        raise RuntimeError("OPENAI_API_KEY not set")
    return OpenAI(api_key=key)

# --------- Anti-repeat helpers ----------
def normalize_for_dedupe(s: str) -> str:
    if not isinstance(s, str):
        return ""
    t = s.strip().lower()
    t = t.replace("\u2013", "-").replace("\u2014", "-")
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"[^a-z0-9 ]+", "", t)
    return t.strip()

class RecentDeduper:
    def __init__(self, max_items: int = 64):
        from collections import deque
        self.buf = deque()
        self.max_items = max_items
    def seen(self, s: str, within: float = 8.0) -> bool:
        now = time.time()
        key = normalize_for_dedupe(s)
        while self.buf and (now - self.buf[0][1]) > within:
            self.buf.popleft()
        for txt, ts in self.buf:
            if txt == key:
                return True
        self.buf.append((key, now))
        if len(self.buf) > self.max_items:
            self.buf.popleft()
        return False

# --------- Intent helpers ----------
def contact_intent(text: str) -> bool:
    t = (text or "").lower()
    return any(k in t for k in CONTACT_KEYWORDS)

def farewell_intent(text: str) -> bool:
    t = (text or "").lower().strip()
    if any(phrase in t for phrase in FAREWELL_KEYWORDS):
        return True
    if len(t.split()) <= 6 and "thank" in t and "all" in t:
        return True
    return False

def contains_contact_info(s: str) -> bool:
    ss = s.lower()
    return EMAIL_RE.search(s) or PHONE_RE.search(s) or any(h in ss for h in ADDRESS_HINTS)

# --------- VAD ----------
class VAD:
    def __init__(self):
        self.vad = webrtcvad.Vad(1)
        self.sr = SAMPLE_RATE
        self.noise_gate = 300
    def is_speech(self, pcm16: np.ndarray) -> bool:
        if np.abs(pcm16).max() < self.noise_gate:
            return False
        try:
            return self.vad.is_speech(pcm16.tobytes(), self.sr)
        except:
            return False

# --------- Whisper ASR ----------
ASR = None
def init_asr():
    global ASR
    if USE_GPU_WHISPER and torch.cuda.is_available():
        try:
            print(f"Loading Whisper tiny.en on GPU ({GPU_COMPUTE_TYPE})...")
            ASR = WhisperModel("tiny.en", device="cuda", compute_type=GPU_COMPUTE_TYPE, num_workers=1)
            print("✓ ASR ready on GPU")
            return
        except Exception as e:
            print(f"GPU init failed ({e}); falling back to CPU int8")
    print("Loading Whisper tiny.en on CPU (int8)...")
    ASR = WhisperModel("tiny.en", device="cpu", compute_type="int8", num_workers=2)

def transcribe(pcm16: np.ndarray) -> str:
    audio = pcm16.astype(np.float32) / 32768.0
    segments, _ = ASR.transcribe(audio, language="en", beam_size=1)
    return " ".join(s.text.strip() for s in segments).strip()

# --------- Simple seed-RAG ----------
def kw_rag(query: str, k: int = 2):
    q = query.lower().split()
    allow_contact = contact_intent(query)
    docs = list(SEED_SNIPPETS)
    # merge vector-RAG results later as well
    scored = [(sum(w in d["text"].lower() for w in q), i, d) for i, d in enumerate(docs)]
    scored.sort(reverse=True, key=lambda x: x[0])
    out = [{"title": d["title"], "text": d["text"][:200], "docId": d.get("id", str(i))} for _, i, d in scored[:k]]
    if not allow_contact:
        out = [d for d in out if "contact" not in d["title"].lower()]
    return out

# --------- Vector RAG over PDFs & extra text ----------
def load_pdfs_to_texts(pdf_paths):
    from pypdf import PdfReader
    chunks = []
    for p in pdf_paths:
        try:
            reader = PdfReader(p)
            text = "\n".join(page.extract_text() or "" for page in reader.pages)
            chunks.extend(split_text(text, max_len=1200, overlap=150))
        except Exception as e:
            print(f"[RAG] PDF read failed for {p}: {e}")
    return chunks

def split_text(txt, max_len=1200, overlap=150):
    txt = re.sub(r"\s+", " ", txt).strip()
    out = []
    start = 0
    while start < len(txt):
        end = min(len(txt), start + max_len)
        # try to cut on sentence boundary
        cut = txt.rfind(". ", start, end)
        if cut == -1 or cut <= start + 200:
            cut = end
        out.append(txt[start:cut].strip())
        start = max(cut - overlap, 0) if cut < len(txt) else len(txt)
    return [c for c in out if c]

def build_or_load_faiss(chunks, force_rebuild=False):
    import faiss
    from openai import OpenAI
    client = get_openai()
    # load existing
    if (not force_rebuild) and os.path.exists(INDEX_PATH) and os.path.exists(INDEX_META_PATH):
        try:
            index = faiss.read_index(INDEX_PATH)
            meta = json.load(open(INDEX_META_PATH, "r", encoding="utf-8"))
            print(f"[RAG] Loaded existing index with {index.ntotal} vectors.")
            return index, meta
        except Exception as e:
            print(f"[RAG] Failed to load index; rebuilding: {e}")

    # embed
    print(f"[RAG] Embedding {len(chunks)} chunks...")
    vecs = []
    B = 128
    for i in range(0, len(chunks), B):
        batch = chunks[i:i+B]
        emb = client.embeddings.create(model=EMBED_MODEL, input=batch)
        vecs.extend([np.array(e.embedding, dtype=np.float32) for e in emb.data])

    d = len(vecs[0]) if vecs else 1536
    index = faiss.IndexFlatIP(d)
    faiss.normalize_L2(np.asarray(vecs))
    index.add(np.asarray(vecs))
    faiss.write_index(index, INDEX_PATH)
    json.dump({"chunks": chunks}, open(INDEX_META_PATH, "w", encoding="utf-8"))
    print(f"[RAG] Indexed {len(chunks)} chunks.")
    return index, {"chunks": chunks}

def vector_retrieve(query, index, meta, k=3):
    if index is None or meta is None:
        return []
    from openai import OpenAI
    client = get_openai()
    emb = client.embeddings.create(model=EMBED_MODEL, input=[query]).data[0].embedding
    q = np.array(emb, dtype=np.float32)[None, :]
    import faiss
    faiss.normalize_L2(q)
    D, I = index.search(q, k)
    chunks = [meta["chunks"][i] for i in I[0] if 0 <= i < len(meta["chunks"])]
    return [{"title": "PDF KB", "text": c[:200], "docId": f"kb-{i}"} for i, c in zip(I[0], chunks)]

# --------- LLM orchestration ----------
def llm_answer(user_text: str, snippets):
    from openai import OpenAI
    client = get_openai()
    ctx = "\n".join(f"- {s['title']}: {s['text']}" for s in snippets)
    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nUse the following context to answer the user's question:\n{ctx}"},
        {"role": "user", "content": user_text}
    ]
    resp = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=0.3,
        max_tokens=150
    )
    return resp.choices[0].message.content.strip()

def llm_stream_sentences(user_text: str, snippets):
    from openai import OpenAI
    client = get_openai()
    ctx = "\n".join(f"- {s['title']}: {s['text']}" for s in snippets)
    messages = [
        {"role": "system", "content": f"{SYSTEM_PROMPT}\n\nUse the following context to answer the user's question:\n{ctx}"},
        {"role": "user", "content": user_text}
    ]
    buffer = ""
    try:
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.3,
            max_tokens=150,
            stream=True
        )
        for chunk in stream:
            token = getattr(getattr(chunk.choices[0], "delta", None), "content", None)
            if not token: 
                continue
            buffer += token
            parts = re.split(r"(?<=[.!?])\s+", buffer)
            for sent in parts[:-1]:
                s = sent.strip()
                if s:
                    yield s
            buffer = parts[-1]
        if buffer.strip():
            yield buffer.strip()
    except Exception:
        yield llm_answer(user_text, snippets)

# --------- OpenAI TTS (WAV) ----------
def wav_bytes_to_np(wav_bytes: bytes):
    with wave.open(io.BytesIO(wav_bytes), "rb") as wf:
        sr = wf.getframerate()
        nframes = wf.getnframes()
        audio = wf.readframes(nframes)
        sampwidth = wf.getsampwidth()
        if sampwidth != 2:
            # convert to 16-bit if needed
            dtype = {1: np.int8, 2: np.int16, 3: None, 4: np.int32}.get(sampwidth, None)
            if dtype is None:
                raise RuntimeError(f"Unsupported sample width: {sampwidth}")
        pcm16 = np.frombuffer(audio, dtype=np.int16)
    return pcm16, sr

def soften_punctuation(text: str):
    # Reduce hard stops to limit pauses (don’t remove the final one)
    text = re.sub(r"(?<!\.)\.(\s+)", r", ", text)   # interior periods -> commas
    text = re.sub(r";", ",", text)
    return text

def coalesce_sentences(sentences, max_chars=350):
    # Group sentences so each TTS call has a natural paragraph
    acc, acc_len = [], 0
    out = []
    for s in sentences:
        s = s.strip()
        if not s:
            continue
        if acc_len + len(s) + 1 > max_chars and acc:
            out.append(" ".join(acc))
            acc, acc_len = [s], len(s)
        else:
            acc.append(s); acc_len += len(s) + 1
    if acc:
        out.append(" ".join(acc))
    return out

def tts_openai(text: str):
    # Request WAV bytes from OpenAI TTS (fixes the RIFF error)
    if not text:
        return np.zeros(0, dtype=np.int16), SAMPLE_RATE
    from openai import OpenAI
    client = get_openai()

    text = soften_punctuation(text)
    # Non-streaming is simpler and yields fewer artifacts for short prompts
    audio = client.audio.speech.create(
        model=OPENAI_TTS_MODEL,
        voice=VOICE_NAME,
        input=text,
        response_format="wav"   # <---- IMPORTANT: not 'format'
    )
    wav_bytes = audio.content if hasattr(audio, "content") else audio  # SDK returns a Response with .content
    if isinstance(wav_bytes, bytes) is False:
        # Some SDK versions return a dict-like; get .content_bytes()
        wav_bytes = audio.get("content") if isinstance(audio, dict) else bytes(audio)
    pcm16, sr = wav_bytes_to_np(wav_bytes)
    return pcm16, sr

# --------- IO ----------
class AudioIO:
    def __init__(self):
        self.sr = SAMPLE_RATE
    def mic_frames(self):
        frames = int(self.sr * FRAME_MS / 1000)
        with sd.InputStream(samplerate=self.sr, channels=1, dtype='int16', blocksize=frames) as stream:
            while True:
                data, _ = stream.read(frames)
                yield data.flatten()
    def play(self, pcm16: np.ndarray, sr: int | None = None):
        if pcm16.size == 0:
            return
        sd.play(pcm16, sr if sr else self.sr)
        sd.wait()

# --------- Speak pipeline (with dedupe & fewer pauses) ----------
def sentence_split(text: str):
    return [s.strip() for s in re.split(r"(?<=[.!?])\s+", text or "") if s.strip()]

def speak_sentences(sentences, allow_contact: bool, dedupe_ai: RecentDeduper, ioa: AudioIO):
    # Filter + coalesce, then one TTS per paragraph for smoother prosody
    filtered = []
    seen_keys = set()
    for sent in sentences:
        if not sent:
            continue
        if not allow_contact and contains_contact_info(sent):
            continue
        key = normalize_for_dedupe(sent)
        if not key or key in seen_keys:
            continue
        if dedupe_ai.seen(sent, within=10.0):
            continue
        seen_keys.add(key)
        filtered.append(sent)

    if not filtered:
        return

    # Group into paragraphs to reduce awkward gaps
    paragraphs = coalesce_sentences(filtered, max_chars=350)
    print(f"AI: {' '.join(paragraphs)}")
    for p in paragraphs:
        audio, sr = tts_openai(p)
        ioa.play(audio, sr)

# --------- RAG glue ----------
FAISS_INDEX = None
FAISS_META = None

def build_kb(pdf_paths=None, extra_texts=None, force_rebuild=False):
    global FAISS_INDEX, FAISS_META
    pdf_paths = pdf_paths or []
    extra_texts = extra_texts or []
    txts = []
    if pdf_paths:
        txts += load_pdfs_to_texts(pdf_paths)
    for t in extra_texts:
        txts += split_text(t, max_len=1200, overlap=150)
    if not txts:
        FAISS_INDEX, FAISS_META = None, None
        print("[RAG] No KB provided.")
        return
    FAISS_INDEX, FAISS_META = build_or_load_faiss(txts, force_rebuild=force_rebuild)

def retrieve_context(query: str, k_seed=2, k_vec=3):
    snippets = kw_rag(query, k=k_seed)
    vec_snips = vector_retrieve(query, FAISS_INDEX, FAISS_META, k=k_vec) if FAISS_INDEX else []
    # Remove contact unless asked
    allow_contact = contact_intent(query)
    all_snips = snippets + vec_snips
    if not allow_contact:
        all_snips = [s for s in all_snips if "contact" not in s["title"].lower()]
    return all_snips

def process_text(text: str, ioa: AudioIO, dedupe_ai: RecentDeduper):
    allow_contact = contact_intent(text)
    # build context from both seed & vector KB
    snippets = retrieve_context(text)
    # Stream sentences, but we’ll coalesce before TTS
    speak_sentences(llm_stream_sentences(text, snippets), allow_contact, dedupe_ai, ioa)

# --------- Main loop ----------
def main(pdf_paths=None, extra_texts=None, force_rebuild=False):
    # Optional: build RAG from PDFs & extra text
    if pdf_paths or extra_texts:
        build_kb(pdf_paths=pdf_paths, extra_texts=extra_texts, force_rebuild=force_rebuild)

    init_asr()
    ioa = AudioIO()
    vad = VAD()
    dedupe_user = RecentDeduper()
    dedupe_ai = RecentDeduper()

    greeting = "Welcome to Ahmed Group. How can I assist you today?"
    print(f"AI: {greeting}")
    g_pcm, g_sr = tts_openai(greeting)
    ioa.play(g_pcm, g_sr)

    buf = []
    last_speech = time.time()

    try:
        for frame in ioa.mic_frames():
            if vad.is_speech(frame):
                buf.append(frame)
                last_speech = time.time()
                continue

            if buf and (time.time() - last_speech) > UTTER_TIMEOUT:
                pcm = np.concatenate(buf)
                buf = []
                duration = len(pcm) / SAMPLE_RATE
                if duration < MIN_UTTER_SEC or duration > MAX_AUDIO_BUFFER_SEC:
                    continue
                text = transcribe(pcm)
                if not text:
                    continue
                if dedupe_user.seen(text, within=6.0):
                    continue
                print(f"User: {text}")
                process_text(text, ioa, dedupe_ai)
    except KeyboardInterrupt:
        print("\nStopped")

# --------- Entrypoint ----------
if globals().get("_VOICE_CONCIERGE_RUNNING"):
    print("(Already running — ignoring duplicate start)")
else:
    globals()["_VOICE_CONCIERGE_RUNNING"] = True
    try:
        # Example: point these to your 4 PDFs; add any extra knowledge text blobs
        pdfs = [
            "/home/kxngh/Desktop/GenieAI/AIcallerV2/kb/Exempt Market Proficiency Course-EMP.pdf",
            "/home/kxngh/Desktop/GenieAI/AIcallerV2/kb/Tawakkul Fund Deck.pdf",
            "/home/kxngh/Desktop/GenieAI/AIcallerV2/kb/Unregistered Principal Training Program.pdf",
            "kb/v5.3 - AGPT Deck.pdf",
        ]
        extra_blobs = [
            # "Short policy note ...",
            # "FAQ entry ...",
        ]
        main(pdf_paths=pdfs, extra_texts=extra_blobs, force_rebuild=False)
    finally:
        globals()["_VOICE_CONCIERGE_RUNNING"] = False


[RAG] Loaded existing index with 860 vectors.
Loading Whisper tiny.en on GPU (float16)...
✓ ASR ready on GPU
AI: Welcome to Ahmed Group. How can I assist you today?
✓ ASR ready on GPU
AI: Welcome to Ahmed Group. How can I assist you today?


AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-**************************************************************************************************7Zu-. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [None]:
import os, io, time, re, json, shutil, threading, wave
from pathlib import Path
from collections import deque

import numpy as np
import sounddevice as sd
import webrtcvad
from dotenv import load_dotenv
from faster_whisper import WhisperModel

from pypdf import PdfReader
import faiss
from rank_bm25 import BM25Okapi

import nltk
from nltk.tokenize import word_tokenize


# NLTK bootstrap
def ensure_nltk_resources():
    resources = {
        "punkt": "tokenizers/punkt",
        "punkt_tab": "tokenizers/punkt_tab/english.pickle",
    }
    for pkg, path in resources.items():
        try:
            nltk.data.find(path)
        except LookupError:
            try:
                nltk.download(pkg, quiet=True)
            except Exception as err:
                print(f"[NLTK] failed to download {pkg}: {err}")

ensure_nltk_resources()


# Load environment variables
load_dotenv()

VALID_VOICES = {
    "alloy","echo","fable","onyx","nova","shimmer",
    "coral","verse","ballad","ash","sage","marin","cedar"
}

def _sanitize_env_enum(var_name: str, default: str, allowed: set[str]) -> str:
    raw = os.getenv(var_name, default) or default
    token = re.split(r"[#,;]", raw, maxsplit=1)[0]
    token = token.strip().strip('"').strip("'").replace("”","").replace("“","").replace("’","").replace("‘","")
    token_l = token.lower()
    if token_l not in allowed:
        print(f"[TTS] Warning: {var_name}='{raw}' sanitized -> '{token_l}', invalid. Using '{default}'.")
        return default
    return token_l


OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
OPENAI_TTS_MODEL = os.getenv("OPENAI_TTS_MODEL", "gpt-4o-mini-tts")
VOICE_NAME = _sanitize_env_enum("OPENAI_TTS_VOICE", "verse", VALID_VOICES)

WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL_NAME", "tiny.en")
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")

SAMPLE_RATE = 16000
FRAME_MS = 10
UTTER_TIMEOUT = 0.4
MIN_UTTER_SEC = 0.5
MAX_AUDIO_BUFFER_SEC = 10


SYSTEM_PROMPT = (
    "You are Ahmed Group Concierge. Be professional and brief (1–2 sentences). "
)

INVESTMENT_DISCLAIMER = (
    "This is not an offer to sell or a solicitation to buy securities. "
    "No performance projections or returns are offered."
)

CMS_QA = {
    "about":
        "Ahmed Group is a Canadian family office and real-estate firm (est. 1966).",
    "agpt":
        "AGPT is available for accredited investors. " + INVESTMENT_DISCLAIMER,
    "dundas":
        "Ahmed Group reached a settlement on May 21, 2024 to advance a 568-unit rental development.",
    "vendor":
        "Please review our Vendor Guidelines and current tenders. I can also collect your details.",
    "location":
        "Corporate Head Office: 1-1024 Dundas St. E., Mississauga, ON L4Y 2B8.",
    "resident_script":
        "We’re active in the Mississauga community, including senior residences. What’s your ideal move-in window?",
    "media_script":
        "Ahmed Group reached a settlement on May 21, 2024 regarding the Dundas project. What’s your outlet and deadline?",
}

SEED_SNIPPETS = [
    {"id": "about-001", "title": "Company Overview",
     "text": "Ahmed Group is a Canadian family office and real-estate firm founded in 1966."},
    {"id": "agpt-2025-08-28", "title": "AG Property Trust",
     "text": "AGPT is a private real estate investment trust for accredited investors. " + INVESTMENT_DISCLAIMER},
    {"id": "dundas-2024-05-21", "title": "Dundas St. E. Project",
     "text": "A 568-unit rental development advanced after the May 21, 2024 settlement."},
    {"id": "contact-001", "title": "Contact / HQ",
     "text": "Corporate Head Office: 1-1024 Dundas St. E., Mississauga, ON L4Y 2B8."},
]


CONTACT_KEYWORDS = ["contact","phone","email","reach","call","address","location","where are you","hq","head office","based"]
FAREWELL_KEYWORDS = [
    "thank you that's all","thanks that's all","that's all thank you","that's all thanks",
    "that's everything","that'll be all","that will be all","i'm good thank you",
    "i'm all set","that's it thank you","that's it thanks"
]

EMAIL_RE = re.compile(r"\b[\w\.-]+@[\w\.-]+\.[A-Za-z]{2,}\b")
PHONE_RE = re.compile(r"\+?\d[\d\s\-()]{7,}\d")
ADDRESS_HINTS = ["dundas st", "l4y", "mississauga", "1-1024 dundas"]


def get_openai():
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY not set")
    from openai import OpenAI
    return OpenAI(api_key=OPENAI_API_KEY)


def contact_intent(text: str) -> bool:
    t = (text or "").lower()
    return any(k in t for k in CONTACT_KEYWORDS)


def farewell_intent(text: str) -> bool:
    t = (text or "").lower().strip()
    if any(phrase in t for phrase in FAREWELL_KEYWORDS):
        return True
    return len(t.split()) <= 6 and "thank" in t and "all" in t


def contains_contact_info(s: str) -> bool:
    ss = s.lower()
    return EMAIL_RE.search(s) or PHONE_RE.search(s) or any(h in ss for h in ADDRESS_HINTS)


def normalize_for_dedupe(s: str) -> str:
    if not isinstance(s, str): return ""
    t = s.strip().lower()
    t = t.replace("\u2013", "-").replace("\u2014", "-")
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"[^a-z0-9 ]+", "", t)
    return t.strip()


class RecentDeduper:
    def __init__(self, max_items: int = 64):
        self.buf = deque()
        self.max_items = max_items

    def seen(self, s: str, within: float = 8.0) -> bool:
        now = time.time()
        key = normalize_for_dedupe(s)

        while self.buf and (now - self.buf[0][1]) > within:
            self.buf.popleft()

        for txt, ts in self.buf:
            if txt == key:
                return True

        self.buf.append((key, now))
        if len(self.buf) > self.max_items:
            self.buf.popleft()
        return False


class VAD:
    def __init__(self):
        self.vad = webrtcvad.Vad(1)
        self.sr = SAMPLE_RATE
        self.noise_gate = 400

    def is_speech(self, pcm16: np.ndarray) -> bool:
        if np.abs(pcm16).max() < self.noise_gate:
            return False
        try:
            return self.vad.is_speech(pcm16.tobytes(), self.sr)
        except:
            return False



# RAG (FAISS + BM25)

RAG_DIR = Path("./rag_index")
EMBED_MODEL = "text-embedding-3-small"
INDEX = None
INDEX_METADATA = []
BM25 = None


def _embed_texts(texts):
    client = get_openai()
    resp = client.embeddings.create(model=EMBED_MODEL, input=texts)
    return np.array([d.embedding for d in resp.data], dtype="float32")


def _norm_ws(s: str) -> str:
    return re.sub(r"\s+", " ", (s or "").strip())


def _smart_chunks(text: str, max_chars=900, overlap=120):
    paras = [p.strip() for p in re.split(r"\n\s*\n", text or "") if p.strip()]
    chunks, cur = [], ""
    for p in paras:
        if not cur:
            cur = p
        elif len(cur) + 1 + len(p) <= max_chars:
            cur = f"{cur}\n{p}"
        else:
            chunks.append(_norm_ws(cur))
            tail = cur[-overlap:] if overlap else ""
            cur = f"{tail}\n{p}" if tail else p
    if cur.strip():
        chunks.append(_norm_ws(cur))
    return chunks


def _read_pdf(path: str) -> str:
    try:
        r = PdfReader(path)
        out = []
        for p in r.pages:
            out.append(p.extract_text() or "")
        return "\n".join(out)
    except Exception as e:
        print(f"[RAG] PDF read failed {path}: {e}")
        return ""


def _ensure_dir(p: Path):
    p.mkdir(parents=True, exist_ok=True)


def _save_index(index, docs):
    _ensure_dir(RAG_DIR)
    faiss.write_index(index, str(RAG_DIR / "faiss.index"))
    (RAG_DIR / "meta.json").write_text(json.dumps(docs, ensure_ascii=False))
    print(f"[RAG] Saved index with {len(docs)} chunks.")


def _load_index():
    global INDEX, INDEX_METADATA, BM25
    if not (RAG_DIR / "faiss.index").exists():
        return False
    INDEX = faiss.read_index(str(RAG_DIR / "faiss.index"))
    INDEX_METADATA = json.loads((RAG_DIR / "meta.json").read_text())
    corpus_texts = [d["text"] for d in INDEX_METADATA]
    tokenized = [word_tokenize(t.lower()) for t in corpus_texts]
    BM25 = BM25Okapi(tokenized)
    print(f"[RAG] Loaded index with {len(INDEX_METADATA)} chunks.")
    return True


def reset_index():
    if RAG_DIR.exists():
        shutil.rmtree(RAG_DIR)
    print("[RAG] Reset index directory.")


def load_knowledge(pdf_paths=None, extra_text_files=None, rebuild=False):
    global INDEX, INDEX_METADATA, BM25

    if not rebuild and _load_index():
        return

    pdf_paths = pdf_paths or []
    extra_text_files = extra_text_files or []
    docs = []

    for s in SEED_SNIPPETS:
        for i, ch in enumerate(_smart_chunks(s["text"])):
            docs.append({"id": f"seed::{s['id']}::{i}", "title": s["title"], "text": ch})

    for p in pdf_paths:
        txt = _read_pdf(p)
        if not txt:
            continue
        for i, chunk in enumerate(_smart_chunks(txt, max_chars=1100, overlap=160)):
            docs.append({"id": f"pdf::{os.path.basename(p)}::{i}", "title": os.path.basename(p), "text": chunk})

    for tpath in extra_text_files:
        try:
            raw = Path(tpath).read_text(encoding="utf-8")
            for i, chunk in enumerate(_smart_chunks(raw, max_chars=1100, overlap=160)):
                docs.append({"id": f"text::{os.path.basename(tpath)}::{i}", "title": os.path.basename(tpath), "text": chunk})
        except Exception as e:
            print(f"[RAG] text read failed {tpath}: {e}")

    if not docs:
        INDEX, INDEX_METADATA, BM25 = None, [], None
        print("[RAG] No docs to index.")
        return

    texts = [d["text"] for d in docs]
    embs = _embed_texts(texts).astype("float32")
    dim = embs.shape[1]
    faiss.normalize_L2(embs)
    index = faiss.IndexFlatIP(dim)
    index.add(embs)
    INDEX = index
    INDEX_METADATA = docs

    tokenized = [word_tokenize(t.lower()) for t in texts]
    BM25 = BM25Okapi(tokenized)
    _save_index(INDEX, INDEX_METADATA)


def _mmr(query_emb, cand_embs, lambda_mult=0.55, top_k=5):
    sim = np.dot(cand_embs, query_emb.T).reshape(-1)
    selected_idx = []
    selected = []

    while len(selected_idx) < min(top_k, len(sim)):
        if not selected_idx:
            i = int(np.argmax(sim))
            selected_idx.append(i)
            selected.append(cand_embs[i])
            continue

        rem_idx = [i for i in range(len(sim)) if i not in selected_idx]
        div = []

        for i in rem_idx:
            max_sim = max(np.dot(cand_embs[i], s) for s in selected)
            score = lambda_mult * sim[i] - (1 - lambda_mult) * max_sim
            div.append((score, i))

        i = max(div, key=lambda x: x[0])[1]
        selected_idx.append(i)
        selected.append(cand_embs[i])

    return selected_idx


def _hybrid_search(query: str, k=6, allow_contact=False, expand=False):
    q_emb = _embed_texts([query]).astype("float32")
    faiss.normalize_L2(q_emb)

    oversample = min(50, len(INDEX_METADATA))
    D, I = INDEX.search(q_emb, oversample)
    cand_idxs = list(dict.fromkeys(I[0].tolist()))

    bm25_scores = np.zeros(len(INDEX_METADATA))
    if BM25:
        toks = word_tokenize(query.lower())
        s = np.array(BM25.get_scores(toks))
        bm25_scores = np.maximum(bm25_scores, s)

    fused = []
    for i in cand_idxs:
        meta = INDEX_METADATA[i]
        if not allow_contact and "contact" in meta["title"].lower():
            continue
        kw_score = bm25_scores[i]
        combined = 0.65 + 0.35 * (kw_score / (kw_score + 3.0 + 1e-9))
        fused.append((i, combined))

    if not fused:
        return []

    fused.sort(key=lambda x: x[1], reverse=True)
    top_idx = [i for i, _ in fused[:min(24, len(fused))]]

    cand_embs = _embed_texts([INDEX_METADATA[i]["text"] for i in top_idx]).astype("float32")
    faiss.normalize_L2(cand_embs)
    picked = _mmr(q_emb[0], cand_embs, lambda_mult=0.55, top_k=k)
    final_idx = [top_idx[i] for i in picked]

    return [{"title": INDEX_METADATA[i]["title"],
             "text": INDEX_METADATA[i]["text"],
             "docId": INDEX_METADATA[i]["id"],
             "rank": r+1}
            for r, i in enumerate(final_idx)]


def rag_search(query: str, k=4, allow_contact=False):
    if INDEX is None or INDEX.ntotal == 0:
        return kw_rag_keyword(query, k=k, allow_contact=allow_contact)
    return _hybrid_search(query, k=k, allow_contact=allow_contact)


def kw_rag_keyword(query: str, k: int = 2, allow_contact=False):
    q = query.lower().split()
    docs = list(SEED_SNIPPETS)
    if not allow_contact:
        docs = [d for d in docs if "contact" not in d["title"].lower()]

    scored = [(sum(w in d["text"].lower() for w in q), i, d)
              for i, d in enumerate(docs)]
    scored.sort(reverse=True, key=lambda x: x[0])
    return [{"title": d["title"], "text": d["text"][:400], "docId": d["id"]}
            for _, i, d in scored[:k]]


def cms_match(user_text: str):
    norm = (user_text or "").lower()

    def has(words): return any(w in norm for w in words)

    if farewell_intent(norm):
        return "Thank you for contacting Ahmed Group."

    if has(["agpt","accredited investor","accredited","ag trust","reit"]):
        return CMS_QA["agpt"]

    if has(["dundas","1000","1024","mother parkers"]):
        return CMS_QA["dundas"]

    if has(["vendor","procure","tender","bid"]):
        return CMS_QA["vendor"]

    if has(["senior","residence","the pearl"]):
        return CMS_QA["resident_script"]

    if has(["media","press","journalist","reporter"]):
        return CMS_QA["media_script"]

    if has(CONTACT_KEYWORDS):
        return CMS_QA["location"]

    if has(["what do you do","about","company","overview","who are you"]):
        return CMS_QA["about"]

    return None


ASR = None
def init_asr():
    global ASR
    print(f"Loading Whisper {WHISPER_MODEL_NAME} on CPU...")
    ASR = WhisperModel(WHISPER_MODEL_NAME, device="cpu",
                       compute_type=WHISPER_COMPUTE_TYPE, num_workers=2)


def transcribe(pcm16: np.ndarray) -> str:
    audio = pcm16.astype(np.float32) / 32768.0
    segments, _ = ASR.transcribe(audio, language="en", beam_size=1)
    return " ".join((s.text or "").strip() for s in segments).strip()


def _build_context(snippets):
    return "\n".join(f"- {s['title']}: {s['text']}" for s in snippets)


def llm_generate(user_text: str, snippets, allow_contact=False) -> str:
    client = get_openai()
    ctx = _build_context(snippets)

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Context:\n{ctx}\n\nUser question: {user_text}"}
    ]

    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.2,
        max_tokens=220
    )

    out = (resp.choices[0].message.content or "").strip()

    if not allow_contact:
        out = EMAIL_RE.sub("", out)
        out = PHONE_RE.sub("", out)
        for hint in ADDRESS_HINTS:
            out = re.sub(re.escape(hint), "", out, flags=re.I)
        out = re.sub(r"\s{2,}", " ", out).strip()

    return out


# TTS

def tts_openai(text: str):
    return tts_openai_wav_only(text)


def tts_openai_wav_only(text: str):
    try:
        if not text:
            return np.zeros(0, dtype=np.int16), SAMPLE_RATE

        client = get_openai()
        use_voice = VOICE_NAME if VOICE_NAME in VALID_VOICES else "verse"

        resp = client.audio.speech.create(
            model=OPENAI_TTS_MODEL,
            voice=use_voice,
            input=text,
            response_format="wav",
        )

        wav_bytes = getattr(resp, "content", None)
        if not wav_bytes:
            try: wav_bytes = resp.read()
            except: pass

        if not wav_bytes:
            print("[TTS] Empty response")
            return np.zeros(0, dtype=np.int16), SAMPLE_RATE

        with wave.open(io.BytesIO(wav_bytes), "rb") as wf:
            sr = wf.getframerate()
            ch = wf.getnchannels()
            sw = wf.getsampwidth()
            frames = wf.readframes(wf.getnframes())

            pcm = np.frombuffer(frames, dtype=np.int16)
            if ch > 1:
                pcm = pcm.reshape(-1, ch).mean(axis=1).astype(np.int16)

        if sr != SAMPLE_RATE:
            duration = pcm.size / float(sr)
            target_len = max(int(round(duration * SAMPLE_RATE)), 1)
            x_old = np.linspace(0.0, 1.0, pcm.size, endpoint=False, dtype=np.float32)
            x_new = np.linspace(0.0, 1.0, target_len, endpoint=False, dtype=np.float32)
            resampled = np.interp(x_new, x_old, pcm.astype(np.float32))
            pcm16 = np.clip(resampled, -32768, 32767).astype(np.int16)
        else:
            pcm16 = pcm.astype(np.int16)

        print(f"[TTS] ok: samples={len(pcm16)} sr={SAMPLE_RATE}")
        return pcm16, SAMPLE_RATE

    except Exception as e:
        print(f"[TTS] Fatal error: {e}")
        return np.zeros(0, dtype=np.int16), SAMPLE_RATE


class AudioPlayer:
    def __init__(self, sr=SAMPLE_RATE):
        self.sr = sr
        self._lock = threading.Lock()
        self._is_playing = False
        self._play_thread = None

    def is_playing(self):
        with self._lock:
            return self._is_playing

    def _play_blocking(self, pcm16: np.ndarray):
        if pcm16.size == 0:
            with self._lock: self._is_playing = False
            return

        with self._lock:
            self._is_playing = True

        try:
            sd.play(pcm16, self.sr)
            sd.wait()
        except Exception as e:
            print(f"[Audio] Playback error: {e}")

        with self._lock:
            self._is_playing = False

    def play_async(self, pcm16: np.ndarray):
        t = threading.Thread(target=self._play_blocking, args=(pcm16,), daemon=True)
        self._play_thread = t
        t.start()


def speak_text_one_shot(text: str, allow_contact: bool, player: AudioPlayer):
    if not text:
        return
    if not allow_contact and contains_contact_info(text):
        lines = [ln for ln in text.splitlines() if not contains_contact_info(ln)]
        text = " ".join(lines).strip()

    audio, _ = tts_openai(text)
    print(f"AI: {text}")
    player.play_async(audio)


def process_text(text: str, player: AudioPlayer):
    allow_contact = contact_intent(text)
    cms = cms_match(text)
    if cms:
        return speak_text_one_shot(cms, allow_contact, player)

    snippets = rag_search(text, k=4, allow_contact=allow_contact)
    answer = llm_generate(text, snippets, allow_contact=allow_contact)
    return speak_text_one_shot(answer, allow_contact, player)


def main():
    load_knowledge(pdf_paths=[], extra_text_files=[], rebuild=False)
    init_asr()

    class AudioIO:
        def __init__(self): self.sr = SAMPLE_RATE
        def mic_frames(self):
            frames = int(self.sr * FRAME_MS / 1000)
            with sd.InputStream(samplerate=self.sr, channels=1, dtype='int16', blocksize=frames) as stream:
                while True:
                    data, _ = stream.read(frames)
                    yield data.flatten()

    ioa = AudioIO()
    vad = VAD()
    player = AudioPlayer(sr=ioa.sr)

    greeting = "Welcome to Ahmed Group. How can I assist you today?"
    print(f"AI: {greeting}")
    g_audio, _ = tts_openai(greeting)
    print(f"[TTS] Greeting samples: {len(g_audio)}")
    player.play_async(g_audio)

    while player.is_playing():
        time.sleep(0.01)

    buf = []
    last_speech = time.time()

    try:
        for frame in ioa.mic_frames():
            if vad.is_speech(frame):
                buf.append(frame)
                last_speech = time.time()
                continue

            if buf and (time.time() - last_speech) > UTTER_TIMEOUT:
                pcm = np.concatenate(buf)
                buf = []
                duration = len(pcm) / SAMPLE_RATE

                if duration < MIN_UTTER_SEC or duration > MAX_AUDIO_BUFFER_SEC:
                    continue

                text = transcribe(pcm)
                if not text:
                    continue

                print(f"User: {text}")
                process_text(text, player)

    except KeyboardInterrupt:
        print("\nStopped")


if __name__ == "__main__":
    if globals().get("_VOICE_CONCIERGE_RUNNING", False):
        print("(Already running — ignoring duplicate start)")
    else:
        globals()["_VOICE_CONCIERGE_RUNNING"] = True
        try:
            main()
        finally:
            globals()["_VOICE_CONCIERGE_RUNNING"] = False


  import pkg_resources
  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [None]:
import os, io, time, re, json, shutil, threading, wave, asyncio, base64, ssl
from pathlib import Path
from collections import deque

import numpy as np
import sounddevice as sd
import webrtcvad
from dotenv import load_dotenv
from faster_whisper import WhisperModel

# RAG deps
from pypdf import PdfReader
import faiss

# RAG extras
from rank_bm25 import BM25Okapi
import nltk
from nltk.tokenize import word_tokenize

# -------------------- NLTK bootstrap --------------------
def ensure_nltk_resources():
    resources = {
        "punkt": "tokenizers/punkt",
        "punkt_tab": "tokenizers/punkt_tab/english.pickle",
    }
    for pkg, path in resources.items():
        try:
            nltk.data.find(path)
        except LookupError:
            try:
                nltk.download(pkg, quiet=True)
            except Exception as err:
                print(f"[NLTK] failed to download {pkg}: {err}")

ensure_nltk_resources()

# -------------------- ENV & CONFIG --------------------
load_dotenv()

VALID_VOICES = {
    "alloy","echo","fable","onyx","nova","shimmer",
    "coral","verse","ballad","ash","sage","marin","cedar"
}

def _sanitize_env_enum(var_name: str, default: str, allowed: set[str]) -> str:
    raw = os.getenv(var_name, default) or default
    token = re.split(r"[#,;]", raw, maxsplit=1)[0]
    token = token.strip().strip('"').strip("'").replace("”","").replace("“","").replace("’","").replace("‘","")
    token_l = token.lower()
    if token_l not in allowed:
        print(f"[TTS] Warning: {var_name}='{raw}' sanitized -> '{token_l}', invalid. Falling back to '{default}'.")
        return default
    return token_l

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
VOICE_NAME = _sanitize_env_enum("OPENAI_TTS_VOICE", "verse", VALID_VOICES)

# New: Realtime model + URL (override via env if you like)
OPENAI_REALTIME_MODEL = os.getenv("OPENAI_REALTIME_MODEL", "gpt-realtime")
OPENAI_REALTIME_WS = os.getenv(
    "OPENAI_REALTIME_WS",
    f"wss://api.openai.com/v1/realtime?model={OPENAI_REALTIME_MODEL}"
)

# CPU-only Whisper
WHISPER_MODEL_NAME = os.getenv("WHISPER_MODEL_NAME", "tiny.en")
WHISPER_COMPUTE_TYPE = os.getenv("WHISPER_COMPUTE_TYPE", "int8")

# Audio IO
SAMPLE_RATE = 16000
FRAME_MS = 10
UTTER_TIMEOUT = 0.4
MIN_UTTER_SEC = 0.5
MAX_AUDIO_BUFFER_SEC = 10

# -------------------- Ahmed Group content --------------------
SYSTEM_PROMPT = (
    "You are Ahmed Group Concierge. Be professional and brief (1–2 sentences). "
    "Only provide contact details if the user explicitly asks for contact information or how to reach Ahmed Group."
)

INVESTMENT_DISCLAIMER = (
    "This is not an offer to sell or a solicitation to buy securities. "
    "No performance projections or returns are offered. Qualified investors will receive official documents directly from Investor Relations."
)

CMS_QA = {
    "about":
        "Ahmed Group is a Canadian family office and real-estate firm (est. 1966) focused on discovering and developing purpose-built rental and community-enhancing projects. We manage the full stack: development, asset management, property management, and joint ventures.",
    "agpt":
        "Ahmed Group has launched AG Property Trust (AGPT) for accredited investors. I can share an overview and connect you with Investor Relations to provide official documents. Are you an accredited investor, and what’s your preferred email? "
        + INVESTMENT_DISCLAIMER,
    "dundas":
        "Ahmed Group reached a settlement on May 21, 2024 to advance a 568-unit mixed-use, purpose-built rental at 1000 & 1024 Dundas St. E., Mississauga.",
    "vendor":
        "Please review our Vendor Guidelines and current Tenders/Bidding. I can also collect your details for Procurement.",
    "location":
        "Corporate Head Office: 1-1024 Dundas St. E., Mississauga, ON L4Y 2B8 | (905) 949-0999 | contact@ahmed.group.",
    "resident_script":
        "We’re active in the Mississauga community, including Mississauga Senior Residences and The Pearl. I can arrange a call-back or add you to a waitlist. What’s your ideal move-in window?",
    "media_script":
        "Yes—Ahmed Group reached a settlement on May 21, 2024 to advance a 568-unit mixed-use, purpose-built rental at 1000 & 1024 Dundas St. E. I can route you to our communications team—what’s your outlet and deadline?",
}

SEED_SNIPPETS = [
    {"id": "about-001", "title": "Company Overview",
     "text": "Ahmed Group is a Canadian family office and real-estate firm (est. 1966) focused on discovering and developing purpose-built rental and community-enhancing projects. We manage development, asset management, property management, and joint ventures."},
    {"id": "agpt-2025-08-28", "title": "AG Property Trust",
     "text": "AGPT is a private real estate investment trust launched Aug 28, 2025, now open to accredited investors. " + INVESTMENT_DISCLAIMER},
    {"id": "dundas-2024-05-21", "title": "Dundas St. E. Project",
     "text": "A 568-unit mixed-use, purpose-built rental at 1000 & 1024 Dundas St. E., Mississauga advanced after a May 21, 2024 settlement with Mother Parkers."},
    {"id": "contact-001", "title": "Contact / HQ",
     "text": "Corporate Head Office: 1-1024 Dundas St. E., Mississauga, ON L4Y 2B8 | (905) 949-0999 | contact@ahmed.group."},
]

CONTACT_KEYWORDS = ["contact","phone","email","reach","call","address","location","where are you","hq","head office","based"]
FAREWELL_KEYWORDS = ["thank you that's all","thanks that's all","that's all thank you","that's all thanks",
                     "that's everything","that'll be all","that will be all","i'm good thank you",
                     "i'm all set","that's it thank you","that's it thanks"]

EMAIL_RE = re.compile(r"\b[\w\.-]+@[\w\.-]+\.[A-Za-z]{2,}\b")
PHONE_RE = re.compile(r"\+?\d[\d\s\-()]{7,}\d")
ADDRESS_HINTS = ["dundas st", "l4y", "mississauga", "1-1024 dundas"]

# -------------------- OpenAI client (for embeddings + chat) --------------------
def get_openai():
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY not set. Put it in .env or export it in the shell.")
    from openai import OpenAI
    return OpenAI(api_key=OPENAI_API_KEY)

# -------------------- Intent helpers --------------------
def contact_intent(text: str) -> bool:
    t = (text or "").lower()
    return any(k in t for k in CONTACT_KEYWORDS)

def farewell_intent(text: str) -> bool:
    t = (text or "").lower().strip()
    if any(phrase in t for phrase in FAREWELL_KEYWORDS):
        return True
    if len(t.split()) <= 6 and "thank" in t and "all" in t:
        return True
    return False

def contains_contact_info(s: str) -> bool:
    ss = s.lower()
    return EMAIL_RE.search(s) or PHONE_RE.search(s) or any(h in ss for h in ADDRESS_HINTS)

# -------------------- Dedupe --------------------
def normalize_for_dedupe(s: str) -> str:
    if not isinstance(s, str):
        return ""
    t = s.strip().lower()
    t = t.replace("\u2013", "-").replace("\u2014", "-")
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"[^a-z0-9 ]+", "", t)
    return t.strip()

class RecentDeduper:
    def __init__(self, max_items: int = 64):
        self.buf = deque()
        self.max_items = max_items
    def _norm(self, s: str) -> str:
        return normalize_for_dedupe(s)
    def seen(self, s: str, within: float = 8.0) -> bool:
        now = time.time()
        key = self._norm(s)
        while self.buf and (now - self.buf[0][1]) > within:
            self.buf.popleft()
        for txt, ts in self.buf:
            if txt == key:
                return True
        self.buf.append((key, now))
        if len(self.buf) > self.max_items:
            self.buf.popleft()
        return False

# -------------------- VAD (for utterance end detection only) --------------------
class VAD:
    def __init__(self):
        self.vad = webrtcvad.Vad(1)
        self.sr = SAMPLE_RATE
        self.noise_gate = 400
    def is_speech(self, pcm16: np.ndarray) -> bool:
        if np.abs(pcm16).max() < self.noise_gate:
            return False
        try:
            return self.vad.is_speech(pcm16.tobytes(), self.sr)
        except:
            return False

# -------------------- RAG (FAISS+BM25) --------------------
RAG_DIR = Path("./rag_index")
EMBED_MODEL = "text-embedding-3-small"
INDEX = None
INDEX_METADATA = []
BM25 = None

def _embed_texts(texts):
    client = get_openai()
    resp = client.embeddings.create(model=EMBED_MODEL, input=texts)
    return np.array([d.embedding for d in resp.data], dtype="float32")

def _norm_ws(s: str) -> str:
    return re.sub(r"\s+", " ", (s or "").strip())

def _smart_chunks(text: str, max_chars=900, overlap=120):
    paras = [p.strip() for p in re.split(r"\n\s*\n", text or "") if p.strip()]
    chunks, cur = [], ""
    for p in paras:
        if not cur:
            cur = p
        elif len(cur) + 1 + len(p) <= max_chars:
            cur = f"{cur}\n{p}"
        else:
            chunks.append(_norm_ws(cur))
            tail = cur[-overlap:] if overlap else ""
            cur = f"{tail}\n{p}" if tail else p
    if cur.strip():
        chunks.append(_norm_ws(cur))
    return chunks

def _read_pdf(path: str) -> str:
    try:
        r = PdfReader(path)
        out = []
        for p in r.pages:
            out.append(p.extract_text() or "")
        return "\n".join(out)
    except Exception as e:
        print(f"[RAG] PDF read failed {path}: {e}")
        return ""

def _ensure_dir(p: Path):
    p.mkdir(parents=True, exist_ok=True)

def _save_index(index, docs):
    _ensure_dir(RAG_DIR)
    faiss.write_index(index, str(RAG_DIR / "faiss.index"))
    (RAG_DIR / "meta.json").write_text(json.dumps(docs, ensure_ascii=False))
    print(f"[RAG] Saved index with {len(docs)} chunks.")

def _load_index():
    global INDEX, INDEX_METADATA, BM25
    if not (RAG_DIR / "faiss.index").exists():
        return False
    INDEX = faiss.read_index(str(RAG_DIR / "faiss.index"))
    INDEX_METADATA = json.loads((RAG_DIR / "meta.json").read_text())
    corpus_texts = [d["text"] for d in INDEX_METADATA]
    tokenized = [word_tokenize(t.lower()) for t in corpus_texts]
    BM25 = BM25Okapi(tokenized)
    print(f"[RAG] Loaded index with {len(INDEX_METADATA)} chunks.")
    return True

def reset_index():
    if RAG_DIR.exists():
        shutil.rmtree(RAG_DIR)
    print("[RAG] Reset index directory.")

def load_knowledge(pdf_paths=None, extra_text_files=None, rebuild=False):
    global INDEX, INDEX_METADATA, BM25
    if not rebuild and _load_index():
        return

    pdf_paths = pdf_paths or []
    extra_text_files = extra_text_files or []
    docs = []

    for s in SEED_SNIPPETS:
        for i, ch in enumerate(_smart_chunks(s["text"])):
            docs.append({"id": f"seed::{s['id']}::{i}", "title": s["title"], "text": ch})

    for p in pdf_paths:
        txt = _read_pdf(p)
        if not txt:
            continue
        for i, chunk in enumerate(_smart_chunks(txt, max_chars=1100, overlap=160)):
            docs.append({"id": f"pdf::{os.path.basename(p)}::{i}", "title": os.path.basename(p), "text": chunk})

    for tpath in (extra_text_files or []):
        try:
            raw = Path(tpath).read_text(encoding="utf-8")
            for i, chunk in enumerate(_smart_chunks(raw, max_chars=1100, overlap=160)):
                docs.append({"id": f"text::{os.path.basename(tpath)}::{i}", "title": os.path.basename(tpath), "text": chunk})
        except Exception as e:
            print(f"[RAG] text read failed {tpath}: {e}")

    if not docs:
        INDEX, INDEX_METADATA, BM25 = None, [], None
        print("[RAG] No docs to index.")
        return

    texts = [d["text"] for d in docs]
    embs = _embed_texts(texts).astype("float32")
    dim = embs.shape[1]
    faiss.normalize_L2(embs)
    index = faiss.IndexFlatIP(dim)
    index.add(embs)
    INDEX = index
    INDEX_METADATA = docs

    tokenized = [word_tokenize(t.lower()) for t in texts]
    BM25 = BM25Okapi(tokenized)
    _save_index(INDEX, INDEX_METADATA)

def _mmr(query_emb, cand_embs, lambda_mult=0.55, top_k=5):
    sim = np.dot(cand_embs, query_emb.T).reshape(-1)
    selected, selected_idx = [], []
    while len(selected_idx) < min(top_k, len(sim)):
        if not selected_idx:
            i = int(np.argmax(sim))
            selected_idx.append(i)
            selected.append(cand_embs[i])
            continue
        rem_idx = [i for i in range(len(sim)) if i not in selected_idx]
        div = []
        for i in rem_idx:
            max_sim_to_sel = max(np.dot(cand_embs[i], s) for s in selected)
            score = lambda_mult * sim[i] - (1 - lambda_mult) * max_sim_to_sel
            div.append((score, i))
        i = max(div, key=lambda x: x[0])[1]
        selected_idx.append(i)
        selected.append(cand_embs[i])
    return selected_idx

def _hybrid_search(query: str, k=6, allow_contact=False, expand=False):
    q_variants = [query]

    bm25_scores = np.zeros(len(INDEX_METADATA))
    if BM25:
        for q in q_variants:
            toks = word_tokenize(q.lower())
            s = np.array(BM25.get_scores(toks))
            bm25_scores = np.maximum(bm25_scores, s)

    q_emb = _embed_texts([query]).astype("float32")
    faiss.normalize_L2(q_emb)
    oversample = min(50, len(INDEX_METADATA))
    D, I = INDEX.search(q_emb, oversample)
    I = I[0].tolist()
    cand_idxs = list(dict.fromkeys(I))

    fused = []
    for i in cand_idxs:
        meta = INDEX_METADATA[i]
        if not allow_contact and ("contact" in meta["title"].lower() or "contact" in meta["id"].lower()):
            continue
        emb_score = 1.0
        kw_score = bm25_scores[i] if BM25 is not None else 0.0
        fused.append((i, 0.65 * emb_score + 0.35 * (kw_score / (kw_score + 3.0 + 1e-9))))

    if not fused:
        return []

    fused.sort(key=lambda x: x[1], reverse=True)
    top_idx = [i for i, _ in fused[:min(24, len(fused))]]

    cand_embs = _embed_texts([INDEX_METADATA[i]["text"] for i in top_idx]).astype("float32")
    faiss.normalize_L2(cand_embs)
    picked_local = _mmr(q_emb[0], cand_embs, lambda_mult=0.55, top_k=k)
    final_idx = [top_idx[i] for i in picked_local]

    results = [{"title": INDEX_METADATA[i]["title"], "text": INDEX_METADATA[i]["text"], "docId": INDEX_METADATA[i]["id"], "rank": r+1}
               for r, i in enumerate(final_idx)]
    return results

def rag_search(query: str, k=4, allow_contact=False):
    if INDEX is None or INDEX.ntotal == 0:
        return kw_rag_keyword(query, k=k, allow_contact=allow_contact)
    return _hybrid_search(query, k=k, allow_contact=allow_contact, expand=False)

def kw_rag_keyword(query: str, k: int = 2, allow_contact=False):
    q = query.lower().split()
    docs = list(SEED_SNIPPETS)
    if not allow_contact:
        docs = [d for d in docs if "contact" not in d.get("title","").lower() and "contact" not in str(d.get("id","")).lower()]
    scored = [(sum(w in d["text"].lower() for w in q), i, d) for i, d in enumerate(docs)]
    scored.sort(reverse=True, key=lambda x: x[0])
    return [{"title": d["title"], "text": d["text"][:400], "docId": d.get("id", str(i))} for _, i, d in scored[:k]]

# -------------------- CMS matcher --------------------
def cms_match(user_text: str):
    text = user_text if isinstance(user_text, str) else ""
    norm = text.lower()

    def any_in(words):
        return any(w in norm for w in words)

    if farewell_intent(text):
        return "Thank you for contacting Ahmed Group. Feel free to reach out anytime—we're here to help!"
    if any_in(["agpt", "accredited investor", "accredited", "ag property trust", "ag trust", "reit"]):
        return CMS_QA.get("agpt")
    if any_in(["dundas", "1000", "1024", "mother parkers", "dundas st. e.", "dundas street"]):
        return CMS_QA.get("dundas")
    if any_in(["vendor", "procure", "procurement", "tender", "bid", "bidding"]):
        return CMS_QA.get("vendor")
    if any_in(["senior", "residence", "residences", "the pearl", "mississauga senior"]):
        return CMS_QA.get("resident_script")
    if any_in(["media", "press", "journalist", "reporter", "interview", "statement"]):
        return CMS_QA.get("media_script")
    if any_in(CONTACT_KEYWORDS):
        return CMS_QA.get("location")
    if any_in(["what do you do", "about", "company", "what is ahmed", "who are you", "overview"]):
        return CMS_QA.get("about")
    return None

# -------------------- ASR (CPU-only) --------------------
ASR = None
def init_asr():
    global ASR
    print(f"Loading Whisper {WHISPER_MODEL_NAME} on CPU ({WHISPER_COMPUTE_TYPE})...")
    ASR = WhisperModel(WHISPER_MODEL_NAME, device="cpu", compute_type=WHISPER_COMPUTE_TYPE, num_workers=2)

def transcribe(pcm16: np.ndarray) -> str:
    audio = pcm16.astype(np.float32) / 32768.0
    segments, _ = ASR.transcribe(audio, language="en", beam_size=1)
    return " ".join((s.text or "").strip() for s in segments).strip()

# -------------------- LLM (no bracketed tags) --------------------
def _build_context(snippets):
    return "\n".join(f"- {s['title']}: {s['text']}" for s in snippets)

def llm_generate(user_text: str, snippets, allow_contact=False) -> str:
    client = get_openai()
    ctx = _build_context(snippets)
    messages = [
        {"role": "system", "content":
         f"""{SYSTEM_PROMPT}

You must:
- Answer concisely in 2–3 short sentences.
- Only use facts from the provided context; if unknown, say you’re not sure.
- Do NOT include any bracketed citations or tags.
- Do NOT include contact details unless the user explicitly asked for them."""},
        {"role": "user", "content": f"Context:\n{ctx}\n\nUser question: {user_text}"}
    ]
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages,
        temperature=0.2,
        max_tokens=220
    )
    out = (resp.choices[0].message.content or "").strip()

    if not allow_contact:
        out = EMAIL_RE.sub("", out)
        out = PHONE_RE.sub("", out)
        for hint in ADDRESS_HINTS:
            out = re.sub(re.escape(hint), "", out, flags=re.I)
        out = re.sub(r"\s{2,}", " ", out).strip()
    return out

# -------------------- TTS via GPT Realtime (WAV; no barge-in) --------------------
# Requires: pip install websockets
import websockets

async def _realtime_tts_request(text: str, voice: str, ws_url: str, api_key: str) -> bytes:
    """
    Open a Realtime WS session, ask for audio (wav) for provided text,
    collect streamed audio chunks, return full WAV bytes.
    """
    if not text:
        return b""

    headers = {
        "Authorization": f"Bearer {api_key}",
        "OpenAI-Beta": "realtime=v1",
    }

    # Use default SSL context
    ssl_context = ssl.create_default_context()

    async with websockets.connect(ws_url, extra_headers=headers, ssl=ssl_context, max_size=None) as ws:
        # Send the user text into the conversation
        await ws.send(json.dumps({"type": "input_text", "text": text}))

        # Create a response that returns audio in the selected voice
        await ws.send(json.dumps({
            "type": "response.create",
            "response": {
                "modalities": ["audio"],
                "instructions": SYSTEM_PROMPT,
                "audio": {"voice": voice, "format": "wav"}
            }
        }))

        audio_chunks = []
        wav_bytes = b""
        # Collect until the server says the response is completed
        while True:
            msg = await ws.recv()
            try:
                evt = json.loads(msg)
            except Exception:
                # some servers may send binary pings; ignore
                continue

            t = evt.get("type")
            if t == "response.audio.delta":
                # base64-encoded audio chunk (wav stream)
                delta_b64 = evt.get("delta") or ""
                if delta_b64:
                    try:
                        audio_chunks.append(base64.b64decode(delta_b64))
                    except Exception:
                        pass
            elif t in ("response.completed", "response.stop", "error"):
                # stitch chunks (many deltas together are valid WAV if server streams full headers once;
                # to be safe, just concat—server typically sends a self-contained wav)
                wav_bytes = b"".join(audio_chunks)
                break

        return wav_bytes

def _wav_bytes_to_pcm16(wav_bytes: bytes, target_sr=SAMPLE_RATE) -> np.ndarray:
    if not wav_bytes:
        return np.zeros(0, dtype=np.int16)
    try:
        with wave.open(io.BytesIO(wav_bytes), "rb") as wf:
            sr = wf.getframerate()
            ch = wf.getnchannels()
            sw = wf.getsampwidth()
            n  = wf.getnframes()
            frames = wf.readframes(n)
        if sw != 2:
            print(f"[TTS] Unexpected sample width {sw}, coercing to int16.")
        pcm = np.frombuffer(frames, dtype=np.int16)
        if ch == 2:
            pcm = pcm.reshape(-1, 2).mean(axis=1).astype(np.int16)
        elif ch != 1:
            try:
                pcm = pcm.reshape(-1, ch).mean(axis=1).astype(np.int16)
            except Exception:
                pcm = pcm.astype(np.int16)

        if sr != target_sr:
            duration = pcm.size / float(sr)
            target_len = max(int(round(duration * target_sr)), 1)
            x_old = np.linspace(0.0, 1.0, pcm.size, endpoint=False, dtype=np.float32)
            x_new = np.linspace(0.0, 1.0, target_len, endpoint=False, dtype=np.float32)
            resampled = np.interp(x_new, x_old, pcm.astype(np.float32))
            pcm16 = np.clip(resampled, -32768, 32767).astype(np.int16)
        else:
            pcm16 = pcm.astype(np.int16, copy=False)
        return pcm16
    except Exception as e:
        print(f"[TTS] WAV decode error: {e}")
        return np.zeros(0, dtype=np.int16)

def tts_openai(text: str):
    """Public TTS entry (kept same signature)."""
    return tts_openai_realtime_wav(text)

def tts_openai_realtime_wav(text: str):
    try:
        if not text:
            return np.zeros(0, dtype=np.int16), SAMPLE_RATE

        use_voice = VOICE_NAME if VOICE_NAME in VALID_VOICES else "verse"

        # Run the async WS call synchronously
        wav_bytes = asyncio.run(_realtime_tts_request(
            text=text,
            voice=use_voice,
            ws_url=OPENAI_REALTIME_WS,
            api_key=OPENAI_API_KEY
        ))

        if not wav_bytes:
            print("[TTS] Empty WAV from Realtime")
            return np.zeros(0, dtype=np.int16), SAMPLE_RATE

        pcm16 = _wav_bytes_to_pcm16(wav_bytes, target_sr=SAMPLE_RATE)
        print(f"[TTS/Realtime] ok: samples={len(pcm16)} sr={SAMPLE_RATE}")
        return pcm16, SAMPLE_RATE

    except RuntimeError as e:
        # In case asyncio loop already running (e.g., in some environments)
        try:
            loop = asyncio.get_event_loop()
            wav_bytes = loop.run_until_complete(_realtime_tts_request(
                text=text,
                voice=use_voice,
                ws_url=OPENAI_REALTIME_WS,
                api_key=OPENAI_API_KEY
            ))
            pcm16 = _wav_bytes_to_pcm16(wav_bytes, target_sr=SAMPLE_RATE)
            return pcm16, SAMPLE_RATE
        except Exception as e2:
            print(f"[TTS] Realtime fallback failed: {e2}")
            return np.zeros(0, dtype=np.int16), SAMPLE_RATE
    except Exception as e:
        print(f"[TTS] Fatal Realtime TTS error: {e}")
        return np.zeros(0, dtype=np.int16), SAMPLE_RATE

# -------------------- Audio Player (no interrupts) --------------------
class AudioPlayer:
    def __init__(self, sr=SAMPLE_RATE):
        self.sr = sr
        self._lock = threading.Lock()
        self._play_thread = None
        self._is_playing = False

    def is_playing(self):
        with self._lock:
            return self._is_playing

    def _play_blocking(self, pcm16: np.ndarray):
        if pcm16.size == 0:
            with self._lock:
                self._is_playing = False
            return
        with self._lock:
            self._is_playing = True
        try:
            sd.play(pcm16, self.sr)
            sd.wait()  # block until finished
        except Exception as e:
            print(f"[Audio] Playback error: {e}")
        with self._lock:
            self._is_playing = False

    def play_async(self, pcm16: np.ndarray):
        # just play; nothing can stop it mid-way (barge-in removed)
        t = threading.Thread(target=self._play_blocking, args=(pcm16,), daemon=True)
        self._play_thread = t
        t.start()

# -------------------- Speak (one-shot) --------------------
def speak_text_one_shot(text: str, allow_contact: bool, player: AudioPlayer):
    if not text:
        return
    if not allow_contact and contains_contact_info(text):
        lines = [ln for ln in text.splitlines() if not contains_contact_info(ln)]
        text = " ".join(lines).strip()
    audio, _ = tts_openai(text)
    print(f"AI: {text}")
    player.play_async(audio)

# -------------------- Orchestration --------------------
def process_text(text: str, player: AudioPlayer):
    allow_contact = contact_intent(text)
    cms = cms_match(text)
    if cms:
        return speak_text_one_shot(cms, allow_contact, player)
    snippets = rag_search(text, k=4, allow_contact=allow_contact)
    answer = llm_generate(text, snippets, allow_contact=allow_contact)
    return speak_text_one_shot(answer, allow_contact, player)

def main():
    load_knowledge(pdf_paths=[], extra_text_files=[], rebuild=False)
    init_asr()

    class AudioIO:
        def __init__(self): self.sr = SAMPLE_RATE
        def mic_frames(self):
            frames = int(self.sr * FRAME_MS / 1000)
            with sd.InputStream(samplerate=self.sr, channels=1, dtype='int16', blocksize=frames) as stream:
                while True:
                    data, _ = stream.read(frames)
                    yield data.flatten()

    ioa = AudioIO()
    vad = VAD()
    player = AudioPlayer(sr=ioa.sr)

    # Greeting (plays fully; nothing can interrupt it)
    greeting = "Welcome to Ahmed Group. How can I assist you today?"
    print(f"AI: {greeting}")
    g_audio, _ = tts_openai(greeting)
    print(f"[TTS] Greeting samples: {len(g_audio)}")
    player.play_async(g_audio)
    while player.is_playing():
        time.sleep(0.01)

    buf = []
    last_speech = time.time()

    try:
        for frame in ioa.mic_frames():
            if vad.is_speech(frame):
                buf.append(frame)
                last_speech = time.time()
                continue

            if buf and (time.time() - last_speech) > UTTER_TIMEOUT:
                pcm = np.concatenate(buf); buf = []
                duration = len(pcm) / SAMPLE_RATE
                if duration < MIN_UTTER_SEC or duration > MAX_AUDIO_BUFFER_SEC:
                    continue
                text = transcribe(pcm)
                if not text: continue
                print(f"User: {text}")
                process_text(text, player)
    except KeyboardInterrupt:
        print("\nStopped")

# -------------------- Entry guard --------------------
if __name__ == "__main__":
    if globals().get("_VOICE_CONCIERGE_RUNNING", False):
        print("(Already running — ignoring duplicate start)")
    else:
        globals()["_VOICE_CONCIERGE_RUNNING"] = True
        try:
            main()
        finally:
            globals()["_VOICE_CONCIERGE_RUNNING"] = False


[RAG] Loaded index with 4 chunks.
Loading Whisper tiny.en on CPU (int8)...
AI: Welcome to Ahmed Group. How can I assist you today?
[TTS] Realtime fallback failed: This event loop is already running
[TTS] Greeting samples: 0


  return np.zeros(0, dtype=np.int16), SAMPLE_RATE



Stopped
