In [1]:
import nest_asyncio
nest_asyncio.apply()

from telegram import Update
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes
import asyncio
import requests
import os
from io import BytesIO
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from pathlib import Path
import openai
from sentence_transformers import SentenceTransformer
import faiss
import json
import numpy as np

# ------------------ Load APIs ------------------
load_dotenv()

dialogue_history = {}

elevenlabs = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
openai.api_key = os.getenv("OPENAI_API_KEY")

# Load embedding model
model = SentenceTransformer("intfloat/multilingual-e5-large")
dim = model.get_sentence_embedding_dimension()

# Load FAISS index and metadata
index = faiss.read_index("storage/index.faiss")
with open("storage/metadata.jsonl", "r", encoding="utf-8") as f:
    metadata = [json.loads(line) for line in f]

# ------------------ Helper Function ------------------
def generate_answer(query, retrieved_chunks, user_id, model_name="gpt-4o-mini"):
    # Safely extract text
    context_parts = []
    for c in retrieved_chunks:
        if isinstance(c, dict):
            if "text" in c:
                context_parts.append(str(c["text"]))
            else:
                context_parts.append(str(c))
        else:
            context_parts.append(str(c))
    context = "\n\n".join(context_parts)

    history = dialogue_history.get(user_id, [])

    messages = [
        {
            "role": "system",
            "content": (
                "أنت مساعد ذكي تمثل السيد هاشم صفي الدين. "
                "اتبع أسلوب السيد هاشم في الحديث، وكن مبدعًا في صياغة الإجابات مع الالتزام التام بالمعلومات الواردة فقط في السياق. "
                "لا تضف أي معلومة غير موجودة في النصوص المتاحة. إذا لم تجد الجواب في السياق، اعتذر بلباقة ووضوح. "
                "أجب دائمًا باللغة العربية الفصحى، واحترم الشخصيات الدينية واذكر الألقاب المناسبة."
            ),
        },
        *history,
        {
            "role": "user",
            "content": f"السياق:\n{context}\n\nالسؤال: {query}",
        },
    ]

    response = openai.chat.completions.create(
        model=model_name,
        messages=messages,
        temperature=0.2,
    )
    answer = response.choices[0].message.content.strip()

    history.append({"role": "user", "content": query})
    history.append({"role": "assistant", "content": answer})
    dialogue_history[user_id] = history[-10:]  # keep only last 10 exchanges

    return answer

async def reformulate_query(raw_query: str) -> str:
    response = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "اعد صياغة السؤال التالي ليكون واضحًا ودقيقًا وسهل الاسترجاع من قاعدة البيانات النصية"
             "حافظ على صيغة الكلام المعتمدة."
            "حافظ على جميع المعلومات المهمة في السؤال الأصلي دون إضافة أو حذف، وابتعد عن أي تعقيد أو غموض."
            },
            {"role": "user", "content": raw_query},
        ],
        temperature=0.2
    )
    return response.choices[0].message.content.strip()


# ------------------ Telegram Handlers ------------------
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    await update.message.reply_text("👋 Hello! Send me text or a voice message and I will respond with text + Arabic voice.")

async def handle_text(update: Update, context: ContextTypes.DEFAULT_TYPE):
    raw_query = update.message.text
    query = await reformulate_query(raw_query)
    user_id = update.effective_user.id  # Identify user

    # Embed and retrieve
    q_emb = model.encode([f"query: {query}"], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    k = 4
    D, I = index.search(q_emb, k)
    retrieved_chunks = [metadata[idx] for idx in I[0]]

    # Generate answer
    answer = generate_answer(query, retrieved_chunks, user_id)

    # Send text
    await update.message.reply_text(answer)

async def handle_voice(update: Update, context: ContextTypes.DEFAULT_TYPE):
    voice_file = await update.message.voice.get_file()
    audio_bytes = BytesIO()
    await voice_file.download_to_memory(out=audio_bytes)
    audio_bytes.seek(0)

    # Transcribe
    try:
        transcription = elevenlabs.speech_to_text.convert(
            file=audio_bytes,
            model_id="scribe_v1",
            tag_audio_events=True,
            language_code="ara",
            diarize=True,
        )
        raw_query = transcription.text
        query = await reformulate_query(raw_query)
    except Exception as e:
        query = f"[Transcription failed] {e}"

    user_id = update.effective_user.id  # Identify user

    # Embed and retrieve
    q_emb = model.encode([f"query: {query}"], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    k = 3
    D, I = index.search(q_emb, k)
    retrieved_chunks = [metadata[idx] for idx in I[0]]

    # Generate answer
    answer = generate_answer(query, retrieved_chunks, user_id)

    # Send text
    await update.message.reply_text(answer)

# ------------------ Build Bot ------------------
app = Application.builder().token("8440954235:AAFf1SA4l0aTHMrQwErX3w7syqKZdWdWACU").build()
app.add_handler(CommandHandler("start", start))
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_text))
app.add_handler(MessageHandler(filters.VOICE, handle_voice))

# ------------------ Run in Jupyter ------------------
async def run_bot():
    await app.initialize()
    await app.start()
    await app.updater.start_polling()
    print("✅ Full voice-interactive AI bot is running!")

asyncio.create_task(run_bot())

  from .autonotebook import tqdm as notebook_tqdm


<Task pending name='Task-1' coro=<run_bot() running at C:\Users\Lenovo\AppData\Local\Temp\ipykernel_11248\3783228165.py:159>>

✅ Full voice-interactive AI bot is running!
