In [None]:
!pip install python-telegram-bot --quiet
!pip install langchain==0.2.14 langchain-nvidia-ai-endpoints==0.2.1 langchain-community==0.2.12 langchain-core==0.2.33
!pip install faiss-cpu==1.8.0 #Facebook AI Similarity Search
!pip install PyMuPDF==1.24.4 #for PDF parsing and content extraction
!pip install beautifulsoup4 #to extract data from web pages or documents.



In [None]:
import os
import re
import logging
import requests
from bs4 import BeautifulSoup
from telegram import Update
from telegram.ext import ApplicationBuilder, CommandHandler, MessageHandler, filters, ContextTypes, ConversationHandler
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.memory import ConversationBufferMemory
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT

In [None]:
import getpass

# Ask for the NVIDIA API key only when a plausible one is not already exported.
# The key is read interactively (getpass does not echo it) and lives only in the
# process environment. Never paste a key into the notebook, not even as a
# comment: notebooks get shared and committed, and any key that has appeared in
# one must be treated as leaked and revoked.
if not os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    # strip() removes the stray whitespace/newline that often comes with a paste.
    nvapi_key = getpass.getpass("Enter your NVIDIA API key: ").strip()
    # Explicit raise rather than `assert`: assertions are skipped under `python -O`.
    if not nvapi_key.startswith("nvapi-"):
        raise ValueError("Invalid NVIDIA API key.")
    os.environ["NVIDIA_API_KEY"] = nvapi_key

Enter your NVIDIA API key: ··········


In [None]:
def html_document_loader(url, timeout=15):
    """Download a web page and return its visible text as one whitespace-normalized string.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would stall the caller
            indefinitely.

    Returns:
        The page text with ``<script>``/``<style>`` elements removed and every
        run of whitespace collapsed to a single space. Returns ``""`` (after
        printing the reason) when the page cannot be fetched or parsed, so a
        caller iterating over many URLs can skip the bad one and carry on.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx responses as failures so error pages are not returned as content.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Script and style bodies are not visible text; remove them before extraction.
        for tag in soup(["script", "style"]):
            tag.decompose()
        # Raw string: "\s" inside a plain literal is an invalid escape sequence.
        return re.sub(r"\s+", " ", soup.get_text()).strip()
    except Exception as e:  # deliberate best-effort: one bad URL must not abort the caller
        print(f"Failed to load {url}: {e}")
        return ""

def create_embeddings(embedding_path="./data/crisis_embedding", urls=None):
    """Build, or extend, the on-disk FAISS index from a list of web pages.

    Each page is fetched with ``html_document_loader``, split into
    1000-character chunks with a 100-character overlap, embedded with the
    NVIDIA ``NV-Embed-QA`` model and stored in FAISS under ``embedding_path``.

    Args:
        embedding_path: Directory of the persisted index. If it already exists
            the index is loaded and the new chunks are appended to it;
            otherwise a new index is created. Note that calling this twice
            with the same URLs appends the same chunks twice.
        urls: Pages to ingest. Defaults to the built-in list of crisis
            management and news sources.

    Returns:
        The FAISS vector store after it has been saved to ``embedding_path``.

    Raises:
        RuntimeError: If no index exists yet and none of the pages yielded any
            text, so there is nothing to build an index from.
    """
    if urls is None:
        urls = [
            "https://petra.gov.jo/Include/Main.jsp?lang=en",
            "https://www.aljazeera.com/",
            "https://jordan.gov.jo/EN/ListDetails/Government_Entities/55/6",
            "https://www.ncscm.gov.jo/EN/Pages/Blocking_Water_and_Floods",
            "https://www.ncscm.gov.jo/EN/Pages/Earthquakes",
            "https://www.ncscm.gov.jo/EN/Pages/Snowfall",
            "https://www.jordannews.jo/Section-109/News/Crisis-Management-Center-launches-drill-Sunday-27483",
            "https://www.ncscm.gov.jo/EN/Pages/Contact_Us",
            "https://www.icrc.org/en",
            "https://jordantimes.com/",
            "https://www.jnrcs.org/ar/latestnews",
            "https://www.thepersonal.com/blog/-/what-to-do-in-case-of-a-natural-disaster",
            "https://www.getprepared.gc.ca/cnt/kts/index-en.aspx",
        ]

    # The loader returns "" for pages it could not fetch; drop those here and
    # keep each text paired with its URL so chunks can record where they came from.
    pages = [(url, html_document_loader(url)) for url in urls]
    pages = [(url, text) for url, text in pages if text]

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.create_documents(
        [text for _, text in pages],
        metadatas=[{"source": url} for url, _ in pages],
    )
    # NVIDIA embedding model used to vectorize the text chunks.
    embeddings = NVIDIAEmbeddings(model="NV-Embed-QA", truncate="END")

    if os.path.exists(embedding_path):
        # SECURITY: load_local unpickles the stored docstore, and unpickling can
        # execute arbitrary code. Only point this at an index this notebook wrote.
        vectorstore = FAISS.load_local(folder_path=embedding_path, embeddings=embeddings, allow_dangerous_deserialization=True)
        if texts:  # nothing fetched -> leave the existing index untouched
            vectorstore.add_documents(texts)
    else:
        if not texts:
            # Fail with an actionable message instead of an opaque error from
            # building an index out of zero documents.
            raise RuntimeError(
                "No content could be loaded from any source URL; cannot build the vector store."
            )
        vectorstore = FAISS.from_documents(texts, embedding=embeddings)

    vectorstore.save_local(folder_path=embedding_path)
    return vectorstore

# Directory that holds the persisted FAISS index.
embedding_path = "./data/crisis_embedding"

# First run only: crawl the sources and write the index to disk.
if not os.path.exists(embedding_path):
    create_embeddings(embedding_path)

# Reopen the persisted index; `docsearch` is the entry point for retrieval.
embedding_model = NVIDIAEmbeddings(truncate="END", model="NV-Embed-QA")
docsearch = FAISS.load_local(
    embeddings=embedding_model,
    folder_path=embedding_path,
    allow_dangerous_deserialization=True,
)


In [None]:

# Answering side: a Mixtral chat model feeding a "stuff" QA chain that answers
# from the retrieved documents.
chat = ChatNVIDIA(
    model="mistralai/mixtral-8x7b-instruct-v0.1",
    temperature=0.1,
    max_tokens=1000,
    top_p=1.0,
)
doc_chain = load_qa_chain(chat, chain_type="stuff", prompt=QA_PROMPT)

# Rephrasing side: a Llama 3 model that condenses a follow-up question using
# the condense-question prompt.
llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

# Chat history buffer, exposed to the chain under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Wire retriever, rephraser, answerer and memory into one conversational chain.
qa = ConversationalRetrievalChain(
    retriever=docsearch.as_retriever(),
    combine_docs_chain=doc_chain,
    memory=memory,
    question_generator=question_generator,
)


  warn_deprecated(
stuff: https://python.langchain.com/v0.2/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/v0.2/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/v0.2/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/v0.2/docs/how_to/#qa-with-rag
  warn_deprecated(
  warn_deprecated(


In [None]:
import asyncio

# Emit INFO-level (and above) log records.
logging.basicConfig(level=logging.INFO)

# Conversation states, in the order the dialogue moves through them.
ASK_TYPE = 0      # waiting for the crisis type
ASK_LOCATION = 1  # waiting for the location
CHAT = 2          # open-ended follow-up Q&A

# Data collected from each conversation, keyed by chat id.
user_context = dict()

async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Greet the user and ask which kind of crisis they are facing.

    Returns:
        ASK_TYPE, the state in which the crisis type is expected.
    """
    greeting = "💬Welcome! What type of crisis are you facing?"
    await update.message.reply_text(greeting)
    return ASK_TYPE

# Record the crisis type for this chat, then ask where the user is.
async def ask_location(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Store the message text as this chat's crisis type and request a location.

    Any previous entry for the chat is replaced with a new one holding only
    the crisis type.

    Returns:
        ASK_LOCATION, the state in which the location is expected.
    """
    crisis_type = update.message.text
    chat_id = update.effective_chat.id
    user_context[chat_id] = {"crisis": crisis_type}
    location_prompt = "📌Please share your location (city/area)?"
    await update.message.reply_text(location_prompt)
    return ASK_LOCATION

# Run the RAG chain on crisis + location, share the answer, then handle follow-ups.

# Telegram rejects a text message longer than this many characters.
_TELEGRAM_MAX_MESSAGE_CHARS = 4096


def _build_chat_chain():
    """Create a retrieval QA chain that owns a fresh, empty chat history.

    The module-level `qa` chain holds a single ConversationBufferMemory for the
    whole process, so every Telegram chat would read and extend the same
    history. Building one chain per chat (reusing the shared retriever and LLM
    sub-chains) keeps each conversation's history separate.
    """
    return ConversationalRetrievalChain(
        retriever=docsearch.as_retriever(),
        combine_docs_chain=doc_chain,
        memory=ConversationBufferMemory(memory_key="chat_history", return_messages=True),
        question_generator=question_generator,
    )


async def _reply_with_answer(update, chain, question):
    """Ask `chain` the question and send its answer to the chat.

    The chain call is synchronous, so it is run in the default executor; calling
    it directly inside the coroutine would freeze the event loop, and with it
    every other chat, until the model responds. Failures are logged and turned
    into an apology message instead of leaving the user without any reply.
    """
    try:
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, chain.invoke, {"question": question})
        answer = (result.get("answer") or "").strip()
    except Exception:
        logging.exception("Failed to generate an answer")
        answer = ""

    if not answer:
        # Telegram also rejects empty messages, so never forward a blank answer.
        await update.message.reply_text("Sorry, I could not generate an answer right now. Please try again.")
        return

    # Send long answers in consecutive slices that each fit in one message.
    for offset in range(0, len(answer), _TELEGRAM_MAX_MESSAGE_CHARS):
        await update.message.reply_text(answer[offset:offset + _TELEGRAM_MAX_MESSAGE_CHARS])


async def provide_advice(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Store the location, answer the "crisis in location" query, invite follow-ups.

    Returns:
        CHAT, the state in which follow-up questions are handled.
    """
    user_id = update.effective_chat.id
    state = user_context[user_id]
    state["location"] = update.message.text
    # New conversation -> new chain, so history from an earlier run is not reused.
    state["chain"] = _build_chat_chain()
    crisis_info = f"Crisis: {state['crisis']} in {state['location']}"
    await _reply_with_answer(update, state["chain"], crisis_info)
    await update.message.reply_text("Do you have more questions about this crisis?")
    return CHAT

# Handle general follow-up questions or end conversation if user sends /stop
async def continue_conversation(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Answer a follow-up question using this chat's own chain and history.

    Returns:
        CHAT to keep the conversation open, or ConversationHandler.END when the
        message text is "/stop".
    """
    chat_id = update.effective_chat.id
    # Defensive only: the CHAT state is registered with a filter that excludes
    # commands, so "/stop" normally reaches the `stop` fallback instead.
    if update.message.text.strip().lower() == "/stop":
        user_context.pop(chat_id, None)
        await update.message.reply_text("Stay safe. Ending conversation.")
        return ConversationHandler.END

    state = user_context.setdefault(chat_id, {})
    chain = state.get("chain")
    if chain is None:
        chain = state["chain"] = _build_chat_chain()
    await _reply_with_answer(update, chain, update.message.text)
    return CHAT

async def stop(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """End the conversation and forget what this chat told the bot.

    Returns:
        ConversationHandler.END.
    """
    # Release the per-chat entry so the user's crisis/location details are not
    # kept in memory after the conversation is over.
    user_context.pop(update.effective_chat.id, None)
    await update.message.reply_text("Conversation stopped. Stay safe!")
    return ConversationHandler.END

# Build the Telegram application. The bot token is a credential: it is read from
# the TELEGRAM_BOT_TOKEN environment variable, or prompted for without echo,
# and must never be written into the notebook. A token that has appeared in a
# shared or committed notebook should be revoked via BotFather and replaced.
telegram_token = os.environ.get("TELEGRAM_BOT_TOKEN") or getpass.getpass("Enter your Telegram bot token: ")
app = ApplicationBuilder().token(telegram_token.strip()).build()

# Conversation flow: /start opens it, each state consumes one non-command text
# message, and /stop is the fallback.
_state_callbacks = {
    ASK_TYPE: ask_location,
    ASK_LOCATION: provide_advice,
    CHAT: continue_conversation,
}

handler = ConversationHandler(
    entry_points=[CommandHandler("start", start)],
    states={
        state: [MessageHandler(filters.TEXT & ~filters.COMMAND, callback)]
        for state, callback in _state_callbacks.items()
    },
    fallbacks=[CommandHandler("stop", stop)],
)

app.add_handler(handler)


# Async function to start and keep the bot running inside Colab
async def run_bot():
    await app.initialize()
    await app.start()
    print("Bot started. Send /start to your bot.")
    await app.updater.start_polling()
    # Keep it running forever unless stopped manually
    await asyncio.Event().wait()

# Start the bot in the background event loop
await run_bot()


Bot started. Send /start to your bot.


  warn_deprecated(
