In [1]:
import gradio as gr
import wikipedia
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_together import ChatTogether
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# ---------- Global cache
# Process-wide store of retrievers that have already been built, so the same
# key does not need its retriever constructed a second time.
retriever_cache: dict = dict()

def fetch_wikipedia(_topic: str, *, chunk_size: int = 500, chunk_overlap: int = 100):
    """Fetch a Wikipedia page and split its text into overlapping chunks.

    Args:
        _topic: Title handed to ``wikipedia.page``.
        chunk_size: Upper bound on chunk length passed to the splitter. The
            splitter measures length with its length function, which counts
            characters by default -- these are not token counts.
        chunk_overlap: Amount of text shared between neighbouring chunks,
            in the same unit as ``chunk_size``.

    Returns:
        A list of ``Document`` objects, one per chunk, in article order.

    Raises:
        RuntimeError: If the page cannot be fetched (the underlying error is
            attached as ``__cause__``), or if the article yields fewer than
            two chunks.
    """
    try:
        content = wikipedia.page(_topic).content
    except Exception as e:
        # The wikipedia package raises several unrelated exception types
        # (missing page, disambiguation, network); fold them into one error
        # the caller can show, but keep the original for debugging.
        raise RuntimeError(f"Couldn’t fetch “{_topic}”: {e}") from e

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_text(content)
    if len(chunks) < 2:
        raise RuntimeError("⚠️ Article too short.")
    return [Document(page_content=ch) for ch in chunks]

_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
_embedding_model = None  # created on first use, then reused for every index


def _get_embeddings():
    """Return the shared embedding model, instantiating it on first use."""
    global _embedding_model
    if _embedding_model is None:
        _embedding_model = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
    return _embedding_model


def build_retriever(docs):
    """Build a FAISS retriever from the documents.

    Args:
        docs: Documents to embed and index.

    Returns:
        The retriever produced by ``vectorstore.as_retriever()`` with its
        default settings.
    """
    # Reuse one embedding model instead of constructing a new one per call;
    # only the vector index needs to be rebuilt for each document set.
    vectorstore = FAISS.from_documents(docs, _get_embeddings())
    return vectorstore.as_retriever()

# ---------- LLM & Chain Setup
# The retrieved passages are injected as {context}; the user's question
# arrives as {input}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Use the context to answer the user’s question."),
        ("human", "Context:\n{context}\n\nQuestion: {input}"),
    ]
)

# SECURITY: no credential is stored in source. With no ``api_key`` argument,
# ChatTogether takes the key from the TOGETHER_API_KEY environment variable,
# so export that variable before running. The key that used to be hard-coded
# here has been exposed and should be revoked.
llm = ChatTogether(
    model="meta-llama/Llama-3-70b-chat-hf",
    temperature=0.3,
)

def qa_answer(topic, question):
    """Answer a question about a Wikipedia article (Gradio callback).

    Args:
        topic: Wikipedia article title typed by the user.
        question: The user's question about that article.

    Returns:
        The model's answer as a string. If an input is blank, or the article
        cannot be fetched or is too short, a short human-readable message is
        returned instead of raising.
    """
    topic = (topic or "").strip()
    question = (question or "").strip()

    # Reject blank input up front so no Wikipedia lookup or LLM call is made
    # for a request that cannot produce a useful answer.
    if not topic:
        return "⚠️ Please enter a Wikipedia article title."
    if not question:
        return "⚠️ Please enter a question."

    # The stripped title is the cache key, so stray whitespace does not cause
    # the same article to be fetched and embedded again.
    retriever = retriever_cache.get(topic)
    if retriever is None:
        try:
            retriever = build_retriever(fetch_wikipedia(topic))
        except RuntimeError as e:
            # fetch_wikipedia reports its failures as RuntimeError with a
            # message meant to be shown to the user.
            return str(e)
        retriever_cache[topic] = retriever

    combine_chain = create_stuff_documents_chain(llm, prompt)
    qa_chain = create_retrieval_chain(retriever=retriever, combine_docs_chain=combine_chain)
    result = qa_chain.invoke({"input": question})
    return result.get("answer", "No answer found.")

# ---------- Gradio UI
# Components are created first (topic, question, answer) and then wired
# into a single Interface whose callback is qa_answer.
_topic_input = gr.Textbox(label="Wikipedia Article Title", value="Sleep and memory")
_question_input = gr.Textbox(
    label="Your Question",
    placeholder="What is the relationship between sleep and memory?",
)
_answer_output = gr.Textbox(label="Answer")

demo = gr.Interface(
    fn=qa_answer,
    inputs=[_topic_input, _question_input],
    outputs=_answer_output,
    title="📚 Wikipedia RAG Chatbot",
    description="Enter a Wikipedia topic and ask a question. Uses retrieval-augmented generation with FAISS, Hugging Face embeddings, and LLaMA 3 70B.",
)

# Start the local web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()


  from .autonotebook import tqdm as notebook_tqdm


* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")



