# Simple Wikipedia based RAG Model

Scrapes Wikipedia for context used to tailor answers by a small LLM. Fit to run on CPUs/mobiles.

Built by Pratham Shah for the Cryptonite Taskphase.

In [1]:
import requests
import stanza
from langchain.chains import ConversationalRetrievalChain
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain.memory import ConversationBufferMemory
from langchain_ollama import OllamaLLM

In [2]:
stanza.download('en', verbose=False)
nlp = stanza.Pipeline('en', processors='tokenize,ner')

In [None]:
def get_focus(text):
    doc = nlp(text)
    entities = [ent.text for ent in doc.ents if ent.type in ('PERSON', 'ORG', 'LOC', 'MISC', 'EVENT', 'WORK_OF_ART')]
    print(entities)
    return entities or [text]

In [4]:
def search(query, limit=3):
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'list': 'search',
        'srsearch': query,
        'format': 'json',
        'srlimit': limit
    }
    resp = requests.get(url, params=params).json()
    return [item['title'] for item in resp['query']['search']]

def wikipedia_content(title):
    url = "https://en.wikipedia.org/w/api.php"
    params = {
        'action': 'query',
        'prop': 'extracts',
        'explaintext': True,
        'titles': title,
        'format': 'json'
    }
    resp = requests.get(url, params=params).json()
    pages = resp['query']['pages']
    page = next(iter(pages.values()))
    return page.get('extract', '')

In [5]:
def get_docs_for_query(query):
    entities = get_focus(query)
    docs = []
    text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    for entity in entities:
        titles = search(entity)
        for title in titles:
            content = wikipedia_content(title)
            if content:
                chunks = text_splitter.split_text(content)
                for chunk in chunks:
                    docs.append(Document(page_content=chunk, metadata={"source": title}))
    return docs

In [6]:
def build_faiss_index(documents):
    embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(documents, embeddings)
    return vectorstore

In [7]:
def create_chat_chain():
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    
    llm = OllamaLLM(model="tinyllama")
    dummy_docs = [Document(page_content="Init doc")]
    
    vectorstore = build_faiss_index(dummy_docs)
    retriever = vectorstore.as_retriever(search_kwargs={"k":3})
    chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)
    return chain, vectorstore, memory

In [8]:
def update_vectorstore(chain, vectorstore, query):
    docs = get_docs_for_query(query)
    if docs:
        vectorstore = build_faiss_index(docs)
        chain.retriever = vectorstore.as_retriever(search_kwargs={"k":3})
    return chain, vectorstore

In [9]:
def chat():
    print("Wikipedia RAGbot! (type exit/quit to stop)")
    chain, vectorstore, memory = create_chat_chain()
    while True:
        query = input("\nYou: ")
        if query.lower() in ['exit', 'quit']:
            print("thank you!")
            break
        chain, vectorstore = update_vectorstore(chain, vectorstore, query)
        result = chain.run(query)
        print("RAG Model:", result)


In [None]:
chat()

Wikipedia RAGbot! (type exit/quit to stop)


  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
  embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
2025-07-24 19:09:01.644684: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-24 19:09:01.739360: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753364341.774817    5746 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753364341.785066    5746 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alread

nothing found!


  result = chain.run(query)
