<a href="https://colab.research.google.com/github/saliSoul/Q-A-ai-powered-System/blob/main/QA_Ai_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary dependencies
!pip install langchain chromadb sentence-transformers transformers rank_bm25 pypdf langchain-community huggingface_hub


In [1]:
import os
import shutil
import google.colab
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
from rank_bm25 import BM25Okapi

# Directory (relative to the working directory) that is passed to Chroma as
# `persist_directory` when the vector store is built and when it is reopened.
CHROMA_DB_DIR = "chroma_db"



In [2]:
#Upload Files ()
def upload_files(data_dir="/content/data"):
    """Upload PDF or TXT files to Colab and copy them into ``data_dir``.

    Opens the Colab upload widget through ``google.colab.files.upload()`` and
    writes the bytes of every uploaded file into ``data_dir``, creating the
    directory first when it does not exist.

    Args:
        data_dir: Destination directory for the uploaded files. Defaults to
            "/content/data", the location this notebook previously hard-coded.

    Returns:
        The ``data_dir`` path the files were written to.
    """
    print("📂 Upload your PDF or TXT files...")
    uploaded = google.colab.files.upload()
    os.makedirs(data_dir, exist_ok=True)

    for filename, content in uploaded.items():
        # Keep only the final path component so an unusual upload name cannot
        # resolve to a location outside data_dir.
        file_path = os.path.join(data_dir, os.path.basename(filename))
        with open(file_path, "wb") as f:
            f.write(content)

    print(f"✅ Uploaded {len(uploaded)} files.")
    return data_dir


In [3]:
# Load Documents
def load_documents(directory):
    """Load the PDF and text files found directly inside ``directory``.

    Files ending in ``.pdf`` are read with ``PyPDFLoader`` and files ending in
    ``.txt`` with ``TextLoader``; the extension is compared case-insensitively.
    Anything else, including sub-directories, is skipped. Entries are visited
    in sorted name order so the returned list is the same on every run.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A flat list with everything the loaders returned, in file-name order.
        Empty when the directory holds no matching files.
    """
    docs = []
    # os.listdir gives no ordering guarantee; sorting keeps downstream
    # chunking and indexing reproducible.
    for file in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, file)
        if not os.path.isfile(file_path):
            # A directory that happens to be named "*.txt" is not a document.
            continue
        # Lower-case once so "REPORT.PDF" / "notes.TXT" are not silently dropped.
        lowered = file.lower()
        if lowered.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
        elif lowered.endswith(".txt"):
            loader = TextLoader(file_path)
        else:
            continue
        docs.extend(loader.load())
    return docs


In [12]:
# Upload new files through the Colab widget, then load them into `documents`,
# which the chunking cell reads.
data_directory = upload_files()
documents = load_documents(data_directory)

# Stop here when nothing was loaded, before later cells run on an empty list.
if not documents:
    raise ValueError("No valid documents found! Please upload PDF or TXT files.")

📂 Upload your PDF or TXT files...


Saving NEOV.txt to NEOV (1).txt
✅ Uploaded 1 files.


In [5]:
#Dynamic Chunking (Adjusts based on document size)
def adaptive_chunking(docs, max_chunk_size=512, overlap_ratio=0.2):
    """Split documents into chunks whose size follows the average document length.

    The chunk size is half the mean ``page_content`` length (in characters),
    capped at ``max_chunk_size`` and never below 1. Consecutive chunks overlap
    by ``overlap_ratio`` of the chunk size.

    Args:
        docs: Documents exposing a ``page_content`` string.
        max_chunk_size: Upper bound for the chunk size. Defaults to 512.
        overlap_ratio: Fraction of the chunk size shared between neighbouring
            chunks. Defaults to 0.2.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` for
        ``docs``, or an empty list when ``docs`` is empty.
    """
    if not docs:
        # Nothing to average over; avoids a ZeroDivisionError below.
        return []

    avg_length = sum(len(doc.page_content) for doc in docs) / len(docs)
    # Very short documents would otherwise produce a chunk size of 0.
    chunk_size = max(1, min(max_chunk_size, int(avg_length / 2)))
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=int(chunk_size * overlap_ratio)
    )
    return text_splitter.split_documents(docs)

# Chunk the loaded documents; `splits` is what the Chroma index and the BM25
# index are built from.
splits = adaptive_chunking(documents)


In [13]:
# Embedding model used for the dense index: BAAI/bge-base-en.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-base-en")

# Build the Chroma vector store from the chunks and persist it under CHROMA_DB_DIR.
# NOTE(review): CHROMA_DB_DIR is never cleared in the visible cells; re-running
# this cell presumably adds the same chunks to the existing store again — confirm.
vector_store = Chroma.from_documents(splits, embeddings, persist_directory=CHROMA_DB_DIR)

# Reopen the store from the same directory; this rebinds `vector_store`, the name
# the retrieval function queries.
vector_store = Chroma(persist_directory=CHROMA_DB_DIR, embedding_function=embeddings)

# BM25 index for sparse (lexical) retrieval over whitespace-split chunk text.
# NOTE(review): tokens keep their original case and punctuation, and the query is
# split the same way, so "neov" vs "NEOV" may not match — verify.
bm25 = BM25Okapi([doc.page_content.split() for doc in splits])



In [14]:

# Build a Hugging Face text2text-generation pipeline around google/flan-t5-large.
# NOTE(review): temperature/top_p are sampling settings, but no do_sample flag is
# passed here, so they may have no effect — confirm against the transformers docs.
qa_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    max_new_tokens=200,
    temperature=0.5,
    top_p=0.85,
)

# Wrap the pipeline as a LangChain LLM.
# NOTE(review): the question-answering cell calls `qa_pipeline` directly; `llm`
# is not used in the visible cells.
llm = HuggingFacePipeline(pipeline=qa_pipeline)
print("LLM (FLAN-T5) initialized.")


Device set to use cpu


LLM (FLAN-T5) initialized.


In [15]:
# Prompt template: {context} receives the joined retrieved chunks and
# {question} receives the user's question.
PROMPT_TEMPLATE = """
You are an AI assistant. Use ONLY the provided context to answer.

Context:
{context}

Question: {question}

Give a well-structured, accurate response.
"""

# Declares the two placeholders used in the template text.
prompt = PromptTemplate(template=PROMPT_TEMPLATE, input_variables=["context", "question"])

#Hybrid Retrieval: Combines BM25 + ChromaDB for better search
def retrieve_documents(query, top_k=5):
    """Run dense and sparse retrieval for ``query`` and merge the hits.

    The Chroma store is asked for ``top_k`` similar chunks and BM25 for its
    ``top_k`` best chunks using the whitespace-split query. Hits are keyed by
    their text: each distinct text appears once, at the position where it was
    first seen (dense hits first), holding the last document seen for it.

    Args:
        query: The search string.
        top_k: Number of hits requested from each retriever.

    Returns:
        A list of unique documents, at most ``2 * top_k`` long.
    """
    semantic_hits = vector_store.similarity_search(query, k=top_k)
    keyword_hits = bm25.get_top_n(query.split(), splits, n=top_k)

    unique_by_text = {}
    for hit in semantic_hits + keyword_hits:
        # Re-assigning an existing key keeps its slot but swaps in the later hit.
        unique_by_text[hit.page_content] = hit
    return list(unique_by_text.values())

In [None]:
# Example question: what does neov do?

In [16]:
def answer_question(question):
    """Retrieve relevant documents and generate an answer.

    Args:
        question: The user's question as plain text.

    Returns:
        The string "No relevant information found." when retrieval returns
        nothing; otherwise the generated text prefixed with a newline and
        "💡 Response: ".
    """
    results = retrieve_documents(question)

    if not results:
        return "No relevant information found."

    # Join chunks with a visible rule so they stay distinguishable in the prompt.
    context_text = "\n\n---\n\n".join(doc.page_content for doc in results)

    # Fill the {context} and {question} placeholders of the prompt template.
    formatted_prompt = prompt.format(context=context_text, question=question)

    # Take the generated text of the first result returned by the pipeline.
    response_text = qa_pipeline(formatted_prompt)[0]["generated_text"]

    # Source attribution is currently disabled:
    # sources = [doc.metadata.get("source", "Unknown") for doc in results]

    return f"\n💡 Response: {response_text}"

# 💬 **Interactive Chat**
while True:
    try:
        # Strip surrounding whitespace so "exit " or " exit" still ends the chat.
        query = input("\n💬 Ask a question (or type 'exit' to quit): ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: leave the loop
        # cleanly instead of surfacing a traceback.
        print("Alright. Have a great day! :)")
        break

    if query.lower() == "exit":
        print("Alright. Have a great day! :)")
        break

    if not query:
        # Nothing typed; prompt again rather than running retrieval and
        # generation on an empty question.
        continue

    response = answer_question(query)
    print(response)




💬 Ask a question (or type 'exit' to quit): what does neov do?





💡 Response: NEOV develops intelligent virtual agents capable of:  Automating customer support and document processing  Enhancing business decision-making through advanced natural language processing (NLP)  Providing accurate and efficient responses using machine learning algorithms 2. Workflow Automation

💬 Ask a question (or type 'exit' to quit): who is neov?

💡 Response: a rapidly growing consulting firm operating across the African continent, specializing in the insurance and fintech sectors

💬 Ask a question (or type 'exit' to quit): how many people work in neov?

💡 Response: 2-10 employees

💬 Ask a question (or type 'exit' to quit): what's neov's mission?


Token indices sequence length is longer than the specified maximum sequence length for this model (595 > 512). Running this sequence through the model will result in indexing errors



💡 Response: Empower businesses with intelligent, scalable, and accessible automation tools

💬 Ask a question (or type 'exit' to quit): is neov a person?

💡 Response: no

💬 Ask a question (or type 'exit' to quit): is neov a company/

💡 Response: yes

💬 Ask a question (or type 'exit' to quit): exit
Alright. Have a great day! :)
