In [11]:
# pdf_qa_debug.py

from PROJECTS.ai_analyst_assistant1.pdf_loader import extract_text_from_pdf
from PROJECTS.ai_analyst_assistant1.qa_engine import chunk_text, embed_chunks, create_faiss_index, search_similar_chunks
from PROJECTS.ai_analyst_assistant1.llm_answer import generate_answer


# ----------------------------
# Main Script
# ----------------------------
def run_pdf_qa(pdf_path, *, top_k=7, preview_chars=500):
    """Run an interactive question-answering session over a single PDF.

    The PDF is opened in binary mode and handed to ``extract_text_from_pdf``;
    the resulting pages are chunked, embedded and indexed once, and then the
    user is prompted for questions in a loop. For each question the most
    similar chunks are retrieved, joined into one context string, previewed
    on stdout, and passed to ``generate_answer``.

    Args:
        pdf_path: Path of the PDF file to read.
        top_k: Number of chunks requested from ``search_similar_chunks`` for
            every question.
        preview_chars: Maximum number of characters of the combined context
            echoed as a debug preview before the answer.

    Returns:
        None. All results are printed. The function returns early when no
        page yields any non-whitespace text.

    Raises:
        OSError: Propagated from ``open`` when the file cannot be read
            (e.g. ``FileNotFoundError`` for a missing path).
    """
    print(f"📄 Reading PDF: {pdf_path}")
    with open(pdf_path, "rb") as f:
        pages = extract_text_from_pdf(f)

    # Each page is indexed by its "text" key; bail out when every page is
    # empty or whitespace-only so nothing downstream runs on empty input.
    if not pages or all(not p["text"].strip() for p in pages):
        print("❌ No valid text extracted from PDF.")
        return

    print("📚 Chunking and indexing...")
    chunks = chunk_text(pages)
    embeddings, texts = embed_chunks(chunks)
    index = create_faiss_index(embeddings)

    print(f"✅ {len(chunks)} chunks created.\n")
    print("✅ Ready. Ask questions below (type 'exit' to quit):\n")

    while True:
        try:
            question = input("💬 Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C / kernel interrupt at the prompt:
            # end the session quietly instead of dumping a traceback.
            print()
            break

        if not question:
            # A blank line carries no query; re-prompt rather than spending
            # a retrieval and generation round-trip on an empty string.
            continue
        if question.lower() in ("exit", "quit"):
            break

        context_chunks = search_similar_chunks(index, question, texts, top_k=top_k)
        combined_context = "\n\n".join(context_chunks)

        # Debug: show the start of the retrieved context. The ellipsis is
        # printed only when the preview really was cut short.
        preview = combined_context[:preview_chars]
        if len(combined_context) > preview_chars:
            preview += "\n..."
        print("\n🔍 Top context:\n" + preview + "\n")

        answer = generate_answer(question, combined_context)

        print("\n🧾 Your Question:")
        print(f"❓ {question}")
        print("\n💡 Answer:")
        print(f"{answer}\n{'-' * 60}")


# ----------------------------
# Script entry point
# ----------------------------
if __name__ == "__main__":
    # Edit this location to start a session against another PDF.
    pdf_path = r"C:\Users\anmol\Downloads\sample-report.pdf"
    run_pdf_qa(pdf_path=pdf_path)


📄 Reading PDF: C:\Users\anmol\Downloads\sample-report.pdf




📚 Chunking and indexing...
✅ 18 chunks created.

✅ Ready. Ask questions below (type 'exit' to quit):



The following generation flags are not valid and may be ignored: ['temperature', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



🔍 Top context:
Table of Contents 1.

Multi-Page Report “A comprehensive and content-heavy report that includes text, images, and tables for thorough testing of pagination and complex layouts.” Prepared By Sample Team sample-files.com

Product Overview 5.

Results & Discussion 6.

Introduction 2.

Sales Projections 8.

Marketing Strategy 7.
...


🧾 Your Question:
❓ List the table of contents

💡 Answer:
1. Multi-Page Report
------------------------------------------------------------
