In [11]:
# pdf_qa_debug.py

from PROJECTS.ai_analyst_assistant1.pdf_loader import extract_text_from_pdf
from PROJECTS.ai_analyst_assistant1.qa_engine import chunk_text, embed_chunks, create_faiss_index, search_similar_chunks
from PROJECTS.ai_analyst_assistant1.llm_answer import generate_answer


# ----------------------------
# Main Script
# ----------------------------
def run_pdf_qa(pdf_path, top_k=7):
    """Run an interactive question-answering loop over one PDF file.

    The PDF is opened in binary mode and handed to ``extract_text_from_pdf``;
    the resulting pages are chunked, embedded and indexed, after which
    questions are read from stdin until the user types ``exit``/``quit``
    or closes the input stream.

    Args:
        pdf_path: Filesystem path of the PDF to load.
        top_k: Number of chunks to retrieve per question (default 7,
            the value that was previously hard-coded).

    Returns:
        None. All results are printed to stdout.

    Raises:
        OSError: If ``pdf_path`` cannot be opened.
    """
    print(f"📄 Reading PDF: {pdf_path}")
    with open(pdf_path, "rb") as f:
        pages = extract_text_from_pdf(f)

    # Each page is indexed with "text"; bail out when every page is blank
    # so that nothing empty gets embedded.
    if not pages or all(not p["text"].strip() for p in pages):
        print("❌ No valid text extracted from PDF.")
        return

    print("📚 Chunking and indexing...")
    chunks = chunk_text(pages)
    embeddings, texts = embed_chunks(chunks)
    index = create_faiss_index(embeddings)

    print(f"✅ {len(chunks)} chunks created.\n")
    print("✅ Ready. Ask questions below (type 'exit' to quit):\n")

    while True:
        try:
            question = input("💬 Question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session without a traceback.
            print()
            break

        # A blank line carries no query; re-prompt instead of sending an
        # empty string to retrieval and the LLM.
        if not question:
            continue
        if question.lower() in ("exit", "quit"):
            break

        context_chunks = search_similar_chunks(index, question, texts, top_k=top_k)
        combined_context = "\n\n".join(context_chunks)

        # Debug: show the first 500 characters of the retrieved context.
        print("\n🔍 Top context:\n" + combined_context[:500] + "\n...\n")

        answer = generate_answer(question, combined_context)

        print("\n🧾 Your Question:")
        print(f"❓ {question}")
        print("\n💡 Answer:")
        print(f"{answer}\n{'-' * 60}")


# ----------------------------
# Run It
# ----------------------------
if __name__ == "__main__":
    # Sample document used for manual debugging; point this at another
    # PDF to exercise the pipeline on different input.
    pdf_path = r"C:\Users\anmol\Downloads\sample-report.pdf"
    run_pdf_qa(pdf_path=pdf_path)


📄 Reading PDF: C:\Users\anmol\Downloads\sample-report.pdf




📚 Chunking and indexing...
✅ 18 chunks created.

✅ Ready. Ask questions below (type 'exit' to quit):



The following generation flags are not valid and may be ignored: ['temperature', 'top_k']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



🔍 Top context:
Table of Contents 1.

Multi-Page Report “A comprehensive and content-heavy report that includes text, images, and tables for thorough testing of pagination and complex layouts.” Prepared By Sample Team sample-files.com

Product Overview 5.

Results & Discussion 6.

Introduction 2.

Sales Projections 8.

Marketing Strategy 7.
...


🧾 Your Question:
❓ List the table of contents

💡 Answer:
1. Multi-Page Report
------------------------------------------------------------
