# Installation of python packages

In [None]:
!pip install pdfplumber
!pip install pytesseract
!pip install pdf2image

# Import the packages and set up variables for execution

In [None]:
import os
import requests
import pdfplumber
import pytesseract
from pdf2image import convert_from_path

# Chat-completions endpoint of the local server
api_url = "http://localhost:1234/v1/chat/completions"
# API identifier of the model <-- Replace with the model you downloaded and provisioned in LM Studio
model_name = "phi-3.1-mini-128k-instruct"
# PDF document to query <-- Replace with the path to your own PDF file
pdf_file = "TataComms-q4-fy2025-1.pdf"

# Python function definition for extracting text from PDF

In [None]:

def extract_text_from_pdf(pdf_path):
    """Return the text of a PDF, preferring its text layer over OCR.

    The text of every page is read with pdfplumber first. If that yields
    nothing, or pdfplumber raises, the file is converted to images with
    pdf2image and each image is run through pytesseract instead.

    Args:
        pdf_path: Location of the PDF file to read.

    Returns:
        The extracted text with surrounding whitespace stripped. It may be
        an empty string when neither method produces any text.
    """
    try:
        with pdfplumber.open(pdf_path) as document:
            # A page without extractable text contributes an empty string.
            page_texts = [page.extract_text() or "" for page in document.pages]
            embedded = "\n".join(page_texts).strip()
        if embedded:
            return embedded
    except Exception as e:
        # Best effort: any pdfplumber failure simply triggers the OCR path.
        print("pdfplumber failed:", e)

    print("Using OCR fallback...")
    rendered_pages = convert_from_path(pdf_path)
    recognised = "".join(
        pytesseract.image_to_string(image) for image in rendered_pages
    )
    return recognised.strip()


# Python function definition for QnA and calling the LLM deployed in LM Studio

In [None]:
def ask_pdf_question(pdf_text, question, *, max_chars=3000, timeout=None):
    """Send the PDF content and the user's question to the model.

    Args:
        pdf_text: Text extracted from the document. Only the first
            ``max_chars`` characters are placed in the prompt.
        question: The question to answer from the document.
        max_chars: Number of leading characters of ``pdf_text`` to include.
            Defaults to 3000; pass ``None`` to send the whole text.
        timeout: Forwarded to ``requests.post``. The default ``None`` waits
            indefinitely, which is what this function always did.

    Returns:
        The ``content`` string of the first choice in the API response.

    Raises:
        RuntimeError: If the server answers with a status other than 200,
            or if a 200 response does not have the expected JSON shape.
    """
    prompt = (
        "You are given the following document:\n\n"
        f"{pdf_text[:max_chars]}\n\n"
        "Answer the following question based on it:\n\n"
        f"{question}"
    )

    payload = {
        "model": model_name,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant that answers questions based on provided documents.",
            },
            {"role": "user", "content": prompt},
        ],
        "temperature": 0.3,
        "max_tokens": 1024,
    }

    response = requests.post(api_url, json=payload, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(f"API Error {response.status_code}: {response.text}")

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as err:
        # A 200 reply that is not JSON, or lacks choices[0].message.content,
        # would otherwise surface as a bare KeyError/IndexError message.
        raise RuntimeError(f"Unexpected API response: {response.text}") from err

# Main function for initiating the QnA Chatbot and calling other functions

In [None]:

if __name__ == "__main__":
    # Interactive question-and-answer session over the PDF named by
    # `pdf_file`: extract its text once, then answer questions until the
    # user types 'exit' or 'quit'.

    # isfile() rather than exists(): a directory path is not a usable PDF.
    if not os.path.isfile(pdf_file):
        print("PDF file not found.")
        # SystemExit instead of exit(): exit() is a helper injected by the
        # `site` module for interactive use and is not always available.
        raise SystemExit(1)

    print("Extracting text from PDF...")
    pdf_text = extract_text_from_pdf(pdf_file)

    if not pdf_text:
        print("No text could be extracted.")
        raise SystemExit(1)

    print("\nYou can now ask questions about the PDF.\nType 'exit' to quit.")

    while True:
        try:
            question = input("\nAsk a question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
            print()
            break
        if question.lower() in ("exit", "quit"):
            break
        if not question:
            # Nothing typed: ask again instead of sending an empty question.
            continue
        try:
            print("\nGenerating...\n")
            answer = ask_pdf_question(pdf_text, question)
            print("\nAnswer:\n", answer)
        except Exception as e:
            # Top-level boundary: report the failure and keep the session alive.
            print("Error:", e)
