# In [None]:
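# Install required libraries if not already installed
# (pdf2image additionally needs the Poppler utilities available on the system)
# !pip install pdf2image
# !pip install pdfminer.six  # provides pdfminer.high_level.extract_text
# !pip install tqdm
# !pip install llama_stack_client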

# Required imports
from pdf2image import convert_from_path
from pdfminer.high_level import extract_text
from io import BytesIO
import base64
import os
import concurrent.futures
from tqdm import tqdm
import json
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import SystemMessage, UserMessage

# Function to convert PDF to images
def convert_doc_to_images(pdf_path):
    """Render the PDF at ``pdf_path`` into a list of page images.

    Args:
        pdf_path: Path of the PDF file handed to ``convert_from_path``.

    Returns:
        Whatever ``convert_from_path`` returns on success. If it raises,
        the error is printed and an empty list is returned instead, so
        callers can treat "no images" and "conversion failed" alike.
    """
    try:
        return convert_from_path(pdf_path)
    except Exception as err:
        # Best-effort: report the failure and fall through to the empty result.
        print(f"Error converting PDF to images: {err}")
    return []

# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Pull the text content out of the PDF at ``pdf_path``.

    Args:
        pdf_path: Path of the PDF file handed to ``extract_text``.

    Returns:
        The value returned by ``extract_text`` on success. If it raises,
        the error is printed and an empty string is returned instead.
    """
    try:
        return extract_text(pdf_path)
    except Exception as err:
        # Best-effort: report the failure and fall through to the empty result.
        print(f"Error extracting text from PDF: {err}")
    return ""

# Function to convert image to base64 for LlamaStack analysis
def get_img_uri(img):
    """Encode an image as a base64 JPEG ``data:`` URI.

    Args:
        img: PIL-style image object exposing ``save(fp, format=...)``.
            If it carries a ``mode`` attribute naming an alpha or palette
            mode, it must also expose ``convert(mode)``.

    Returns:
        A string of the form ``data:image/jpeg;base64,<payload>``.
    """
    # JPEG cannot store an alpha channel or a palette; Pillow raises OSError
    # when asked to save such modes, so flatten them to RGB first. Other
    # modes (e.g. RGB, L, CMYK) are passed through untouched.
    if getattr(img, "mode", None) in ("RGBA", "LA", "P", "PA"):
        img = img.convert("RGB")

    buffer = BytesIO()
    img.save(buffer, format="jpeg")
    base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{base64_image}"

# LlamaStack inference function
def analyze_image_llama_stack(img_uri, client):
    """Request a detailed description of one page image from LlamaStack.

    Args:
        img_uri: String interpolated into the user message; callers in this
            file pass the ``data:`` URI built by ``get_img_uri``.
        client: Object exposing ``inference.chat_completion`` (a
            ``LlamaStackClient`` in this file).

    Returns:
        The ``completion_message.content`` attribute of the response.
    """
    system_prompt = '''
    You will be provided with an image of a pdf page or a slide. Your goal is to describe the content of the image in detail.
    Do not mention the format of the image or page numbers, but focus on explaining the contents as if you are presenting it to a technical audience.
    '''

    # NOTE(review): the URI is embedded in a plain-text user message rather
    # than attached as image content -- confirm the server/model actually
    # decodes it as an image instead of reading the base64 as text.
    conversation = [
        SystemMessage(content=system_prompt, role="system"),
        UserMessage(content=f"Here is the image: {img_uri}", role="user"),
    ]

    completion = client.inference.chat_completion(
        model="Llama3.2-11B-Vision-Instruct",
        messages=conversation,
    )
    return completion.completion_message.content

# Function to process a single PDF and analyze its pages
def process_pdf(pdf_path, client, *, skip_first_page=True, max_workers=8):
    """Extract the text of a PDF and describe its pages with LlamaStack.

    Args:
        pdf_path: Path of the PDF file to process.
        client: Client object forwarded to ``analyze_image_llama_stack``.
        skip_first_page: When true (the default, matching the previous
            hard-coded behaviour) the first page image is not analysed.
        max_workers: Size of the thread pool used to analyse pages
            concurrently (default 8, the previous hard-coded value).

    Returns:
        A dict with ``filename`` (base name of ``pdf_path``) and ``text``
        (result of ``extract_text_from_pdf``). When page images were
        produced it also has ``pages_description``: one analysis result
        per analysed page, in page order. The key is absent when image
        conversion yielded nothing.

    Raises:
        Any exception raised while encoding a page or by a page analysis
        propagates to the caller.
    """
    doc = {"filename": os.path.basename(pdf_path)}
    doc['text'] = extract_text_from_pdf(pdf_path)

    images = convert_doc_to_images(pdf_path)
    if not images:
        # No page images (empty PDF or failed conversion): text-only result.
        return doc

    pages = images[1:] if skip_first_page else images

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(analyze_image_llama_stack, get_img_uri(page), client)
            for page in pages
        ]

        # as_completed is used only to drive the progress bar; results are
        # collected afterwards from `futures` so they stay in page order.
        with tqdm(total=len(futures)) as pbar:
            for _ in concurrent.futures.as_completed(futures):
                pbar.update(1)

        pages_description = [future.result() for future in futures]

    doc['pages_description'] = pages_description
    return doc

# Connect to the LlamaStack server (replace base_url with your actual deployment)
client = LlamaStackClient(base_url="http://localhost:5000")

# Input PDF and output JSON locations -- adjust both paths as needed
pdf_file_path = "/mnt/data/your_pdf_file.pdf"
output_path = "/mnt/data/processed_pdf_data.json"

# Run the pipeline on the PDF: extracted text plus page descriptions
doc_data = process_pdf(pdf_file_path, client)

# Persist the result as JSON for later use
with open(output_path, "w") as f:
    json.dump(doc_data, f)

print(f"Processed PDF data saved to {output_path}")
