# Test Processor


In [None]:
from process_pdf import PDFProcessor

In [None]:
# Instantiate the PDF processor under test using its default constructor arguments.
chunker = PDFProcessor()

In [None]:
# Inputs for a local run of process_pdf.
# NOTE: despite its name, file_content holds a filesystem path here, not PDF bytes.
file_content = r"D:\exxonpocaws\datafiles\a500.pdf"
# No S3 client or bucket name is supplied for this local test.
s3 = None
input_bucket = None

In [None]:
# Run the processor on the local PDF path and keep its return value for inspection.
chunked_content = chunker.process_pdf(file_content, s3, input_bucket)

In [None]:
# Display the value returned by process_pdf.
chunked_content

# Test Lambda Function


In [None]:
from lambda_function import lambda_handler
import json
from dotenv import load_dotenv


In [None]:

# Sample context (mock it if needed)
class Context:
    """Bare-bones substitute for the context object passed to the handler.

    Only ``aws_request_id`` is provided, fixed to the string ``"test"``.
    """

    def __init__(self):
        request_id = "test"
        self.aws_request_id = request_id


In [None]:
# Create the mock context object that is later passed to lambda_handler.
context = Context()

In [None]:
# Load the sample event from test_event.json.
# The file is decoded as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding (for example cp1252 on Windows).
with open('test_event.json', 'r', encoding='utf-8') as file:
    event = json.load(file)

In [None]:
# Load variables from a .env file into the process environment.
# NOTE(review): lambda_function is imported before this call; if that module
# reads environment variables at import time it will not see the .env values
# -- confirm the intended cell order.
load_dotenv()

In [None]:
# Call the handler directly with the sample event and the mock context,
# then print whatever it returns.
response = lambda_handler(event, context)
print(response)

# Download Models


In [None]:
from transformers import CLIPProcessor, CLIPModel

In [None]:
# Hub checkpoint to fetch, and the folder used as its download cache
clip_checkpoint = "openai/clip-vit-base-patch32"
local_folder = "./local_models/clip-vit-base-patch32"

# Fetch the CLIP model and its matching processor through that cache folder
model = CLIPModel.from_pretrained(clip_checkpoint, cache_dir=local_folder)
processor = CLIPProcessor.from_pretrained(clip_checkpoint, cache_dir=local_folder)

# Export the model and the processor into their own directories
for artifact, target_dir in (
    (model, "./local_models/model/clip-vit-base-patch32"),
    (processor, "./local_models/processor/clip-vit-base-patch32"),
):
    artifact.save_pretrained(target_dir)

In [None]:
# Download PaliGemma 2 (3B, 224px) and export it to a local folder
from transformers import AutoProcessor, AutoModelForImageTextToText

# Destination folder for the exported model and processor files
local_folder = r"C:\Users\prana\Desktop\lambda-chunking\local_models\paligemma2-3b-pt-224"
paligemma_checkpoint = "google/paligemma2-3b-pt-224"

# Fetch the processor and the model from the Hub
processor = AutoProcessor.from_pretrained(paligemma_checkpoint)
model = AutoModelForImageTextToText.from_pretrained(paligemma_checkpoint)

# Write both into the same destination folder (model first, then processor)
for artifact in (model, processor):
    artifact.save_pretrained(local_folder)

In [None]:
# Download Qwen2-VL-2B-Instruct and export it to a local folder
from transformers import AutoProcessor, AutoModelForImageTextToText

# Destination folder for the exported model and processor files
local_folder = r"C:\Users\prana\Desktop\lambda-chunking\local_models\Qwen2-VL-2B-Instruct"
qwen_checkpoint = "Qwen/Qwen2-VL-2B-Instruct"

# Fetch the processor and the model from the Hub
processor = AutoProcessor.from_pretrained(qwen_checkpoint)
model = AutoModelForImageTextToText.from_pretrained(qwen_checkpoint)

# Write both into the same destination folder (model first, then processor)
for artifact in (model, processor):
    artifact.save_pretrained(local_folder)

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

In [None]:
# Destination folder for the BLIP image-captioning pipeline files.
local_folder = "./local_models/blip-image-captioning-base/"

In [None]:
# Build an image-to-text pipeline from the Salesforce BLIP base captioning checkpoint.
pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

In [None]:
# Save the pipeline's files into local_folder.
pipe.save_pretrained(local_folder)

In [None]:
from transformers import pipeline

# Build the ViT-GPT2 image-captioning pipeline, then save it to a local folder
pipe = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
local_folder = "./local_models/vit-gpt2-image-captioning/"
pipe.save_pretrained(local_folder)

# Random experiments


In [None]:
# Document length and worker count used when splitting pages into chunks
num_pages, num_threads = 133, 4

In [None]:
# Ceiling division gives the number of pages per worker. The value is clamped
# to at least 1 because range() rejects a step of 0, which is what an empty
# document (num_pages == 0) would otherwise produce.
chunk_size = max(1, -(-num_pages // num_threads))

# Consecutive, non-overlapping lists of page indices; the last may be shorter.
page_ranges = [
    list(range(start, min(start + chunk_size, num_pages)))
    for start in range(0, num_pages, chunk_size)
]

print(page_ranges)

In [None]:
# Display the current page ranges.
page_ranges

In [None]:
# Hand-written split: four consecutive range objects covering page indices 0-132
page_ranges = [
    range(first, stop)
    for first, stop in ((0, 34), (34, 68), (68, 102), (102, 133))
]

In [None]:
import pymupdf4llm
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
# Submit one to_markdown call per page range to a thread pool. Leaving the
# ``with`` block waits until every submitted call has finished.
# NOTE(review): confirm that calling PyMuPDF from several threads at once is
# supported before relying on this pattern.
manual_pdf = r"C:\Users\prana\Downloads\archive\Manuals\coffee machine1.pdf"
with ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(pymupdf4llm.to_markdown, manual_pdf, page_chunks=True, pages=pages)
        for pages in page_ranges
    ]

In [None]:
# Take the result of the last submitted job (waiting for it if necessary)
# and show its element at index 2.
futures[-1].result()[2]

In [2]:
import fitz

In [3]:
# Open the sample PDF with PyMuPDF; the document handle is kept in ``doc``
doc = fitz.open(r"D:\exxonpocaws\datafiles\a500.pdf")

In [None]:
# Inspect the type of the object obtained by indexing the document at 0.
type(doc[0])

In [None]:
# Print the integers 0 through 3, one per line
for i in range(4):
    print(i)

In [12]:
def overlapping_page_ranges(page_count, num_threads, overlap=0):
    """Split page indices into ``num_threads`` lists that overlap their neighbours.

    Each chunk nominally holds ``page_count // num_threads`` pages and is
    widened by ``overlap`` pages on both sides, clipped to the document bounds.
    The final chunk always runs to ``page_count`` so that pages left over by
    the floor division are not silently dropped.

    Args:
        page_count: Total number of pages in the document.
        num_threads: Number of chunks to produce; must be at least 1.
        overlap: Number of extra pages shared with each neighbouring chunk.

    Returns:
        A list of ``num_threads`` lists of zero-based page indices.

    Raises:
        ValueError: If ``num_threads`` is smaller than 1.
    """
    if num_threads < 1:
        raise ValueError("num_threads must be at least 1")

    pages_per_chunk = page_count // num_threads
    last_index = num_threads - 1
    ranges = []
    for i in range(num_threads):
        start = max(0, i * pages_per_chunk - overlap)
        if i == last_index:
            # Absorb the remainder of the floor division into the last chunk.
            end = page_count
        else:
            end = min(page_count, (i + 1) * pages_per_chunk + overlap)
        ranges.append(list(range(start, end)))
    return ranges


page_count = 133
num_threads = 4
overlap = 2
page_ranges = overlapping_page_ranges(page_count, num_threads, overlap)

In [None]:
# Display the current page ranges.
page_ranges

In [None]:
import pymupdf4llm
import fitz

In [None]:
# Convert the whole PDF to Markdown in a single call, with page_chunks=True.
pages = pymupdf4llm.to_markdown(r"C:\Users\prana\Downloads\archive\Manuals\exercise bikes.pdf", page_chunks=True)

In [None]:
# Inspect the entry at index 26 of the conversion result.
pages[26]

In [53]:
from transformers import AutoProcessor, AutoModelForCausalLM  
from PIL import Image
import requests
import copy
import torch
%matplotlib inline

In [54]:
# Load Florence-2 from the local export.
# Device and dtype are chosen here so that the model matches the tensors that
# run_example sends to it, and so this cell also works on machines without
# CUDA (an unconditional .cuda() raises there).
model_id = './local_models/Florence-2-base'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# trust_remote_code=True runs the modelling code stored alongside the checkpoint.
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, torch_dtype=torch_dtype).eval().to(device)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

In [55]:
# Use the first GPU with half precision when CUDA is available,
# otherwise fall back to the CPU with single precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

In [49]:
def run_example(task_prompt, text_input=None):
    """Run one task prompt against the notebook-level ``image`` and print the result.

    Relies on the module-level names ``processor``, ``model``, ``image``,
    ``device`` and ``torch_dtype``.

    Args:
        task_prompt: Task token, e.g. ``'<DETAILED_CAPTION>'``.
        text_input: Optional text appended directly after the task token.

    Returns:
        None. The post-processed answer is printed.
    """
    prompt = task_prompt if text_input is None else task_prompt + text_input

    # Encode prompt and image, then move the tensors to the chosen device/dtype
    encoded = processor(text=prompt, images=image, return_tensors="pt")
    encoded = encoded.to(device, torch_dtype)

    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        pixel_values=encoded["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )

    # Special tokens are kept in the decoded text (skip_special_tokens=False)
    decoded = processor.batch_decode(output_ids, skip_special_tokens=False)
    raw_text = decoded[0]

    answer = processor.post_process_generation(
        raw_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )

    print(answer)

In [51]:
# Caption a local image with the detailed-caption task.
task_prompt = '<DETAILED_CAPTION>'
# Named image_context rather than context so that the mock Lambda context
# object created earlier in this notebook is not overwritten by a string.
image_context = "goku just fought someone"
image = Image.open(r"C:\Users\prana\Desktop\lambda-chunking\download.jpg")
run_example(task_prompt)

{'<DETAILED_CAPTION>': 'This is an animated image. In this image we can see a person wearing goggles. In the back there is a wall. Also there are trees.'}


In [None]:
# Summarize an image together with some context taken from its page.
img_path = "download.jpg"
instructions = "Using the following context from the page, summarize the image for RAG using relevant information."
markdown_context = "Goku chillin"

try:
    # Read the image straight from the file path
    image = Image.open(img_path)

    # Input for the Qwen model: a single user turn holding an image placeholder
    # followed by the instructions and the page context (adjust if the pipeline
    # requires a different structure)
    image_part = {"type": "image"}
    text_part = {"type": "text", "text": f"{instructions}\n{markdown_context}"}
    full_input = [{"role": "user", "content": [image_part, text_part]}]

    # 'pipe' must already be bound to the pipeline object you want to use
    result = pipe(text=full_input, images=[image])
    if result:
        summary = result[0]["generated_text"]
    else:
        summary = "No summary generated"
except Exception as e:
    # Best effort: report the problem and fall back to a placeholder summary
    print(f"Error generating summary: {e}")
    summary = "Summary generation failed"

print(summary)