# Gemini PDF worker

In [7]:
import json
from pypdf import PdfReader
from dotenv import load_dotenv
from google import genai
from google.genai import types
import base64
import os

# Model name for this project (not referenced by the other visible cells).
project_model = "gemini-2.5-flash-preview-04-17"

# 1. Read .env.local into the process environment
load_dotenv(".env.local")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")  # None when the variable is unset

# 2. Build the Gemini client from that key
client = genai.Client(api_key=GEMINI_API_KEY)

In [8]:
# 3. Ping 1: list available models
#    (this verifies your key + network)
# Each entry is printed in its default string form; the captured output
# shows fields such as name, display_name and supported_actions.
# `model` stays bound to the last entry once the loop finishes.
for model in client.models.list():
    print(model)

name='models/embedding-gecko-001' display_name='Embedding Gecko' description='Obtain a distributed representation of a text.' version='001' endpoints=None labels=None tuned_model_info=TunedModelInfo(base_model=None, create_time=None, update_time=None) input_token_limit=1024 output_token_limit=1 supported_actions=['embedText', 'countTextTokens'] default_checkpoint_id=None checkpoints=None
name='models/gemini-1.0-pro-vision-latest' display_name='Gemini 1.0 Pro Vision' description='The original Gemini 1.0 Pro Vision model version which was optimized for image understanding. Gemini 1.0 Pro Vision was deprecated on July 12, 2024. Move to a newer Gemini version.' version='001' endpoints=None labels=None tuned_model_info=TunedModelInfo(base_model=None, create_time=None, update_time=None) input_token_limit=12288 output_token_limit=4096 supported_actions=['generateContent', 'countTokens'] default_checkpoint_id=None checkpoints=None
name='models/gemini-pro-vision' display_name='Gemini 1.0 Pro Vi

In [9]:
# Load environment variables (.env.local) if present
# NOTE(review): the setup cell already makes this exact call; this repeat
# looks redundant unless the cell is meant to run on its own - confirm.
load_dotenv(dotenv_path=".env.local")

def extract_fields_with_coords(pdf_path):
    """
    Extract form-field widgets, with their names and rectangles, from a PDF.

    Walks each page's ``/Annots`` array and keeps the ``/Widget`` annotations
    that carry both a partial field name (``/T``) and a four-number rectangle
    (``/Rect``). Other annotation kinds are skipped even when they have a
    ``/T`` entry, because on non-field annotations ``/T`` is a title/author
    label rather than a field name.

    Widgets that have no ``/T`` of their own are skipped; a name stored only
    on a parent field dictionary is not looked up.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        A list of dicts, one per matching widget, in page order, with keys
        ``field_id`` (the ``/T`` value), ``page`` (1-based page number) and
        ``coords`` (``[x1, y1, x2, y2]`` as plain floats).
    """
    def _resolved(value):
        # dict-style .get() on pypdf objects can hand back an indirect
        # reference instead of the referenced object; dereference when the
        # value knows how, otherwise pass it through (covers None as well).
        get_object = getattr(value, "get_object", None)
        return get_object() if callable(get_object) else value

    reader = PdfReader(pdf_path)
    fields = []
    for page_num, page in enumerate(reader.pages, start=1):
        annots = _resolved(page.get("/Annots"))
        if not annots:
            # Page has no annotations (or an empty array).
            continue
        for annot in annots:
            obj = _resolved(annot)
            if not isinstance(obj, dict):
                # Dangling or null reference in the annotation array.
                continue
            if _resolved(obj.get("/Subtype")) != "/Widget":
                # Only widget annotations represent form fields.
                continue
            name = _resolved(obj.get("/T"))
            rect = _resolved(obj.get("/Rect"))
            if not name or not rect or len(rect) != 4:
                # Unnamed widget or malformed rectangle: nothing usable.
                continue
            fields.append({
                "field_id": name,
                "page": page_num,
                # Plain floats keep the result JSON-serializable whatever
                # numeric wrapper type the PDF library uses.
                "coords": [float(_resolved(value)) for value in rect],
            })
    return fields

def test_gemini_with_instructions(raw_fields, instructions_path, language="en"):
    """
    Builds a JSON prompt combining raw_fields with the instruction PDF path,
    simulates a Gemini call, and returns enriched field schemas.
    """
    # Read instructions bytes (base64-encoded for embedding in prompt example)
    with open(instructions_path, "rb") as f:
        inst_bytes = f.read()
    inst_b64 = base64.b64encode(inst_bytes).decode('utf-8')
    
    # Build prompt payload
    prompt_payload = {
        "fields": raw_fields,
        "instructions_pdf_b64": inst_b64,
        "language": language
    }
    print("=== Prompt Payload to Gemini ===")
    print(json.dumps(prompt_payload, indent=2)[:500] + "\n...")  # truncated for readability
    
    # Simulate Gemini response
    enriched = []
    for f in raw_fields:
        enriched.append({
            "field_id": f["field_id"],
            "label": f"Label for {f['field_id']}",
            "page": f["page"],
            "coords": f["coords"],
            "gemini_note": f"Explain {f['field_id']} in simple terms.",
            "examples": [f"Example for {f['field_id']}"]
        })
    return enriched

# Example usage
# Inputs: the form PDF to scan and the instructions PDF to embed in the prompt.
form_pdf    = "static/forms/I-765/form.pdf"
inst_pdf    = "static/forms/I-765/inst.pdf"

# Collect field ids and rectangles from the form; preview only the first three.
raw_fields  = extract_fields_with_coords(form_pdf)
print("Extracted Raw Fields:", raw_fields[:3], "...\n")

# Run the simulated enrichment on the first five fields only, then dump it as JSON.
enriched_fields = test_gemini_with_instructions(raw_fields[:5], inst_pdf, language="en")
print("Enriched Fields Sample:", json.dumps(enriched_fields, indent=2))


Extracted Raw Fields: [{'field_id': 'PDF417BarCode1[0]', 'page': 1, 'coords': [191.999, 11.999, 461.999, 29.999]}, {'field_id': 'Line1a_FamilyName[0]', 'page': 1, 'coords': [120.002, 132.001, 294.001, 150.001]}, {'field_id': 'Line1b_GivenName[0]', 'page': 1, 'coords': [120.002, 108.006, 294.001, 126.006]}] ...

=== Prompt Payload to Gemini ===
{
  "fields": [
    {
      "field_id": "PDF417BarCode1[0]",
      "page": 1,
      "coords": [
        191.999,
        11.999,
        461.999,
        29.999
      ]
    },
    {
      "field_id": "Line1a_FamilyName[0]",
      "page": 1,
      "coords": [
        120.002,
        132.001,
        294.001,
        150.001
      ]
    },
    {
      "field_id": "Line1b_GivenName[0]",
      "page": 1,
      "coords": [
        120.002,
        108.006,
        294.001,
        126.006
      ]
  
...
Enriched Fields Sample: [
  {
    "field_id": "PDF417BarCode1[0]",
    "label": "Label for PDF417BarCode1[0]",
    "page": 1,
    "coords": [
      1