# Gemini PDF worker

In [1]:
# ─── Imports ────────────────────────────────────────────────────────────────
import json, base64, os
from dotenv import load_dotenv
from pypdf import PdfReader
from google import genai
from google.genai import types

In [2]:
# ─── Setup ────────────────────────────────────────────────────────────────
# Model identifier handed to the generate_content call further down.
MODEL = "gemini-2.5-flash-preview-04-17"

# Load variables from .env.local, then read the API key from the environment.
# GEMINI_API_KEY is None when the variable is not set.
load_dotenv(".env.local")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Single shared client for all Gemini requests in this notebook.
client = genai.Client(api_key=GEMINI_API_KEY)

In [3]:
# ─── Helpers ──────────────────────────────────────────────────────────────
def extract_fields_with_coords(pdf_path):
    """Collect the name and rectangle of every named annotation in a PDF.

    Each page's ``/Annots`` array is walked; an annotation is kept only when
    it carries both a ``/T`` (name) entry and a four-element ``/Rect`` entry.

    Args:
        pdf_path: Path (or anything ``PdfReader`` accepts) of the PDF to scan.

    Returns:
        A list of dicts, in page order, each with:
          - ``"field_id"``: the annotation's ``/T`` value, unchanged.
          - ``"page"``: 1-based page number.
          - ``"coords"``: the four ``/Rect`` numbers as plain floats, in the
            order stored in the PDF (presumably x1, y1, x2, y2 in PDF user
            space -- confirm against the form if orientation matters).
    """
    reader = PdfReader(pdf_path)
    fields = []
    for page_num, page in enumerate(reader.pages, start=1):
        # dict.get() does not go through pypdf's resolving __getitem__, so the
        # value may still be an indirect reference; resolve it before iterating.
        annots = _resolve(page.get("/Annots")) or []
        for annot in annots:
            obj = _resolve(annot)
            name = obj.get("/T")
            rect = _resolve(obj.get("/Rect"))
            if not name or not rect:
                continue
            # A malformed rectangle should not abort extraction of the
            # remaining fields; skip just this annotation.
            if len(rect) != 4:
                continue
            fields.append({
                "field_id": name,
                "page":     page_num,
                # Plain floats keep the result independent of pypdf's
                # number wrapper types (e.g. for JSON serialisation).
                "coords":   [float(_resolve(value)) for value in rect],
            })
    return fields


def _resolve(obj):
    """Return ``obj.get_object()`` when available, otherwise ``obj`` itself."""
    getter = getattr(obj, "get_object", None)
    return getter() if callable(getter) else obj

In [4]:
# ─── Real Gemini Call ─────────────────────────────────────────────────────
def enrich_fields_with_gemini(raw_fields, instructions_path, language="en"):
    """Ask Gemini to annotate raw form fields using the form's instructions PDF.

    The instructions PDF is attached to the request as an inline
    ``application/pdf`` part so the model receives the document itself,
    instead of a base64 string pasted into the prompt text.

    Args:
        raw_fields: JSON-serialisable list of field records (field_id, page,
            coords) that is embedded in the prompt.
        instructions_path: Path of the instructions PDF to attach.
        language: Language code interpolated into the prompt's
            ``Language:`` line. Defaults to ``"en"``.

    Returns:
        The value stored under ``"fields"`` in the model's JSON reply.

    Raises:
        OSError: If ``instructions_path`` cannot be opened or read.
        ValueError: If the reply is empty or is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        KeyError: If the reply has no ``"fields"`` key.
    """
    # 1) Read the instructions PDF as raw bytes
    with open(instructions_path, "rb") as f:
        inst_bytes = f.read()

    # 2) Build your prompt (JSON-in, JSON-out)
    prompt = f"""
I have a USCIS form with these fields (field_id, page, coords):
{json.dumps(raw_fields, indent=2)}

The form instructions PDF is attached to this message.

Please, for each field, output a JSON object with keys:
  - field_id
  - label           (short human label)
  - page
  - coords
  - dependencies    (list of other field_ids)
  - gemini_note     (plain-English explanation)
  - examples        (list with one example)
Return strictly valid JSON in the format: {{ "fields": [ ... ] }}.
Language: {language}
"""

    # 3) Call Gemini with the PDF as a document part followed by the prompt.
    # NOTE(review): inline parts count toward the API request-size limit;
    # very large PDFs may need the Files API instead -- confirm against docs.
    response = client.models.generate_content(
        model=MODEL,
        contents=[
            types.Content(
                role="user",
                parts=[
                    types.Part.from_bytes(
                        data=inst_bytes,
                        mime_type="application/pdf",
                    ),
                    types.Part.from_text(text=prompt),
                ]
            )
        ],
        config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=0),
            response_mime_type="application/json",
        )
    )

    # 4) Parse & return
    reply_text = response.text
    if not reply_text:
        # Fail with a clear message instead of an opaque json.loads error.
        raise ValueError("Gemini returned an empty response")
    result = json.loads(reply_text)
    return result["fields"]

In [5]:
# The I-765 form and its instructions PDF sit side by side in one folder.
form_dir = "static/forms/I-765"
form_pdf = f"{form_dir}/form.pdf"
inst_pdf = f"{form_dir}/inst.pdf"

# Pull field names and rectangles from the form, then have Gemini annotate them.
raw = extract_fields_with_coords(form_pdf)
enriched = enrich_fields_with_gemini(
    raw_fields=raw,
    instructions_path=inst_pdf,
    language="en",
)

# Pretty-print the enriched records as indented JSON.
pretty = json.dumps(enriched, indent=2)
print(pretty)

[
  {
    "field_id": "PDF417BarCode1[0]",
    "label": "PDF417 Bar Code",
    "page": 1,
    "coords": [
      191.999,
      11.999,
      461.999,
      29.999
    ],
    "dependencies": [],
    "gemini_note": "This field is for the PDF417 barcode.",
    "examples": [
      ""
    ]
  },
  {
    "field_id": "Line1a_FamilyName[0]",
    "label": "Family Name",
    "page": 1,
    "coords": [
      120.002,
      132.001,
      294.001,
      150.001
    ],
    "dependencies": [],
    "gemini_note": "This field is for the applicant's family name.",
    "examples": [
      "SMITH"
    ]
  },
  {
    "field_id": "Line1b_GivenName[0]",
    "label": "Given Name",
    "page": 1,
    "coords": [
      120.002,
      108.006,
      294.001,
      126.006
    ],
    "dependencies": [],
    "gemini_note": "This field is for the applicant's given name.",
    "examples": [
      "JOHN"
    ]
  },
  {
    "field_id": "Line1c_MiddleName[0]",
    "label": "Middle Name",
    "page": 1,
    "coords": [