# Gemini PDF to iTELL JSON Conversion Test

In [15]:
import json
import requests
import os
import base64
from pathlib import Path

from google import genai
from dotenv import load_dotenv

# Populate os.environ via python-dotenv before the API key is looked up
load_dotenv()

# Gemini client; the key is taken from the GEMINI_API_KEY environment variable
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Inputs: the PDF to convert, a reference iTELL JSON, and the format guide
pdf_path = "../data/input-docs/split-pdfs/USCIS Chapter 1.pdf"
example_json_path = (
    "../data/reference-json/"
    "communication-for-business-communication-for-business-success"
    "_page001_18-1-intercultural-communication.json"
)
md_instructions_path = "../doc/guide-to-itell-json.md"

In [16]:
# Read the reference JSON (parsed) and the Markdown guide (raw text)
example_json = json.loads(Path(example_json_path).read_text(encoding="utf-8"))

md_instructions = Path(md_instructions_path).read_text(encoding="utf-8")

# Keep a base64 text copy of the PDF bytes
pdf_b64 = base64.b64encode(Path(pdf_path).read_bytes()).decode("utf-8")

In [17]:
# Build the conversion prompt
def construct_prompt(md_instructions, example_json):
    """Assemble the text prompt sent to the model alongside the PDF.

    Args:
        md_instructions: Conversion guide text, inserted verbatim under the
            ``INSTRUCTIONS:`` heading.
        example_json: JSON-serialisable object shown as the example output;
            it is rendered with a two-space indent inside a ``json`` fence.

    Returns:
        The complete prompt as a single string.
    """
    # Format through an f-string so non-string values are rendered the same
    # way they would be when interpolated directly into the prompt.
    instructions_text = f"{md_instructions}"
    example_text = json.dumps(example_json, indent=2)

    lines = [
        "You are an expert content converter specializing in educational materials. Your task is to convert the provided PDF document into iTELL JSON format.",
        "",
        "INSTRUCTIONS:",
        instructions_text,
        "",
        "EXAMPLE OUTPUT FORMAT:",
        "```json",
        example_text,
        "```",
        "",
        "Please convert the PDF document into iTELL JSON format following the instructions and example provided. Ensure proper chunking, appropriate headers, and maintain the educational structure. Return only valid JSON.",
    ]
    return "\n".join(lines)


# Prompt reused for the conversion request below
prompt = construct_prompt(
    md_instructions=md_instructions,
    example_json=example_json,
)
# print(prompt)  # uncomment to inspect the full prompt

In [18]:
def convert(pdf_path, prompt, model="gemini-2.5-flash"):
    """Send a PDF plus a text prompt to Gemini and return the reply text.

    Args:
        pdf_path: Path of the PDF file. Its raw bytes are attached to the
            request with the ``application/pdf`` MIME type.
        prompt: Text prompt placed after the PDF part in the request.
        model: Name of the Gemini model to call. Defaults to the model this
            notebook was written against.

    Returns:
        The response text with leading and trailing whitespace removed.
        Nothing else is post-processed, so any Markdown code fence the model
        wraps around the JSON is still present in the returned string.

    Raises:
        RuntimeError: If the response carries no text, so there is nothing
            to return.
    """
    pdf_part = genai.types.Part.from_bytes(
        data=Path(pdf_path).read_bytes(),
        mime_type="application/pdf",
    )
    completion = client.models.generate_content(
        model=model,
        contents=[pdf_part, prompt],
    )

    # `text` is None when the response has no text parts; fail with a clear
    # message instead of an AttributeError on `.strip()`.
    text = completion.text
    if text is None:
        raise RuntimeError(
            f"Gemini returned no text for {pdf_path!r}; "
            "the response may have been blocked or empty."
        )
    return text.strip()


# Run the conversion and show the raw model reply
content = convert(pdf_path=pdf_path, prompt=prompt)
print(content)

```json
{
  "Title": "The U.S. Constitution",
  "Description": "This volume provides an introduction to the U.S. Constitution, covering its historical context, fundamental principles, governmental structure, and the amendment process. It serves as a guide for understanding the supreme law of the United States for civics education.",
  "VolumeSummary": "The U.S. Constitution, written in 1787, is the oldest written constitution globally, establishing the government and protecting basic rights of people in the United States. It begins with the Preamble, emphasizing self-government through the words 'We the People.' The Constitution organizes the federal government into three branches—Legislative, Executive, and Judicial—each with distinct powers, ensuring a separation of powers to prevent any single branch from becoming too powerful. It outlines federal powers, such as declaring war and printing money, and also defines powers reserved for state governments, including public safety and edu