# OpenAI PDF to iTELL JSON Conversion Test

In [1]:
import base64
import json
import os
import warnings
from pathlib import Path

import openai
import requests
from dotenv import load_dotenv

# Environment: load_dotenv() runs first so the lookups below can see its values
load_dotenv()
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Inputs: source PDF, reference iTELL JSON, and the markdown conversion guide
pdf_path = "../data/input-docs/Civics Test Text-first-4-chapters.pdf"
example_output_path = "../data/reference-json/Civics Test Text iTELL JSON.json"
md_instructions_path = "../doc/guide-to-itell-json.md"

# Strapi CMS connection settings (URL falls back to a local instance)
strapi_key = os.environ.get("CMS_KEY")
strapi_url = os.environ.get("CMS_URL", "http://localhost:1337")

In [2]:
# Read the three inputs: reference JSON, markdown guide, and the PDF as base64 text
example_json = json.loads(Path(example_output_path).read_text(encoding="utf-8"))
md_instructions = Path(md_instructions_path).read_text(encoding="utf-8")
pdf_b64 = base64.b64encode(Path(pdf_path).read_bytes()).decode("utf-8")

# Select the first reference page with this exact title;
# next() raises StopIteration if no page in example_json["data"] matches.
chapter_1_json = next(
    filter(
        lambda entry: entry["Title"] == "Chapter 1: The U.S. Constitution",
        example_json["data"],
    )
)

In [4]:
# Construct the prompt
def construct_prompt(md_instructions, example_json):
    """Assemble the text prompt that accompanies the PDF.

    Args:
        md_instructions: Conversion guide text, placed under "INSTRUCTIONS:".
        example_json: JSON-serialisable example; it is pretty-printed with
            ``indent=2`` inside a fenced ``json`` block under
            "EXAMPLE OUTPUT FORMAT:".

    Returns:
        The complete prompt as one string, sections separated by blank lines.
    """
    sections = [
        "You are an expert content converter specializing in educational materials. "
        "Your task is to convert the provided PDF document into iTELL JSON format.",
        f"INSTRUCTIONS:\n{md_instructions}",
        f"EXAMPLE OUTPUT FORMAT:\n```json\n{json.dumps(example_json, indent=2)}\n```",
        "Please convert the PDF document into iTELL JSON format following the "
        "instructions and example provided. Ensure proper chunking, appropriate "
        "headers, and maintain the educational structure. Return only valid JSON.",
    ]
    return "\n\n".join(sections)


# Use the Chapter 1 reference page as the worked example embedded in the prompt.
prompt = construct_prompt(
    md_instructions=md_instructions,
    example_json=chapter_1_json,
)
# print(prompt)  # uncomment to inspect the full prompt text

In [5]:
def openai_convert(
    pdf_b64,
    prompt,
    model="gpt-5-mini",
    max_completion_tokens=4_000,
    filename=None,
    api_client=None,
):
    """Send the PDF plus the text prompt to the Chat Completions API.

    The PDF is attached as a base64 ``data:application/pdf`` file part and the
    prompt as a text part of a single user message.

    Args:
        pdf_b64: Base64-encoded PDF bytes (as text).
        prompt: Instruction text sent alongside the PDF.
        model: Model name passed to the API.
        max_completion_tokens: Upper bound on generated tokens. Per the OpenAI
            API reference this bound includes reasoning tokens as well as
            visible output, so a long document may need a larger value.
        filename: Name reported for the attached file. Defaults to the
            module-level ``pdf_path``.
        api_client: Object exposing ``chat.completions.create``. Defaults to
            the module-level ``client``.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        ValueError: If the first choice carries no text content.

    Warns:
        RuntimeWarning: If generation stopped because the token limit was
            reached (``finish_reason == "length"``); the returned text is then
            probably cut off and unlikely to be valid JSON.
    """
    if api_client is None:
        api_client = client
    if filename is None:
        filename = pdf_path

    completion = api_client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "file",
                        "file": {
                            "filename": filename,
                            "file_data": f"data:application/pdf;base64,{pdf_b64}",
                        },
                    },
                    {
                        "type": "text",
                        "text": prompt,
                    },
                ],
            },
        ],
        max_completion_tokens=max_completion_tokens,
    )

    choice = completion.choices[0]
    finish_reason = getattr(choice, "finish_reason", None)
    text = choice.message.content

    # content can be None (e.g. the whole budget was consumed before any
    # visible output); fail with a readable message instead of AttributeError.
    if text is None:
        raise ValueError(
            f"OpenAI returned no text content (finish_reason={finish_reason!r}); "
            "consider raising max_completion_tokens."
        )

    # A reply cut off by the token limit would otherwise pass through silently.
    if finish_reason == "length":
        warnings.warn(
            f"OpenAI response stopped at max_completion_tokens={max_completion_tokens}; "
            "the returned JSON is probably truncated.",
            RuntimeWarning,
            stacklevel=2,
        )

    return text.strip()


# Run the conversion and show the raw text the model returned.
content = openai_convert(
    pdf_b64=pdf_b64,
    prompt=prompt,
)
print(content)

{
  "id": 1000,
  "documentId": "one-nation-one-people-ch1-4",
  "Title": "One Nation, One People: Chapters 1\u20134",
  "Content": [
    {
      "__component": "page.chunk",
      "id": 6000,
      "Header": "Learning Objectives",
      "Text": "<section class=\"Callout\"><div><p>In these chapters, you will learn about:</p><ul><li>The U.S. Constitution and its Preamble</li><li>The structure of the federal government (three branches)</li><li>How the Constitution can be changed and the Bill of Rights</li><li>The Legislative, Executive, and Judicial branches and how they function</li></ul></div></section>"
    },
    {
      "__component": "page.chunk",
      "id": 6001,
      "Header": "The U.S. Constitution",
      "Text": "<p>The U.S. Constitution was written in 1787. It is the oldest written constitution in the world. The Constitution sets up the government and protects the basic rights of people living in the United States.</p><p>The Constitution was written during the Founding Era.

In [None]:
# Upload to Strapi...
# `content` is the JSON *text* returned by the model. Decode it so that `data`
# is sent as an object, and so that invalid or truncated output fails here
# (json.JSONDecodeError) instead of being posted to the CMS.
payload = json.loads(content) if isinstance(content, str) else content

headers = {
    # Token comes from CMS_KEY, read into `strapi_key` in the setup cell.
    "Authorization": f"Bearer {strapi_key}",
    "Content-Type": "application/json",
}

upload_response = requests.post(
    f"{strapi_url}/api/contents",
    json={"data": payload},
    headers=headers,
    timeout=30,  # requests waits indefinitely by default; don't hang the kernel
)

if upload_response.status_code in [200, 201]:
    print("✅ Successfully uploaded to Strapi")
else:
    print(f"❌ Strapi upload failed: {upload_response.status_code}")
    # Show the response body so the reason for the rejection is visible.
    print(upload_response.text)