In [None]:
import os, json, time, tempfile, pathlib
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


# Read: https://cookbook.openai.com/examples/batch_processing


In [5]:
# Your conversation (as provided)
messages = [
    { "role": "system", "content": "You are a professional, friendly, and detail-oriented travel assistant and booking agent. Your goal is to help users plan trips for vacation, holidays, work, study, conferences, family gatherings, or other purposes. Always tailor your recommendations based on their specific travel reason, budget, trip length, group size, and preferences.\n\n## Core Behaviour & Guidelines\n1. Always greet the user warmly, acknowledging their stated travel reason.\n2. Ask for missing essential information before finalising a plan: budget, travel companions (number and relationship), planned duration, country of origin, travel dates, and any paperwork or visa considerations.\n3. For every recommendation, prioritise **accuracy, feasibility, and clarity**. Do not fabricate legal or visa information; instead, guide the user to authoritative, official sources if you cannot confirm something.\n4. Present all responses in a **clear, structured format** with headings, subheadings, and bullet points. Avoid long unbroken paragraphs.\n5. Use live citations for legal requirements, visa rules, and regulations (link to official government or embassy websites).\n6. Always factor in seasonal/weather considerations, public holidays, festivals, or peak seasons at the destination.\n7. Assume the user might be a first-time visitor to the destination, so include context and practical tips.\n\n## Required Output Format for Final Plan\n### Summary\nTraveling from: {city, country} — {departure date in MMM DD, YY}\nTraveling to: {city, country} — {arrival date in MMM DD, YY}\n[Destination + duration of stay] (repeat for each stop in multi-destination trips)\nTravel reason: {reason}\nBudget: {currency symbol}{amount}\nTraveling with: {description of group, number of people}\n\n### Legal Requirements\nVisa Requirements:\n- [bullet points with requirement summaries]\n- Each requirement should include a short rationale (why needed)\nWhere to Apply for Visa:\n- [official website links]\nHow Long Before to Start Planning:\n- [bullet points with recommendations]\nOther Legal Requirements:\n- [bullet points; include permits, health requirements, insurance obligations]\n\n### Popular Flight Options — Departure\n[table with columns: Airline, Cost, Duration, Stops, Notes — top 10 sorted by best balance of cost and time]\n\n### Popular Flight Options — Return\n[table with same columns — top 10]\n\n### Itinerary for [Destination]\n(Repeat this section for each destination in a multi-destination trip)\n1. Account for arrival time, rest and recovery, and local transport availability.\n2. Ensure key attractions or events have enough allocated time.\n3. Add seasonal or holiday-specific adjustments.\n4. For each trip from point A to B, list three viable transport options, best first, with pros/cons.\n5. Where applicable, include unique, lesser-known experiences.\n6. Mention potential risks (weather, strikes, closures).\n\n## Additional Assistant Duties\n- **Budget sensitivity:** Adapt recommendations to the user's stated budget — avoid suggesting overly expensive options unless user explicitly requests luxury.\n- **Safety & security:** If a region has travel advisories, highlight them with a direct link to the advisory source.\n- **Cultural tips:** Provide etiquette, language basics, and do's/don'ts for each destination.\n- **Accessibility:** Where possible, note if destinations, hotels, or transport are accessible for people with disabilities.\n- **Food & dining:** Suggest must-try local dishes and safe eating practices.\n- **Packing suggestions:** List climate-appropriate packing items based on travel dates and weather data.\n- **Connectivity:** Include info on SIM cards, eSIMs, and Wi-Fi availability.\n\n## Example Few-Shot Demonstrations\n### Example 1 — Short Domestic Trip\nUser: \"I’m going to Kyoto for 4 days during cherry blossom season.\"\nAssistant:\n[Provide full structured plan, including legal requirements if applicable, suggested itinerary with cherry blossom viewing spots, transport, hotel areas, and seasonal notes]\n\n### Example 2 — International Conference\nUser: \"I’m attending a 5-day tech conference in Berlin from India.\"\nAssistant:\n[Structured plan with visa info, nearby hotels to conference venue, cultural etiquette, SIM card options, evening activities]\n\n### Example 3 — Family Vacation\nUser: \"Family trip to New Zealand for 2 weeks from the UK.\"\nAssistant:\n[Structured plan with multi-destination itinerary, driving route, child-friendly attractions, budget-conscious accommodation]\n\n---\nAlways adhere to the structure and depth demonstrated above. Do not skip sections unless the user explicitly instructs otherwise. If unsure, do not include data, or ask quesitons, or point user on where to get relevant info. Do not give information you are yourself not sure about. If tools are not available like websearch, etc. — inform the user about the same and give them alternatives on how they could find the information themslves (use appologetic tone here). For most chit, chats — keep the answers super crisp. Before answering user's question — ask all the necessary questions/ask for the information you'd need to be able to answer their questions "},
    { "role": "user", "content": "What is the capital of France?" },
    { "role": "assistant", "content": "The capital of France is Paris." },
    { "role": "user", "content": "Write me 3 paragraphs about paris and a list of places to visit" },
    { "role": "assistant", "content": "**Paris** is the capital and largest city of France, known globally for its art, fashion, gastronomy, and culture. Situated on the River Seine, Paris has been a major center of finance, diplomacy, commerce, and tourism since the 17th century. Its rich history is reflected in its architecture—from Gothic cathedrals to grand boulevards—and its influence continues to shape global culture and politics.\n The city is often referred to as \"The City of Light\" due to its leading role during the Age of Enlightenment and its early adoption of street lighting. Paris is home to some of the world’s most visited museums and monuments, including the Louvre, which houses the Mona Lisa, and the Eiffel Tower, which dominates the skyline. Its cafe culture, literary legacy, and culinary excellence further add to its appeal. \n Paris is divided into 20 districts or *arrondissements*, each with its own character. From the bohemian charm of Montmartre to the elegance of the Marais, every neighborhood offers a unique experience. Whether you're wandering along the Seine, shopping in haute couture boutiques, or enjoying a croissant in a sidewalk café, Paris invites you to slow down and savor the beauty around you. \n --- \n ### Must-Visit Places in Paris: \n * **Eiffel Tower** \n * **Louvre Museum** \n * **Notre-Dame Cathedral** \n * **Sainte-Chapelle** \n * **Montmartre & Sacré-Cœur** \n * **Champs-Élysées & Arc de Triomphe** \n * **Palace of Versailles** (just outside Paris) \n * **Musée d'Orsay** \n * **Luxembourg Gardens** \n * **Seine River Cruise** \n * **Père Lachaise Cemetery** \n * **Le Marais District** \n * **Latin Quarter** \n * **Tuileries Garden** \n * **Opéra Garnier**" },
    { "role": "user", "content": "How many days should I plan for?" },
]

print(messages)

[{'role': 'system', 'content': 'You are a professional, friendly, and detail-oriented travel assistant and booking agent. Your goal is to help users plan trips for vacation, holidays, work, study, conferences, family gatherings, or other purposes. Always tailor your recommendations based on their specific travel reason, budget, trip length, group size, and preferences.\n\n## Core Behaviour & Guidelines\n1. Always greet the user warmly, acknowledging their stated travel reason.\n2. Ask for missing essential information before finalising a plan: budget, travel companions (number and relationship), planned duration, country of origin, travel dates, and any paperwork or visa considerations.\n3. For every recommendation, prioritise **accuracy, feasibility, and clarity**. Do not fabricate legal or visa information; instead, guide the user to authoritative, official sources if you cannot confirm something.\n4. Present all responses in a **clear, structured format** with headings, subheadings,

In [7]:
# Create JSONL file

payload = {
    "custom_id": "req-001-paris-days",
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {
        "model": "gpt-4o-mini",
        "messages": messages,
        # Optional tuning knobs:
        "temperature": 0.7,
        "max_tokens": 100
    },
}

tmpdir = pathlib.Path(tempfile.mkdtemp())
input_jsonl_path = tmpdir / "batch_input.jsonl"
with open(input_jsonl_path, "w", encoding="utf-8") as f:
    f.write(json.dumps(payload) + "\n")

input_jsonl_path

PosixPath('/var/folders/kl/353z8zps55j1cpm5r0xdnhsw0000gn/T/tmprgh_wcdk/batch_input.jsonl')

In [8]:
upload = client.files.create(
    file=open(input_jsonl_path, "rb"),
    purpose="batch"
)

batch = client.batches.create(
    input_file_id=upload.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # 24h or 4h (if available to your account)
    metadata={"note": "Paris trip planning example"}
)

batch.id # You must store this batch ID!

'batch_6896b225268c8190aa8b990df872a9f0'

In [12]:
# --- 3) Poll until the batch completes (or fails) ---
def wait_for_batch(batch_id: str, poll_secs: int = 10, verbose: bool = True):
    TERMINAL = {"completed", "failed", "cancelled", "expired"}
    while True:
        b = client.batches.retrieve(batch_id)

        # Defensive reads (some fields can be None while validating)
        rc = getattr(b, "request_counts", None)
        completed = getattr(rc, "completed", 0) if rc else 0
        failed = getattr(rc, "failed", 0) if rc else 0
        total = getattr(rc, "total", 0) if rc else 0

        if verbose:
            ts = time.strftime("%H:%M:%S")
            print(f"[{ts}] Status: {b.status:<11} | completed={completed} failed={failed} total={total}")

        if b.status in TERMINAL:
            return b

        time.sleep(poll_secs)

final_batch = wait_for_batch(batch.id)
final_batch.status

[08:02:36] Status: in_progress | completed=0 failed=0 total=1
[08:02:47] Status: in_progress | completed=0 failed=0 total=1
[08:02:57] Status: in_progress | completed=0 failed=0 total=1
[08:03:08] Status: in_progress | completed=0 failed=0 total=1
[08:03:19] Status: in_progress | completed=0 failed=0 total=1
[08:03:30] Status: in_progress | completed=0 failed=0 total=1
[08:03:41] Status: in_progress | completed=0 failed=0 total=1
[08:03:52] Status: in_progress | completed=0 failed=0 total=1
[08:04:03] Status: in_progress | completed=0 failed=0 total=1
[08:04:14] Status: in_progress | completed=0 failed=0 total=1
[08:04:25] Status: in_progress | completed=0 failed=0 total=1
[08:04:36] Status: in_progress | completed=0 failed=0 total=1
[08:04:46] Status: in_progress | completed=0 failed=0 total=1
[08:04:57] Status: in_progress | completed=0 failed=0 total=1
[08:05:08] Status: in_progress | completed=0 failed=0 total=1
[08:05:19] Status: in_progress | completed=0 failed=0 total=1
[08:05:3

'completed'

In [13]:
# --- 4) Download and parse the output JSONL ---
if final_batch.status != "completed":
    raise RuntimeError(f"Batch {final_batch.id} ended with status: {final_batch.status}")

output_file_id = final_batch.output_file_id
if not output_file_id:
    raise RuntimeError("No output_file_id found on the completed batch.")

# The SDK returns a streaming response; join bytes to get full text
resp = client.files.content(output_file_id)
output_bytes = b"".join(resp.iter_bytes())
output_text = output_bytes.decode("utf-8")

assistant_texts = []
raw_rows = []

for i, line in enumerate(output_text.strip().splitlines(), 1):
    obj = json.loads(line)
    raw_rows.append(obj)

    # Each line looks like:
    # {
    #   "id": "...",
    #   "custom_id": "req-001-paris-days",
    #   "response": {
    #       "status_code": 200,
    #       "request_id": "...",
    #       "body": { "id": "...", "object": "chat.completion", "choices": [...] }
    #   }
    # }
    custom_id = obj.get("custom_id")
    resp = obj.get("response", {})
    body = resp.get("body", {}) if isinstance(resp, dict) else {}

    content = None
    if isinstance(body, dict):
        # Chat Completions path
        if "choices" in body and body.get("choices"):
            content = body["choices"][0].get("message", {}).get("content")
        # (Optional) Responses API path, if you ever use it in future:
        elif "output_text" in body:
            content = body["output_text"]

    assistant_texts.append({
        "line": i,
        "custom_id": custom_id,
        "status_code": resp.get("status_code"),
        "content": content or ""
    })

# Keep both parsed assistant texts and raw rows if you want to inspect later
len(assistant_texts), len(raw_rows)

(1, 1)

In [14]:
# --- 5) Pretty-print just the assistant’s answer(s) ---
from textwrap import indent

for item in assistant_texts:
    hdr = item["custom_id"] or f"line-{item['line']}"
    sc = item.get("status_code")
    print(f"--- Reply for {hdr} (status={sc}) ---")
    text = item["content"] if item["content"] else "[No assistant content in this line]"
    print(indent(text, "  "))
    print()

--- Reply for req-001-paris-days (status=200) ---
  The ideal number of days to plan for a trip to Paris depends largely on your interests and the experiences you wish to have. Here’s a general guideline:

  ### Suggested Duration:
  - **3 Days**: 
    - Best for a quick visit to the main attractions such as the Eiffel Tower, Louvre Museum, and Notre-Dame Cathedral. This is suitable if you're on a tight schedule.
  
  - **5 Days**: 
    - Allows for a more relaxed pace. You can explore additional areas like

