In [1]:
import os
import pandas as pd
from openai import OpenAI
from pathlib import Path
import time, re, json
import time
import math
from datetime import datetime

In [2]:
# Notebook-wide OpenAI client; the API key is read from the environment
# (None if OPENAI_API_KEY is unset) so it never appears in the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
# Generate the prompts

In [4]:
# Item pairs that each story prompt is built around.
pairs = [
    ["bells", "clocks"],
    ["mugs", "cups"],
    ["onion", "garlic"],
    ["watches", "clocks"],
]

# Prompt skeleton with three named placeholders: {a}, {b}, {target_language}.
template = "Give me a story about {a} and {b} in {target_language}."

# Languages the story is requested in.
target_languages = ["English", "Swedish"]

In [5]:
# Helper functions for building the prompts

def make_prompt(pairs, target_languages, template, swap=False):
    """Render `template` once for every (target language, pair) combination.

    Args:
        pairs: Iterable of two-item sequences; each is unpacked into the
            template's ``a`` and ``b`` fields.
        target_languages: Iterable of values for the ``target_language`` field.
        template: Format string with ``{a}``, ``{b}`` and ``{target_language}``.
        swap: When true, the second item of each pair fills ``a`` and the
            first fills ``b``.

    Returns:
        A list of prompt strings. Languages vary slowest: every pair is
        rendered for the first language before the next language starts.
    """
    return [
        template.format(
            a=second if swap else first,
            b=first if swap else second,
            target_language=language,
        )
        for language in target_languages
        for first, second in pairs
    ]

In [6]:
# Empty placeholder; the combined prompt list is assigned in a later cell.
all_prompts = list()

In [7]:
# Build every prompt twice: once with each pair in its listed order
# (swap=False) and once with the two items exchanged (swap=True).
prompts_order1, prompts_order2 = (
    make_prompt(pairs, target_languages, template, swap=swap_items)
    for swap_items in (False, True)
)

In [8]:
# Original-order prompts first, then the swapped-order prompts.
all_prompts = [*prompts_order1, *prompts_order2]

In [9]:
# Show the combined prompt list (both item orders) as a visual sanity check.
all_prompts

['Give me a story about bells and clocks in English.',
 'Give me a story about mugs and cups in English.',
 'Give me a story about onion and garlic in English.',
 'Give me a story about watches and clocks in English.',
 'Give me a story about bells and clocks in Swedish.',
 'Give me a story about mugs and cups in Swedish.',
 'Give me a story about onion and garlic in Swedish.',
 'Give me a story about watches and clocks in Swedish.',
 'Give me a story about clocks and bells in English.',
 'Give me a story about cups and mugs in English.',
 'Give me a story about garlic and onion in English.',
 'Give me a story about clocks and watches in English.',
 'Give me a story about clocks and bells in Swedish.',
 'Give me a story about cups and mugs in Swedish.',
 'Give me a story about garlic and onion in Swedish.',
 'Give me a story about clocks and watches in Swedish.']

In [60]:
# Generate the model's outputs

In [30]:
# --- Run configuration ---
MODEL = "gpt-5.1-2025-11-13"                   # model identifier sent with each request
TOP_K = 5                                      # top_logprobs candidates requested per token
OUT_STORIES_DIR = "gpt_5_1/stories/trial_5/"   # destination of the story .txt files
OUT_LOGS_DIR = "gpt_5_1/logs/trial_5/"         # destination of the logprob .json files

# Create both output folders up front; exist_ok makes re-running the cell harmless.
for out_dir in (OUT_STORIES_DIR, OUT_LOGS_DIR):
    os.makedirs(out_dir, exist_ok=True)

In [31]:
def slugify(text: str, max_len: int = 60) -> str:
    """
    Turn a prompt into a filename-safe slug.

    Steps, in order:
    - strip surrounding whitespace and lowercase
    - collapse every whitespace run to a single space
    - keep only the first `max_len` characters (the cut happens before
      substitution, so the final slug can be shorter than `max_len`)
    - replace each run of characters outside a-z / 0-9 with one underscore
    - drop leading and trailing underscores

    Returns "prompt" when nothing usable is left.
    """
    normalized = re.sub(r"\s+", " ", text.strip().lower())
    truncated = normalized[:max_len]
    # One pass suffices: "_" is itself outside [a-z0-9], so any run that
    # contains underscores is already collapsed to a single "_" here.
    slug = re.sub(r"[^a-z0-9]+", "_", truncated).strip("_")
    if not slug:
        return "prompt"
    return slug

def now_stamp() -> str:
    """Return the current local time as a filename-friendly stamp.

    The layout is YYYY-MM-DD_HH-MM-SS, e.g. 2025-11-12_14-03-59.
    """
    current = datetime.now()
    return f"{current:%Y-%m-%d_%H-%M-%S}"

def _extract_token_logs(choice) -> list:
    """Convert one completion choice's logprob data into JSON-serializable dicts.

    Returns one entry per generated token position. Each entry holds the
    position index, the emitted token with its logprob/probability, and the
    list of top candidates reported for that position. Returns an empty list
    when the choice carries no logprob content.
    """
    logprobs = choice.logprobs
    if not logprobs or not logprobs.content:
        return []

    def as_probability(logprob):
        # Logprobs are natural logs; guard against a missing value.
        return math.exp(logprob) if logprob is not None else None

    token_logs = []
    for position, content_item in enumerate(logprobs.content):
        candidates = [
            {
                "token": cand.token,
                "logprob": cand.logprob,
                "probability": as_probability(cand.logprob),
            }
            for cand in content_item.top_logprobs
        ]
        token_logs.append({
            "position": position,
            # The token actually emitted at this position. Recorded explicitly
            # so the log shows which candidate was sampled (it may not be the
            # first entry of top_logprobs).
            "token": content_item.token,
            "logprob": content_item.logprob,
            "probability": as_probability(content_item.logprob),
            "top_logprobs": candidates,
        })
    return token_logs


def save_story_and_logs(prompt: str):
    """Generate a story for `prompt` and save it together with its token logprobs.

    Sends `prompt` as a single user message to MODEL with logprobs enabled
    (TOP_K candidates per position), then writes two files that share a
    "<timestamp>_<slug>" base name:

    - a .txt file in OUT_STORIES_DIR containing the prompt, model name and story
    - a .json file in OUT_LOGS_DIR containing the prompt, story, per-position
      logprobs, token usage and the response's `created` / `id` fields

    Args:
        prompt: The user prompt to send to the model.

    Returns:
        None. The paths of the written files are printed.

    Raises:
        Any exception from the API call or from opening/writing the output
        files propagates to the caller.
    """
    # Reuse the notebook-level `client` rather than constructing a new OpenAI
    # client for every prompt.
    response = client.chat.completions.create(
        model=MODEL,
        messages=[{"role": "user", "content": prompt}],
        logprobs=True,
        top_logprobs=TOP_K,
    )

    choice = response.choices[0]
    story_text = choice.message.content or ""  # content may be None
    token_logs = _extract_token_logs(choice)

    base = f"{now_stamp()}_{slugify(prompt)}"
    story_path = os.path.join(OUT_STORIES_DIR, f"{base}.txt")
    log_path = os.path.join(OUT_LOGS_DIR, f"{base}.json")

    # --- Write story file (includes prompt for traceability) ---
    with open(story_path, "w", encoding="utf-8") as f:
        f.write("### Prompt\n")
        f.write(prompt.strip() + "\n\n")
        f.write("### Model\n")
        f.write(MODEL + "\n\n")
        f.write("### Story\n")
        f.write(story_text)

    # --- Write JSON log file ---
    usage = getattr(response, "usage", None)
    usage_summary = {
        "prompt_tokens": usage.prompt_tokens,
        "completion_tokens": usage.completion_tokens,
        "total_tokens": usage.total_tokens,
    } if usage else None

    log_payload = {
        "model": MODEL,
        "prompt": prompt,
        "story": story_text,
        "top_k": TOP_K,
        "token_positions": token_logs,
        "usage": usage_summary,
        # Response metadata echoed for reproducibility/debugging.
        "created": getattr(response, "created", None),
        "id": getattr(response, "id", None),
    }
    with open(log_path, "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII story text readable in the file.
        json.dump(log_payload, f, ensure_ascii=False, indent=2)

    print(f"Story saved to: {story_path}")
    print(f"Log saved to:   {log_path}")

In [32]:
# Generate and save one story + logprob log per prompt. Each call is a
# network request, so a failure on one prompt should not throw away the rest
# of the batch: errors are collected, reported as they happen, and re-raised
# as a single summary once every prompt has been attempted.
failed_prompts = []
for prompt in all_prompts:
    try:
        save_story_and_logs(prompt)
    except Exception as exc:  # KeyboardInterrupt is not caught and still aborts the run
        failed_prompts.append((prompt, exc))
        print(f"FAILED: {prompt!r} -> {type(exc).__name__}: {exc}")

if failed_prompts:
    raise RuntimeError(
        f"{len(failed_prompts)} of {len(all_prompts)} prompts failed; see messages above."
    )

Story saved to: gpt_5_1/stories/trial_5/2025-11-28_16-17-40_give_me_a_story_about_bells_and_clocks_in_english.txt
Log saved to:   gpt_5_1/logs/trial_5/2025-11-28_16-17-40_give_me_a_story_about_bells_and_clocks_in_english.json
Story saved to: gpt_5_1/stories/trial_5/2025-11-28_16-18-14_give_me_a_story_about_mugs_and_cups_in_english.txt
Log saved to:   gpt_5_1/logs/trial_5/2025-11-28_16-18-14_give_me_a_story_about_mugs_and_cups_in_english.json
Story saved to: gpt_5_1/stories/trial_5/2025-11-28_16-18-33_give_me_a_story_about_onion_and_garlic_in_english.txt
Log saved to:   gpt_5_1/logs/trial_5/2025-11-28_16-18-33_give_me_a_story_about_onion_and_garlic_in_english.json
Story saved to: gpt_5_1/stories/trial_5/2025-11-28_16-20-01_give_me_a_story_about_watches_and_clocks_in_english.txt
Log saved to:   gpt_5_1/logs/trial_5/2025-11-28_16-20-01_give_me_a_story_about_watches_and_clocks_in_english.json
Story saved to: gpt_5_1/stories/trial_5/2025-11-28_16-20-54_give_me_a_story_about_bells_and_clocks