In [None]:
!pip install -q anthropic

import anthropic
import json
import random
import time
import os
import glob
import shutil

# API key: prefer the ANTHROPIC_API_KEY environment variable so the secret does
# not have to be pasted into (and saved with) this file. The literal below is
# used only when that variable is not set.
client = anthropic.Anthropic(
    api_key=os.environ.get("ANTHROPIC_API_KEY", "sk-ant-api03-")
)

# ─── Load base dataset ───────────────────────────────────────────────────────
# One JSON object per line. The encoding is given explicitly so the read does
# not depend on the platform default, and blank lines (for example an extra
# newline at the end of the file) are skipped instead of crashing json.loads.
with open("/content/nightline_data.jsonl", "r", encoding="utf-8") as f:
    base_data = [json.loads(line) for line in f if line.strip()]

# JSONL file that the generated records are appended to.
output_path = "/content/nightline_final_15x_dataset.jsonl"

# ─── 1) One-time copy of the latest interim backup ─────────────────────────
# Backups are named interim_backup_<N>.jsonl; order them by the integer <N>
# so that the last element is the highest-numbered backup.
interim_files = glob.glob("/content/interim_backup_*.jsonl")
interim_files.sort(
    key=lambda path: int(os.path.basename(path).rsplit("_", 1)[-1].split(".")[0])
)

if not interim_files:
    print("⚠️ No interim_backup_*.jsonl files found; starting fresh")
else:
    latest = interim_files[-1]
    # Seed the output only when it is missing or empty, so an output file that
    # already has content is never overwritten by the backup.
    if os.path.exists(output_path) and os.path.getsize(output_path) != 0:
        print(f"✅ {output_path} already exists; skipping copy")
    else:
        shutil.copy(latest, output_path)
        print(f"✅ Initialized {output_path} from {latest}")

# ─── 2) Resume position ─────────────────────────────────────────────────────
# Set by hand (not derived from the output file): the generation loop skips
# every base prompt whose index is below start_i, and for prompt start_i itself
# it also skips the rewrites whose index is below start_j.
start_i, start_j = 480, 0
print(f"🔁 Resuming from prompt #{start_i}, rewrite #{start_j} of 5")




# ─── 3) Open output for crash-safe appending ───────────────────────────────
# Append mode keeps whatever the file already contains. The encoding is spelled
# out so the file is written the same way regardless of the platform default.
# The handle stays open for the generation loop, which closes it at the end.
out_f = open(output_path, "a", encoding="utf-8")

# ─── 4) Prompt & response specs ────────────────────────────────────────────
# Comma-separated list of emotional themes, kept as six lines of text; it is
# inserted verbatim into the rewrite instruction below.
themes = (
    "friend issues, loneliness, body image, panic attacks, shame/guilt,\n"
    "family issues, eating disorder, trust issues, emotional numbness, overwhelm,\n"
    "anger issues, school stress, social anxiety, relationship problems,\n"
    "gender/sexuality, low self-esteem, grief, disconnection, perfectionism,\n"
    "jealousy, career stress, fear of judgment, anxiety, bullying, self-harm,\n"
    "suicidal intent, dissociation"
)

# Instruction sent ahead of each base prompt: asks for 5 rewrites, one per
# line, with no commentary. The text ends with a newline.
rewrite_instruction = (
    "Rewrite the emotional disclosure below **5 times**.\n"
    "\n"
    "Each version should preserve the original emotional meaning, but vary in:\n"
    "- Perspective (e.g., “I”, “sometimes it feels like”, “lately…”)\n"
    "- Depth (short vs expanded)\n"
    "- Voice (plain vs metaphorical, vague vs raw)\n"
    "\n"
    "You may draw on these emotional themes for variety — but do not invent new feelings\n"
    "not present in the original and also try to avoid oversentementalising or coming off too poetic:\n"
    f"{themes}\n"
    "\n"
    "⚠️ Return the rewrites only — no commentary, no headers, no numbering. One per line.\n"
)

# Sample closing questions; they are shown to the model as inspiration only.
nightline_closers = [
    "Does that resonate with you?",
    "How does that sit with you?",
    "Is there anything else on your mind?",
    "How are you feeling at this moment?",
]

def get_response_instruction():
    """Build the prompt that asks for three responses to a disclosure.

    The text describes the directive, neutral and Nightline-aligned response
    styles and embeds every entry of ``nightline_closers``, each wrapped in
    curly quotes and separated from the next by two spaces, as example closers.

    Returns:
        str: The instruction text. It does not contain the disclosure itself;
        the caller appends that.
    """
    quoted_closers = ["“" + closer + "”" for closer in nightline_closers]
    examples = "  ".join(quoted_closers)
    return f"""Generate three distinct responses to the emotional disclosure below:

1. A directive response: give clear, actionable advice or a suggestion. Be
   confident but not harsh or judgmental. Encourage forward motion, not emotional
   shutdown.

2. A neutral response: briefly acknowledge what was said. Keep it emotionally
   polite but detached — not warm, not cold. Don’t offer advice, praise, or
   reflection. Just acknowledge.

3. A Nightline-aligned response: Generate responses that replicate the tone and conversational style of a trained Nightline volunteer. Keep responses emotionally grounded, calm, and empathetic. Avoid heightened emotional language or cheeriness. Don’t say things like “That sounds terrible, I’m so sorry that happened!” Instead, say “It sounds like you’ve been carrying a lot lately.” Show empathy without dramatizing.

Reflect and mirror the user’s words through paraphrasing. Don’t repeat their exact phrasing. Show that you're listening by summarizing or exploring what they’ve said. If someone says they’re struggling after returning to university, respond with something like “You mentioned that you’re feeling overwhelmed since coming back to uni — that sounds really tough.”

Use open-ended, curious questions that help the user explore their thoughts and feelings. Don’t interrogate or pressure. Never give advice. Don’t say “Have you tried talking to your tutor about it?” Instead, say “What do you feel might help, even a little, right now?” or “Would you like to talk more about what’s been making you feel that way?”

Avoid being directive. Never tell users what to do. Don’t offer suggestions, solutions, or hypothetical advice. Never say “You should…” or “If I were you…” Instead, ask “What’s your sense of what might be helpful right now?” or “Have you had any thoughts on what kind of support you’d want?”

Remain completely non-judgemental. Don’t praise or criticize actions or feelings. Avoid saying “That’s good you’re trying to quit” or “You probably shouldn’t do that.” These are judgments, even if well-meant. Instead, say “It makes sense that you’d feel conflicted — a lot’s been going on.”

Keep yourself out of the conversation. Don’t refer to your own experiences, opinions, or perspectives. Never say “I’ve been there” or “I know how that feels.” Instead, say “Sounds like this has been on your mind a lot. What’s been the hardest part of it for you?”

Maintain a gentle, unhurried pace. Embrace pauses. Don’t rush or fill silences. Say things like “Take your time. I’m here to listen.”

Avoid sounding robotic or scripted. If a phrase starts to feel stale, vary it or rephrase. Don’t overuse stock responses like “That must be hard.” Instead, personalize it: “That sounds really difficult,” or “I can see why that would feel heavy.” Use clarifying questions to deepen the conversation, such as “It sounds as though you felt overwhelmed when that happened — is that right?”

Always ground responses in the principles of non-directivity, non-judgement, anonymity, and confidentiality. Do not fix, advise, diagnose, or lead. Just hold space. Listen. Reflect. Support.

For inspiration, here are a few example closers:
{examples}

Feel free to create your own natural-sounding closer at the end of your Nightline-aligned response. Do **not** prefix it with bullets, dashes, or slashes.

⚠️ Return only the three responses, one per line, in plain text."""

# ─── 5) Generation loop with resume & interim backups ───────────────────────
# For every base prompt from start_i onward: ask the model for rewrites of the
# prompt, ask for three responses to each rewrite, and append one JSON record
# per rewrite to out_f. Each entry of base_data is read as a dict with a
# "prompt" string and a "responses" dict whose "directive", "neutral" and
# "nightline_aligned" items each carry a "traits" value that is copied into
# the new record unchanged.
#
# The loop runs inside try/finally so the output handle is closed even when an
# API call raises or the run is interrupted.

# The response instruction is the same for every request; build it once.
response_instruction = get_response_instruction()

try:
    for i, entry in enumerate(base_data):
        if i < start_i:
            continue  # skip fully done prompts

        base_prompt = entry["prompt"]
        traits      = entry["responses"]

        print(f"\n🔄 [{i+1}/{len(base_data)}] Rewriting prompt:\n→ {base_prompt[:90]}...\n")

        # 5.1) Get the rewrites (the instruction asks for 5, one per line)
        prompt_resp = client.messages.create(
            model="claude-3-5-haiku-20241022",
            max_tokens=1500,
            temperature=0.7,
            messages=[{
                "role": "user",
                "content": rewrite_instruction + f'\n\n"{base_prompt}"'
            }]
        )
        # One rewrite per non-empty line of the reply.
        rewritten_prompts = [
            p.strip() for p in prompt_resp.content[0].text.strip().split("\n")
            if p.strip()
        ]

        for j, new_prompt in enumerate(rewritten_prompts):
            if i == start_i and j < start_j:
                continue  # skip already-done rewrites

            print(f"📝 [{j+1}/5] {new_prompt[:90]}...")

            # 5.2) Get 3 responses
            response_block = client.messages.create(
                model="claude-3-5-haiku-20241022",
                max_tokens=1000,
                temperature=0.7,
                messages=[{
                    "role": "user",
                    "content": response_instruction + f'\n\nPrompt: "{new_prompt}"'
                }]
            )
            responses = [
                r.strip() for r in response_block.content[0].text.strip().split("\n")
                if r.strip()
            ]
            # Keep the last three non-empty lines, so extra lines ahead of the
            # answers are dropped. A shorter list is left as it is and is
            # rejected by the length check below.
            responses = responses[-3:]

            if len(responses) == 3:
                # Lines are taken in the order the instruction asks for them:
                # directive, neutral, Nightline-aligned.
                record = {
                    "prompt": new_prompt,
                    "responses": {
                        "directive": {
                            "text":   responses[0],
                            "traits": traits["directive"]["traits"]
                        },
                        "neutral": {
                            "text":   responses[1],
                            "traits": traits["neutral"]["traits"]
                        },
                        "nightline_aligned": {
                            "text":   responses[2],
                            "traits": traits["nightline_aligned"]["traits"]
                        }
                    }
                }
                json.dump(record, out_f)
                out_f.write("\n")
                out_f.flush()  # push each record to the file right away
            else:
                print(f"⚠️ Expected 3 responses but got {len(responses)}; raw:\n"
                      f"{response_block.content[0].text}")

            time.sleep(1.1)  # pause between response requests

        # 5.3) Every 10 base-prompts, save an interim backup
        if (i + 1) % 10 == 0:
            out_f.flush()
            backup_name = f"/content/interim_backup_{i+1}.jsonl"
            shutil.copy(output_path, backup_name)
            print(f"💾 Interim backup saved: {backup_name}")

        # only skip those j < start_j once
        start_j = 0
finally:
    # Runs on success and on failure alike, so the handle never leaks.
    out_f.close()

print("✅ Generation complete – all new rewrites & responses appended.")
