In [1]:
import json
import random
from typing import Tuple

In [2]:
def extract_dialogue_info(example: str) -> Tuple[int, str, bool]:
    """
    Parse a generated example and report its dialogue length and target speaker.

    :param example: Generated output from ChatGPT, expected to look like:
    A1: I was at the port for 4 hours.
    A2:
    [with time elapsed]
    A: I just arrived at the port now.
    B: What are you going to do?
    A: I'm going to take a walk around the port.
    B: I see. Have fun.
    [4 hours later]
    A: The view was amazing! I took a lot of pictures.

    [without time elapsed]
    A: I'll send you some pictures after I get back.
    :return: (dialogue length, the speaker of the target (last) message, whether the example is valid).
        The length is the number of newline-separated lines in the stripped context that
        precedes the "... later]" marker, plus one for the target message. The speaker is the
        first character of the delayed response. The example is valid only when the immediate
        response starts with that same character. Any parsing failure yields (0, "", False).
    """

    def first_line(text: str) -> str:
        # Only the first line following a marker is the response of interest.
        return text.partition("\n")[0].strip()

    try:
        # Index 1 is the text between the first and second occurrence of each marker;
        # a missing marker raises IndexError and is reported as an invalid example.
        elapsed_section = example.split("[with time elapsed]", 2)[1]
        around_delay = elapsed_section.split("later]\n", 2)

        # Drop the opening bracket of the "[... later]" line from the context.
        history = around_delay[0].partition("[")[0].strip()
        delayed = first_line(around_delay[1])
        immediate = first_line(elapsed_section.split("[without time elapsed]\n", 2)[1])

        # An empty response line raises IndexError here, which also means "invalid".
        speaker = delayed[0]
        same_speaker = immediate[0] == speaker
    except Exception:
        return 0, "", False

    # Number of lines in the context (newline count + 1) plus the target message.
    return history.count("\n") + 2, speaker, same_speaker

In [3]:
# Narrative records, one JSON object per line. Each record is later read for its
# "narrative", "question" and "answer" fields when the user prompts are built.
# The encoding is explicit so decoding does not depend on the platform locale,
# and blank lines (e.g. a trailing newline) are skipped instead of crashing json.loads.
with open("../resources/atomic_2020_narratives.jsonl", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Candidate one-shot examples, one JSON object per line. Each record is later read
# for its "generated" and "user_prompt" fields.
with open("../results/gpt-4-0125-preview/1shot/atomic_2020_dialogues.jsonl", encoding="utf-8") as f:
    fewshot_examples = [json.loads(line) for line in f if line.strip()]

In [4]:
# Parse every few-shot example once and keep only the well-formed ones. Drawing
# from this pre-filtered pool selects uniformly among the valid examples, just as
# redrawing until a valid one comes up would, but it cannot spin forever when no
# example is valid and it does not re-parse the same text on every draw.
valid_fewshot_examples = []
for candidate in fewshot_examples:
    candidate_length, candidate_speaker, candidate_is_valid = extract_dialogue_info(candidate["generated"])
    if candidate_is_valid:
        valid_fewshot_examples.append((candidate, candidate_length, candidate_speaker))

# Fail loudly instead of hanging. With no narratives nothing is sampled, so an
# empty pool is only an error when there is at least one prompt to build.
if data and not valid_fewshot_examples:
    raise ValueError("fewshot_examples contains no valid dialogue to use as a one-shot example")

# NOTE(review): the random module is not seeded in this notebook, so the example
# paired with each narrative differs from run to run.
system_prompts = []
for _ in range(len(data)):
    # The sampled example dictates the required dialogue length and which speaker
    # sends the target (last) message; the other speaker is the remaining of A/B.
    example, length, target_speaker = random.choice(valid_fewshot_examples)
    other_speaker = "B" if target_speaker == "A" else "A"

    # fmt: off
    system_prompt = (
        "Given a narrative sentence containing an event and a question asking how long did the event last followed by the answer, follow the instructions below.\n\n"
        "[Instruction]\n"
        "1. Combine the question and the answer into one declarative sentence to state the event duration.\n"
        "2. Based on the narrative, create an instant-messaging-style dialogue between two speakers following the conditions below.\n\n"
        "[Condition]\n"
        f"1. A dialogue must be {length} utterances long.\n"
        f"2. {target_speaker} is messaging with {other_speaker} in the middle of the event, while {other_speaker} is not experiencing it.\n"
        f"3. After {other_speaker}'s last message, add \"[{{time_elapsed}} later]\", where time_elapsed is the time specified in the declarative sentence.\n"
        f"4. Suppose the same amount of time has passed in real world when {target_speaker} responds, so that {target_speaker} sends a message which is natural and appropriate considering the time passed.\n"
        f"5. Delete the \"[{{time_elapsed}} later]\" message and create another version of {target_speaker}'s last utterance as if no time has passed. Note that the former dialogue history remains intact.\n"
        "[/Condition]\n"
        "[/Instruction]\n\n"
        "[Example]\n"
        f"{example['user_prompt']}\n"
        f"{example['generated']}\n"
        "[/Example]"
    )
    # fmt: on
    system_prompts.append(system_prompt)

# One user prompt per narrative record, in the same order as `system_prompts`.
user_prompts = [f"Narrative: {d['narrative']}\nQuestion: {d['question']}\nAnswer: {d['answer']}\n" for d in data]

In [5]:
# Request settings. Each value is copied unchanged into every request payload
# under the key of the same name.
model = "gpt-3.5-turbo-0125"
temperature = 1.0
top_p = 1.0
frequency_penalty = 0.0
presence_penalty = 0.0
# NOTE(review): presumably the cap on tokens generated per completion — confirm against the API docs.
max_tokens = 1024

In [6]:
def _build_request(system_content, user_content):
    """Return one request payload pairing a system prompt with a user prompt."""
    chat = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content},
    ]
    # Key order is kept stable because it determines the serialized JSON layout.
    return {
        "model": model,
        "messages": chat,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_tokens,
    }


# One payload per (system prompt, user prompt) pair; zip stops at the shorter list.
messages = [_build_request(*prompt_pair) for prompt_pair in zip(system_prompts, user_prompts)]

In [None]:
# Write the request payloads as JSONL: one JSON object per line.
# The file name is derived from `model` so it always matches the model named
# inside the payloads; for the current setting it resolves to
# "../resources/gpt-3.5-turbo-0125_requests_atomic_2020.jsonl".
requests_path = f"../resources/{model}_requests_atomic_2020.jsonl"
with open(requests_path, "w", encoding="utf-8") as f:
    for message in messages:
        f.write(json.dumps(message) + "\n")