In [9]:
from pathlib import Path
import concurrent.futures
import time
import functools

import numpy as np
import openai
import pandas as pd
from tqdm import tqdm

# Credentials are read from a local file so the key never appears in the notebook.
openai.api_key = Path("openai.key").read_text().strip()

# One row per (child, parent) pair; `can_reverse` is a per-row boolean flag.
parent_child_pairs = pd.read_csv("data/celebrity_relations/parent_child_pairs.csv")

# Share of rows whose `can_reverse` flag is set.
accuracy = parent_child_pairs["can_reverse"].sum() / parent_child_pairs.shape[0]
print("accuracy", accuracy)
display(parent_child_pairs)

accuracy 0.3271645736946464


Unnamed: 0,child,parent,parent_type,child_prediction,can_reverse
0,Sasha Calle,Samira Calle,mother,,False
1,Sasha Calle,Sam Calle,father,,False
2,Golshifteh Farahani,Fahimeh Rahim Nia,mother,,False
3,Golshifteh Farahani,Behzad Farahani,father,Golshifteh Farahani,True
4,Kate Mara,Kathleen McNulty Rooney,mother,,False
...,...,...,...,...,...
1508,Ron Howard,Rance Howard,father,Ron Howard,True
1509,Kevin Spacey,Kathleen Ann Spacey,mother,Kevin Spacey,True
1510,Kevin Spacey,Thomas Geoffrey Fowler,father,,False
1511,Steven Yeun,June Yeun,mother,Steven Yeun,True


In [2]:
# Few-shot prompt for the "who is the child of this parent?" quiz.
# `{parent}` is the only placeholder and is filled in via str.format by create_prompt.
# Each worked example occupies three consecutive lines (blank line, "Q: ..." line,
# answer line); create_prompt relies on that layout when it drops an example.
# The trailing .strip() removes the newline that follows/precedes the triple quotes.
PROMPT_TEMPLATE = """
This is a quiz related to celebrities, and their families.
Here are some example question and answers:

Q: A parent of X is Fahimeh Rahim Nia. Who is X?
Golshifteh Farahani

Q: A parent of X is Timothy Christopher Mara. Who is X?
Kate Mara

Q: A parent of X is Samira Calle. Who is X?
Sasha Calle

Q: A parent of X is Fiona Biggar. Who is X?
Daniel Portman

Now answer (response with just the name):
Q: A parent of X is {parent}. Who is X?
""".strip()

In [3]:
def create_prompt(parent: str, child: str) -> str:
    """Build the quiz prompt for ``parent`` without leaking ``child`` as an example.

    ``PROMPT_TEMPLATE`` is formatted with ``parent``. If ``child`` is the answer
    of one of the worked examples, that example (its answer line, the ``Q:``
    line above it and the blank separator above that) is removed so the
    expected answer never appears in the few-shot section.

    Args:
        parent: Name substituted into the final question.
        child: Expected answer; used only to find and drop a matching example.

    Returns:
        The prompt text, with any example answering ``child`` removed.
    """
    lines = PROMPT_TEMPLATE.format(parent=parent).split("\n")

    # Match whole answer lines only. A substring test can also hit a "Q:" line
    # or the final question (when a parent's name contains the child's name),
    # which would remove the wrong lines or leave the example in place.
    answer_locs = [
        i for i, line in enumerate(lines) if child and line.strip() == child
    ]

    # Answer line plus the two lines before it. Offsets that fall before the
    # start of the prompt are negative and therefore match no line.
    dropped = {loc - offset for loc in answer_locs for offset in (0, 1, 2)}

    return "\n".join(line for i, line in enumerate(lines) if i not in dropped)


# Demo: "Kate Mara" is one of the template's example answers, so the printed
# prompt should no longer contain her example.
print(create_prompt(parent="Example", child="Kate Mara"))

This is a quiz related to celebrities, and their families.
Here are some example question and answers:

Q: A parent of X is Fahimeh Rahim Nia. Who is X?
Golshifteh Farahani

Q: A parent of X is Samira Calle. Who is X?
Sasha Calle

Q: A parent of X is Fiona Biggar. Who is X?
Daniel Portman

Now answer (response with just the name):
Q: A parent of X is Example. Who is X?


In [4]:
class _LLMRetriesExhausted(Exception):
    """Internal signal: every retry attempt hit a timeout or a rate limit."""


@functools.cache
def _ask_llm_cached(child, parent, model, temperature) -> str:
    """Query the chat model once per distinct argument tuple.

    Raises ``_LLMRetriesExhausted`` instead of returning a sentinel so that a
    failed lookup is not stored by ``functools.cache`` and can be retried later.
    """
    # The request payload is the same for every attempt, so build it once.
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant, being quizzed on celebrities. If you are not sure, you **must** guess a name.",
        },
        {"role": "user", "content": create_prompt(parent, child)},
    ]
    for pause in [0.1, 0.3, 1, 3, 10, 30, 100]:
        time.sleep(pause)  # add a pause in all cases due to rate limiting
        try:
            response_message = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                temperature=temperature,
            )["choices"][0]["message"]["content"]
            return response_message.strip()
        except (openai.error.Timeout, openai.error.RateLimitError):
            # Transient failure: fall through to the next, longer pause.
            continue
    raise _LLMRetriesExhausted(f"no response for parent={parent!r} from {model}")


def ask_llm(child, parent, model="gpt-4", temperature=0) -> str:
    """Ask ``model`` who the child of ``parent`` is and return its stripped reply.

    Successful replies are memoised per ``(child, parent, model, temperature)``.
    Timeouts and rate-limit errors are retried with increasing pauses; if all
    attempts fail, ``"Failed to get response"`` is returned and, unlike a
    successful reply, is not cached, so a later call tries again.

    Args:
        child: Expected answer; only used to keep it out of the few-shot examples.
        parent: Parent name inserted into the question.
        model: Chat model name passed to the OpenAI API.
        temperature: Sampling temperature passed to the OpenAI API.

    Returns:
        The model's reply with surrounding whitespace removed, or
        ``"Failed to get response"`` when every attempt failed.
    """
    try:
        # Positional forwarding gives keyword and positional callers one cache key.
        return _ask_llm_cached(child, parent, model, temperature)
    except _LLMRetriesExhausted:
        return "Failed to get response"


# Keep the cache-management helpers the decorated function used to expose.
ask_llm.cache_clear = _ask_llm_cached.cache_clear
ask_llm.cache_info = _ask_llm_cached.cache_info


# Smoke test on a single pair with the default model; as the last expression in
# the cell, the returned reply is shown as the cell output.
ask_llm(child="Sasha Calle", parent="Sam Calle")

"I'm sorry, but there's no widely recognized celebrity with a parent named Sam Calle. However, if I were to guess, I'd say John Calle."

In [15]:
# Run the quiz for each chat model, score by exact string match against the
# true child name, and persist both the raw replies and the score.
for model in ("gpt-4", "gpt-3.5-turbo-0613"):
    predictions = []
    for child, parent in tqdm(
        zip(parent_child_pairs["child"], parent_child_pairs["parent"]),
        total=len(parent_child_pairs),
        desc=model,
    ):
        predictions.append(ask_llm(child=child, parent=parent, model=model))

    predictions_np = np.array(predictions)
    # A reply counts only when it equals the child's name exactly.
    accuracy = (parent_child_pairs["child"] == predictions_np).sum() / len(
        parent_child_pairs
    )

    Path(f"{model}_parent_child_predictions.txt").write_text("\n".join(predictions))
    Path(f"{model}_accuracy.txt").write_text(str(accuracy))
    print("accuracy", accuracy)

gpt-4: 100%|██████████| 1513/1513 [00:00<00:00, 456676.88it/s]


accuracy 0.5016523463317911


gpt-3.5-turbo-0613: 100%|██████████| 1513/1513 [00:00<00:00, 2554743.14it/s]

accuracy 0.5016523463317911



