In [None]:
%pip install openai python-dotenv

In [None]:
import dotenv
# Load environment variables (such as the OPENAI_API_KEY read further down)
# from ../.env. override=True asks python-dotenv to let values from the file
# replace variables that are already set in the process environment.
dotenv.load_dotenv("../.env", override=True)

In [None]:
from openai import OpenAI
import os
import random
import csv

# Module-level OpenAI client shared by the generation functions below.
# os.environ.get returns None (instead of raising KeyError) when
# OPENAI_API_KEY is unset; how a None key is handled is up to the openai
# library -- NOTE(review): confirm the failure mode you want for a missing key.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
def generate_input_topics():
    """Ask the chat model for a batch of short seed topics.

    Sends one user message asking for ten 1-3 word topics, one per line, and
    parses the reply into a list.

    Returns:
        list[str]: The non-blank lines of the reply, each with surrounding
        whitespace removed, in reply order. Empty when the reply carries no
        text. The model is not forced to return exactly ten lines, so callers
        should not rely on the length.
    """
    input_topic_template = """
You are helping generate examples to fine-tune a model for a game. 
Choose a random topic, person or place. 
Reply with exactly ten 1-3 word topics, with each topic on a new line and no punctuation.
Prioritize diverse set of topics. For example, 

Banana
Elon Musk
Zebra
Mark Twain
...
    """

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": input_topic_template}],
    )

    # `content` is optional in the chat completion response; treat a missing
    # reply as empty text instead of failing on None.split(...).
    reply_text = response.choices[0].message.content or ""

    # Blank lines (leading/trailing newlines, spacing between items) would
    # otherwise come back as "" topics and be turned into real examples.
    stripped_lines = (line.strip() for line in reply_text.split("\n"))
    return [topic for topic in stripped_lines if topic]

In [None]:

def generate_output_topics(input_topic):
    """Generate related topics for ``input_topic`` under a randomly chosen action.

    One action is drawn uniformly from a fixed list, substituted into the
    prompt together with the topic, and the model's comma-separated reply is
    parsed into a list.

    Args:
        input_topic: The seed topic inserted into the prompt.

    Returns:
        tuple: ``(action, input_topic, topics)`` where ``action`` is the
        action that was drawn and ``topics`` is the list of non-empty,
        whitespace-stripped items from the reply (empty when the reply carries
        no text).
    """
    actions = ['broader', 'deeper', 'similar', 'opposite', 'people', 'places', 'good', 'evil', 'future', 'past']
    action = random.choice(actions)

    # NOTE(review): the prompt asks for "3 new topics" and then for "exactly
    # 10 new topics", while the examples show 3-4. The text is left unchanged
    # here because the intended count is not clear from this file; make the
    # two sentences agree once that is decided.
    output_topic_template = """
You are helping generate examples to fine-tune a model for a game. 
Given an action, choose a random, specific topic and generate 3 new topics for the given action. 
Avoid choosing general topics or using unnececssary adjectives.  
Reply with exactly 10 new topics separated by commas.

If the action is "people", use specific names of peoples or groups.

Here are some examples: 

Input: Action: Broader, Topic: The Game of Thrones
Output: Fantasy, George R. R. Martin, HBO, Novels

Input: Action: Opposite, Topic: Dancing
Output: Stillness, Sleeping, Inactivity

Here is the input:
Action: {action}
Topic: {topic}
Output:
    """

    output_topic_prompt = output_topic_template.format(action=action, topic=input_topic)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": output_topic_prompt}],
    )

    # `content` is optional in the chat completion response; treat a missing
    # reply as empty text instead of failing on None.split(...).
    reply_text = response.choices[0].message.content or ""

    # A trailing or doubled comma would otherwise produce "" entries that end
    # up in the saved examples.
    stripped_items = (item.strip() for item in reply_text.split(","))
    topics = [topic for topic in stripped_items if topic]
    return (action, input_topic, topics)


In [None]:
# Build the fine-tuning dataset: each round asks for a batch of seed topics,
# expands every previously unseen topic into an (action, topic, outputs)
# example, and checkpoints everything collected so far to a CSV file.
NUM_ROUNDS = 50
version = 0
output_path = f'../data/finetune_examples_v{version}.csv'

seen_topics = set()
all_examples = []

# Create the output directory up front so a missing folder is noticed before
# any API calls are made rather than after the first full round.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# `round_index` rather than `round`, which would shadow the builtin for the
# rest of the notebook session.
for round_index in range(NUM_ROUNDS):
    print(f"Round {round_index + 1}/{NUM_ROUNDS}")
    input_topics = generate_input_topics()

    # Filter out blanks and duplicates. Topics are added to `seen_topics` as
    # they are accepted so that a topic repeated inside the same batch is
    # only expanded once.
    new_topics = []
    for topic in input_topics:
        if not topic or topic in seen_topics:
            continue
        seen_topics.add(topic)
        new_topics.append(topic)

    # Generate examples for new topics
    for input_topic in new_topics:
        example = generate_output_topics(input_topic)
        all_examples.append(example)
        print(example)

    # Checkpoint: rewrite the whole file after every round so the examples
    # from completed rounds survive a failure in a later round. The explicit
    # UTF-8 encoding keeps non-ASCII topics writable regardless of the
    # platform's default locale encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Action', 'Input Topic', 'Output Topics'])
        for ex in all_examples:
            # Output topics are stored as one comma-joined cell.
            writer.writerow([ex[0], ex[1], ','.join(ex[2])])