In [None]:
import os
from openai import OpenAI

# Fall back to a placeholder only when the key is missing or empty.
# os.environ.get() is used because indexing os.environ raises KeyError when
# the variable is not set at all, which would abort the cell before the
# fallback below could run.
if not os.environ.get("OPENAI_API_KEY"):
    # NOTE(review): do not commit a real key here; prefer exporting
    # OPENAI_API_KEY in the environment before starting the kernel.
    os.environ["OPENAI_API_KEY"] = '<REDACTED>'

# Model identifier used by this notebook.
MODEL = "gpt-4o-mini"

# API client configured from the environment variable set/checked above.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

In [25]:
def game_to_string(game, turn):
    """Render the transcript of a game up to and including turn `turn`.

    Every turn from 0 through `turn` contributes a numbered "Question" and
    "Answer" line. Turns before `turn` additionally contribute a "Guess"
    line; the guess for the current turn is left out.

    Args:
        game: Dict with 'questions', 'answers' and 'guesses' sequences.
        turn: Zero-based index of the last turn to include.

    Returns:
        The transcript as a single newline-terminated string ("" when
        `turn` is negative).
    """
    parts = []
    for idx in range(turn + 1):
        number = idx + 1  # turns are displayed one-based
        parts.append(f"Question {number}: {game['questions'][idx]}\n")
        parts.append(f"Answer {number}: {game['answers'][idx]}\n")
        # 'guesses' is only looked up for completed turns.
        if idx < turn:
            parts.append(f"Guess {number}: {game['guesses'][idx]}\n")
    return "".join(parts)

# Opening sentences shared by the system prompt of all three roles.
_RULES_INTRO = (
    "You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
    "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
    "Then the Guesser tries to guess the keyword based on the questions and answers in the game. "
)


def _ask_messages(questions, answers):
    """Build the Questioner conversation: each recorded question is an assistant turn."""
    # NOTE(review): the "INSTEAD ASK" examples below have no closing quote.
    # The text is kept verbatim because editing it changes the training data.
    sys_prompt = {
        "role": "system",
        "content": _RULES_INTRO + (
            "The keyword is a specific thing, NOT a place and NOT a person.\n\n"
            "You are participating in a new game of 20 Questions. Your role is to be the Questioner. You will ask successive yes-or-no questions to determine the keyword. "
            "You have a limited number of questions to ask, so choose a question that will eliminate half of the possible keywords to maximize efficiency. "
            "Avoid asking questions that are too specific too early on. Be as vague as possible while still eliminating half of the remaining possibilities. "
            "DO NOT ask if the keyword is a specific thing, rather ask something about the keyword.\n"
            "Example 1: DO NOT ASK: 'Is the keyword New York city?', INSTEAD ASK: 'Is it in the East Coast of the United States?\n"
            "Example 2: DO NOT ASK: 'Is the keyword cow?', INSTEAD ASK: 'Is it a specific type of cow?\n"
            "Example 3: DO NOT ASK: 'Is the keyword bottle?', INSTEAD ASK: 'Is it a bottle made of a specific material?\n"
            "Example 4: DO NOT ASK: 'Is the keyword lamp?', INSTEAD ASK: 'Is it a type of lamp?\n"
            "Do NOT assume the game has ended, the game will determine when to stop. Do not output any text other than the question."
        ),
    }
    user_start = {
        "role": "user",
        "content": "Ask your first question."
    }
    messages = [sys_prompt, user_start]
    last_turn = len(questions) - 1
    for i, question in enumerate(questions):
        messages.append({
            "role": "assistant",
            "content": question
        })
        # The final question gets no follow-up, so the conversation ends on
        # an assistant turn.
        if i < last_turn:
            messages.append({
                "role": "user",
                "content": f"Answer: {answers[i]} \nAsk your next question. Remember to not ask if the keyword is a specific thing, but rather ask something about the keyword."
            })
    return messages


def _answer_messages(keyword, questions, answers):
    """Build the Answerer conversation: each question is a user turn, each answer an assistant turn."""
    prompt = {
        "role": "system",
        "content": _RULES_INTRO + (
            "The keyword is a specific place or thing. \n\n"
            "You are participating in a new game of 20 Questions. Your role is to be the Answerer. "
            f"The keyword is {keyword}. Answer only Yes or No based on the keyword. Do not output any other text."
        ),
    }
    messages = [prompt]
    for i, question in enumerate(questions):
        messages.append({
            "role": "user",
            "content": f"Answer the following question about the keyword: {keyword}.\nQuestion: " + question
        })
        # Recorded answers such as "Yes." are normalised to a bare lowercase
        # 'yes'/'no'; anything that does not contain "yes" becomes 'no'.
        answer = 'yes' if 'yes' in answers[i].lower() else 'no'
        messages.append({
            "role": "assistant",
            "content": answer
        })
    return messages


def _guess_messages(game, guesses, game_len):
    """Build the Guesser conversation: each user turn shows the transcript so far, each assistant turn is the recorded guess."""
    prompt = {
        "role": "system",
        "content": _RULES_INTRO + (
            "The keyword is a specific place or thing.\n\n"
            "You are participating in a new game of 20 Questions. Your role is to be the Guesser. Based on the given questions and answers, guess the keyword at this point. "
            "Even if information is limited, guess a keyword. Do not ask a question, just state the guessed keyword with no other text except the keyword itself. "
            "DO NOT output any other text other than the guessed keyword. DO NOT refuse to guess. DO NOT REPEAT A PREVIOUS GUESS."
        ),
    }
    # Built in a single pass. Creating the list before the loop also means a
    # game with no turns yields just the system prompt instead of leaving
    # `messages` undefined.
    messages = [prompt]
    for turn in range(game_len):
        messages.append({
            "role": "user",
            # game_to_string is defined earlier in this file.
            "content": "Game so far:\n" + game_to_string(game, turn) + "\nBased on all questions and answers so far, enter your guess. Do not repeat a guess.\nGuess:"
        })
        messages.append({
            "role": "assistant",
            "content": guesses[turn]
        })
    return messages


def format_for_fine_tuning(game, role):
    """Convert one recorded 20 Questions game into chat messages for fine-tuning.

    Args:
        game: Dict with 'keyword' plus the per-turn sequences 'questions',
            'answers' and 'guesses'. The number of turns is len(questions).
        role: Which player the assistant turns represent: "ask" (Questioner),
            "answer" (Answerer) or "guess" (Guesser).

    Returns:
        A list of {"role": ..., "content": ...} dicts that starts with the
        system prompt for the chosen role.

    Raises:
        KeyError: If `game` lacks any of the four expected keys.
        ValueError: If `role` is not "ask", "answer" or "guess".
        IndexError: If 'answers' or 'guesses' has fewer entries than the
            chosen role reads.
    """
    keyword = game['keyword']
    questions = game['questions']
    answers = game['answers']
    guesses = game['guesses']
    game_len = len(questions)

    if role == "ask":
        return _ask_messages(questions, answers)
    if role == "answer":
        return _answer_messages(keyword, questions, answers)
    if role == "guess":
        return _guess_messages(game, guesses, game_len)

    # Fail with a clear message rather than an UnboundLocalError at return.
    raise ValueError(f"Unknown role {role!r}; expected 'ask', 'answer' or 'guess'.")

def print_formatted_game(game, role):
    """Print the fine-tuning conversation for `game` in a readable layout.

    Each message is shown as its role followed by a colon, its content on
    the next line, and then a blank line.

    Args:
        game: Game dict accepted by format_for_fine_tuning.
        role: Role string accepted by format_for_fine_tuning.

    Returns:
        None; the text is written to standard output.
    """
    rendered = "".join(
        entry['role'] + ':\n' + entry['content'] + '\n\n'
        for entry in format_for_fine_tuning(game, role)
    )
    print(rendered)

In [26]:
# Hand-written sample game used to eyeball the formatter output.
test_game = {
    "keyword": "Dynamite",
    "questions": [
        "Is it a place?",
        "Is it broadly related to food, drinks or cooking?",
        "Is it broadly related to electronics or technology?",
        "Is it broadly related to arts, sports or entertainment?",
        "Is it broadly related to clothing, beauty or accessories?",
        "Is it man-made?",
        "Is it found indoors?",
        "Is it broadly related to furniture or architecture?",
        "Is it broadly related to agriculture or industry?",
        "Is it a tool or equipment used for a specific task in agriculture or industry?",
        "Is it primarily used in agriculture?",
        "Is it commonly used in manufacturing or construction industries?",
        "Is it powered by electricity?",
        "Is it a hand tool?",
        "Is it a heavy machinery or equipment?",
        "Is it commonly used for measurement or precision work?",
        "Is it used for transportation or moving objects?",
        "Is it used for joining or fastening materials?",
        "Is it involved in molding or forming materials?",
        "Is it used for cutting or shaping materials?",
    ],
    "answers": [
        "No.", "No.", "No.", "No.", "No.",
        "Yes.", "Yes.", "No.", "Yes.", "Yes.",
        "No.", "Yes.", "No.", "No.", "No.",
        "No.", "No.", "No.", "No.", "No.",
    ],
    "guesses": [
        "Chair", "Book", "Car", "Clock", "Lamp",
        "Window", "Table", "Computer keyboard", "Hammer", "Wrench",
        "Drill", "Screwdriver", "Screw", "Ladder", "Anvil",
        "Vice", "Saw", "Welder's mask", "Workbench", "Conveyor belt",
    ],
}

# Show the Guesser-role conversation produced for the sample game.
print_formatted_game(test_game, 'guess')

system:
You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific place or thing.

You are participating in a new game of 20 Questions. Your role is to be the Guesser. Based on the given questions and answers, guess the keyword at this point. Even if information is limited, guess a keyword. Do not ask a question, just state the guessed keyword with no other text except the keyword itself. DO NOT output any other text other than the guessed keyword. DO NOT refuse to guess. DO NOT REPEAT A PREVIOUS GUESS.

user:
Game so far:
Question 1: Is it a place?
Answer 1: No.

Based on all questions and answers so far, enter your guess. Do not repeat a guess.
Guess:

assistant:
Chair

user:
Game so far:
Question 1: Is it a pl

In [27]:
import json

def read_jsonl_to_dict_list(file_path):
    """Load a JSON Lines file into a list.

    Args:
        file_path: Path of a text file holding one JSON document per line.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data_list = []
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Skip blank lines (e.g. a trailing empty line), which
            # json.loads would reject.
            if not line.strip():
                continue
            data_list.append(json.loads(line))
    return data_list

# Load the recorded games from disk.
games_list = read_jsonl_to_dict_list('20qs-data/final_games/gpt-4o-400-mc-tree-games.jsonl')

# Format every game once per role. Records end up grouped by role: all 'ask'
# conversations first, then 'answer', then 'guess'.
formatted_games_list = []
for role in ('ask', 'answer', 'guess'):
    formatted_role_games = [format_for_fine_tuning(game, role) for game in games_list]
    # Wrap each conversation in a {"messages": [...]} record.
    formatted_games_list.extend(
        {"messages": conversation} for conversation in formatted_role_games
    )

In [28]:
# Spot-check a single formatted training record by index.
formatted_games_list[500]

{'messages': [{'role': 'system',
   'content': 'You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific place or thing. \n\nYou are participating in a new game of 20 Questions. Your role is to be the Answerer. The keyword is Sleep mask. Answer only Yes or No based on the keyword. Do not output any other text.'},
  {'role': 'user',
   'content': 'Answer the following question about the keyword: Sleep mask.\nQuestion: Is it related to food, beverages or cooking?'},
  {'role': 'assistant', 'content': 'no'},
  {'role': 'user',
   'content': 'Answer the following question about the keyword: Sleep mask.\nQuestion: Is it related to industrial production or manufacturing?'},
  {'role': 'assistant', 'content': 'no'},
  

In [29]:
def write_dict_list_to_jsonl(dict_list, file_path):
    """Write each entry of `dict_list` to `file_path` as one JSON line.

    Args:
        dict_list: Iterable of JSON-serialisable objects.
        file_path: Destination path; an existing file is overwritten.

    Returns:
        None.
    """
    with open(file_path, 'w') as out:
        # One serialised entry per line, each terminated by a newline.
        out.writelines(json.dumps(record) + '\n' for record in dict_list)
            
# Persist the formatted records to disk, one JSON object per line.
write_dict_list_to_jsonl(formatted_games_list, '20qs-data/final_games/400-fine-tuning-MC-formatted-games.jsonl')