In [1]:
import litellm
import time
import sys, os

if not os.environ["OPENAI_API_KEY"]: 
    os.environ["OPENAI_API_KEY"]= '<REDACTED>'

In [107]:
MODEL = "gpt-4o"
#MODEL = "gpt-4o-mini"
EVALUATOR_MODEL="gpt-4o-mini"
# MODEL = "gpt-4o"
#MODEL = "claude-3-5-sonnet-20240620"
litellm.modify_params = True

In [3]:
def completion(model, messages):
    result = litellm.completion(model, messages)
    if "claude" in model:
        time.sleep(60/50) # Adjust based on rate limit (https://console.anthropic.com/settings/limits)
    return result

In [4]:
class DecisionNode:
    def __init__(self, query = None, yes_branch=None, no_branch=None):
        self.query = query
        self.yes_branch = yes_branch
        self.no_branch = no_branch
        
    def print_tree(self, level=0, prefix=""):
        if self.query:
            print(f"{prefix}-- {self.query}")
            if self.yes_branch:
                self.yes_branch.print_tree(level + 1, prefix + "  |")
            if self.no_branch:
                self.no_branch.print_tree(level + 1, prefix + "   ")
        else:
            print(f"{prefix}-- *")
            
    def write_tree_to_file(self, filename):
        with open(filename, 'w') as f:
            self._write_tree(f)

    def _write_tree(self, file, level=0, prefix=""):
        if self.query:
            file.write(f"{prefix}-- {self.query}\n")
            if self.yes_branch:
                self.yes_branch._write_tree(file, level + 1, prefix + "  |")
            if self.no_branch:
                self.no_branch._write_tree(file, level + 1, prefix + "   ")
        else:
            file.write(f"{prefix}-- *\n")

    @staticmethod
    def read_tree_from_file(filename):
        with open(filename, 'r') as f:
            lines = f.readlines()
        return DecisionNode._read_tree(lines, 0, "")

    @staticmethod
    def _read_tree(lines, indent_level, current_prefix):
        if not lines:
            return None

        line = lines.pop(0).rstrip()
        expected_prefix = current_prefix + "-- "

        if not line.startswith(expected_prefix):
            return None

        content = line[len(expected_prefix):]
        if content == "*":
            return DecisionNode()

        yes_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + "  |")
        no_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + "   ")

        return DecisionNode(content, yes_branch, no_branch)

# Creating the tree

In [10]:
MC_tree = DecisionNode.read_tree_from_file('20qs-data/decision_trees/decision_tree_v2.txt')
MC_tree.print_tree()

-- food, beverage, cooking
  |-- food
  |  |-- agriculture
  |  |  |-- *
  |  |   -- prepared food
  |  |     |-- *
  |  |      -- *
  |   -- agriculture
  |     |-- safety
  |     |  |-- *
  |     |   -- *
  |      -- handheld
  |        |-- beverage
  |        |  |-- *
  |        |   -- *
  |         -- *
   -- industry, manufacturing
     |-- safety
     |  |-- transportation, vehicles
     |  |  |-- *
     |  |   -- architecture, construction
     |   -- electronics
     |     |-- *
     |      -- natural material, resource
     |        |-- *
     |         -- *
      -- sports, entertainment
        |-- handheld
        |  |-- electronics
        |  |  |-- *
        |  |   -- arts, media
        |  |     |-- *
        |  |      -- *
        |   -- *
         -- clothing, accessories, beauty
           |-- *
            -- animal
              |-- *
               -- handheld
                 |-- electronics
                 |  |-- *
                 |   -- safety
                

In [41]:
import pickle

with open('20qs-data/decision_trees/gpt_decision_tree_v2.p', 'rb') as f:
    gpt_tree_v2 = pickle.load(f)
    
with open('20qs-data/decision_trees/gpt_decision_tree_v3.p', 'rb') as f:
    gpt_tree_v3 = pickle.load(f)
    
with open('20qs-data/decision_trees/gpt_decision_tree_v3.p', 'rb') as f:
    gpt_tree_v4 = pickle.load(f)
    
gpt_tree_v4.print_tree()

-- Is it related to food, beverages or cooking?
  |-- Is it something that can be consumed on its own?
  |  |-- Is it commonly eaten in solid form rather than liquid form?
  |  |  |-- Is it something that is usually considered a snack or treat?
  |  |  |  |-- Is it a processed food product?
  |  |  |  |  |-- Is it a baked good?
  |  |  |  |  |  |-- Is it primarily made with flour?
  |  |  |  |  |   -- Is it usually packaged in single-serving portions?
  |  |  |  |   -- Is it a type of fruit?
  |  |  |  |     |-- Is it a fruit that typically has seeds or pits?
  |  |  |  |      -- Is it primarily sweet in taste?
  |  |  |   -- Is it primarily derived from plants?
  |  |  |     |-- Is it commonly used as an ingredient in cooked dishes?
  |  |  |     |  |-- Is it typically a vegetable?
  |  |  |     |   -- Is it something that you would typically find in the produce section of a grocery store?
  |  |  |      -- Is it commonly served as a main course or part of a main course?
  |  |  |    

In [7]:
query_map = {
    'food': "Is it a food?",
    'beverage': "Is it a beverage?",
    'living': "Is it a living thing?",
    'plant': "Is it a plant?",
    'animal': 'Is it an animal?',
    'electronics': "Is it related to electronics or technology?",
    'furniture': 'Is it furniture?',
    'transportation, vehicles': 'Is it related to transportation or vehicles?',
    'man-made': "Is it a man-made thing?",
    'architecture, construction': 'Is it related to architectural structures or construction?',
    'natural material, resource': 'Is it a natural material or resource?',
    'natural phenomenon': 'Is it a natural phenomenon?',
    'industry, manufacturing': "Is it related to industrial production or manufacturing?",
    'food, beverage, cooking': 'Is it related to food, beverages or cooking?',
    'handheld': 'Is it something a person can hold in their hand?',
    'agriculture': "Is it related to agriculture?",
    'indoors': 'Is it something that can be found indoors?',
    'arts, media': "Is it broadly related to the arts or media?",
    'safety': "Is it related to safety or safety equipment?",
    'medicine': "Is it broadly related to medicine or healthcare?",
    'clothing, accessories, beauty': "Is it related to clothing, accessories or beauty products?",
    'sports, entertainment': "Is it broadly related to entertainment or sports?",
    'prepared food': "Is it a prepared food or dish?"
}


In [104]:
def game_to_string(game):
    return '\n'.join([f'{step["role"]}: {step["content"]}' for step in game])

def construct_messages(game, role, keyword=None, guesses=[]):
    if role == "questioner":
        prompt = {
            "role": "system",
            "content": (
                "You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                "Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person.\n\n "
                "You are participating in a new game of 20 Questions. Your role is to be the Questioner. You will ask successive yes-or-no questions to determine the keyword. "
                "You have a limited number of questions to ask, so choose a question that will eliminate half of the possible keywords to maximize efficiency. "
                "Avoid asking questions that are too specific too early on. Ask questions that are as broad as possible while still eliminating half of the remaining possibilities. "
                "DO NOT ask if the keyword is a specific thing, rather ask something about the keyword.\n"
                "Example 1: DO NOT ASK: 'Is the keyword car?', INSTEAD ASK: 'Is it a specific type of car, such as a sedan?\n"
                "Example 2: DO NOT ASK: 'Is the keyword cow?', INSTEAD ASK: 'Is it a specific type of cow?\n"
                "Example 3: DO NOT ASK: 'Is the keyword bottle?', INSTEAD ASK: 'Is it a bottle made of a specific material?\n"
                "Example 4: DO NOT ASK: 'Is the keyword lamp?', INSTEAD ASK: 'Is it a type of lamp?\n"
                "Do NOT assume the game has ended, the game will determine when to stop. Do not output any text other than the question."
            ),
        }
        messages = [prompt]
        for message in game:
            if message["role"] == "questioner":
                messages.append({
                    "role": "assistant",
                    "content": message["content"]
                })
            else:
                messages.append({
                    "role": "user",
                    "content": message["content"] + "\nAsk your next question. Remember to not ask if the keyword is a specific thing, but rather ask something about the keyword."
                })
    elif role == "answerer":
        prompt = {
            "role": "system",
            "content": (
                f"You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                "Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person. "
                "You are participating in a new game of 20 Questions. Your role is to be the Answerer. "
                f"The keyword is {keyword}. Answer only Yes or No based on the keyword. Do not output any other text."
            ),
        }
        messages = [prompt]
        for message in game:
            if message["role"] == "questioner":
                messages.append({
                    "role": "user",
                    "content": f"Answer the following question about the keyword: '{keyword}'. " + message["content"]
                })
            else:
                messages.append({
                    "role": "assistant",
                    "content": message["content"]
                })
    elif role == "guesser":
        prompt = {
            "role": "system",
            "content": (
                "You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                "Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person. "
                "You are participating in a new game of 20 Questions. Your role is to be the Guesser. Based on the given questions and answers, guess the keyword at this point. "
                "Even if information is limited, guess a keyword. Do not ask a question, just state the guessed keyword with no other text except the keyword itself. "
                "DO NOT output any other text other than the guessed keyword. DO NOT refuse to guess. DO NOT REPEAT A PREVIOUS GUESS."
            ),
        }
        messages = [prompt, {
            "role": "user",
            "content": game_to_string(game) + "\nPrevious guesses:\n" + '\n'.join(guesses) + "\nYour guess: "
        }]
    elif role == "evaluator":
        prompt = {
            "role": "system",
            "content": (
                "You are an evaluator for the game 20 Questions. Given a keyword and a guess, return True if the guess was correct and False otherwise. "
                "The guess should refer to the same thing as the keyword but the guess should NOT be more vague or general than the keyword. DO NOT output anything except 'True' or 'False'."
                "Here are some guiding examples: \n\n"
                "Example 1:\nKeyword: USA\n Guess: United States\nEvaluation: True\n\n"
                "Example 2:\nKeyword: Peking duck\n Guess: duck\nEvaluation: False\n\n"
                "Example 3:\nKeyword: iPad\n Guess: tablet\nEvaluation: False\n\n"
                "Example 4:\nKeyword: pretoria south africa\n Guess: Pretoria\nEvaluation: True\n\n"
                "Example 5:\nKeyword: Bed and Breakfast\n Guess: BnB\nEvaluation: True\n\n"
                "Example 6:\nKeyword: Diet coke\n Guess: coca-cola\nEvaluation: False\n\n"
            ),
        }
        messages = [prompt, {
            "role": "user",
            "content": f"Keyword: {keyword}\nGuess: {guesses[-1]}\nEvaluation: "
        }]
    return messages


In [9]:
construct_messages([{"role": "questioner", "content": "Is it a place?"}], "answerer", "chair")

[{'role': 'system',
  'content': 'You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person. You are participating in a new game of 20 Questions. Your role is to be the Answerer. The keyword is chair. Answer only Yes or No based on the keyword. Do not output any other text.'},
 {'role': 'user',
  'content': "Answer the following question about the keyword: 'chair'. Is it a place?"}]

## Game Simulator

In [108]:
def simulate_game(keyword, tree, print_game = True):
    if print_game: print(f"USING MODEL: {MODEL}")
    if print_game: print(f"KEYWORD: {keyword}")
    if print_game: print()
    
    game = []
    questions = [step["content"] for step in game if step["role"] == "questioner"]
    answers = [step["content"] for step in game if step["role"] == "answerer"]
    guesses = []
    game_won = False
    decision_node = tree
    
    for _ in range(20):
        
        if decision_node and decision_node.query:
            if "?" in decision_node.query:
                question = decision_node.query
            else:
                question = query_map[decision_node.query]
        else:
            question = completion(model=MODEL, messages=construct_messages(game, "questioner", keyword)).choices[0].message.content
    
        game.append({
            "role": "questioner",
            "content": question
        })

        if print_game: print(f"questioner: {question}")

        if "OVER" in question:
            break
        
        questions.append(question)

        answer = completion(model=MODEL, messages=construct_messages(game, "answerer", keyword)).choices[0].message.content
    
        game.append({
            "role": "answerer",
            "content": answer
        })

        answers.append(answer)
        if print_game: print(f"answerer: {answer}")

        if decision_node and decision_node.yes_branch != None and "yes" in answer.lower():
            decision_node = decision_node.yes_branch
        elif decision_node and decision_node.no_branch != None and "no" in answer.lower():
            decision_node = decision_node.no_branch
        else:
            decision_node = None
            
        guess = completion(model=MODEL, messages=construct_messages(game, "guesser", keyword, guesses)).choices[0].message.content

        guesses.append(guess)
        if print_game: print(f"guesser: {guess}")

        evaluation = completion(model=EVALUATOR_MODEL, messages=construct_messages(game, "evaluator", keyword, guesses)).choices[0].message.content
        if print_game: print(f"evaluator: {evaluation}")
        if "true" in evaluation.lower(): 
            game_won = True
            break

        if print_game: print()

    return {
        "keyword": keyword,
        "questions": questions,
        "answers": answers,
        "guesses": guesses,
        "win" : game_won,
    }

In [111]:
simulate_game("seatbelt", MC_tree)

USING MODEL: gpt-4o
KEYWORD: seatbelt

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Fire extinguisher
evaluator: False

questioner: Is it typically used in a medical context?
answerer: No.
guesser: Helmet
evaluator: False

quest

{'keyword': 'seatbelt',
 'questions': ['Is it related to food, beverages or cooking?',
  'Is it related to industrial production or manufacturing?',
  'Is it broadly related to entertainment or sports?',
  'Is it related to clothing, accessories or beauty products?',
  'Is it an animal?',
  'Is it something a person can hold in their hand?',
  'Is it related to electronics or technology?',
  'Is it related to safety or safety equipment?',
  'Is it typically used in a medical context?',
  'Is it commonly used in the construction industry?',
  'Is it commonly used in a household setting?',
  'Is it used in outdoor activities or environments?',
  'Is it related to office or workplace safety?',
  'Is it used in transportation or vehicles?'],
 'answers': ['No.',
  'No.',
  'No.',
  'No.',
  'No.',
  'Yes.',
  'No.',
  'Yes.',
  'No.',
  'No.',
  'No.',
  'No.',
  'No.',
  'Yes.'],
 'guesses': ['Computer',
  'Bicycle',
  'Book',
  'Chair',
  'Lamp',
  'Phone',
  'Key',
  'Fire extinguisher',

## Test trees:

In [77]:
first_100_test_words = ['Dandelion', 'weasel', 'pot holder', 'casket', 'push pin', 'Protein bar', 'junction box', 'bonsai tree', 'Nail Polish Remover', 'livestock trailer', 'amazon echo', 'wire brush', 'rocketship', 'Paper towels', 'dresser', 'lemming', 'pear tree', 'conference table', 'extension ladder', 'vortex mixer', 'Wine Aerator', 'Golf Cart', 'Tweezers', 'catalytic converter', 'Ointment', 'Frappuccino', 'aluminum foil', 'headboard', 'butter knife', 'chimney', 'sewing needle', 'angelfish', 'bird netting', 'level sensor', 'cane', 'palmetto tree', 'coal crusher', 'Sprinkler', 'king snake', 'plant saucer', 'beer coaster', 'espresso machine', 'desk lamp', 'Wine Tasting Kit', 'fossil', 'oatmeal cookie', 'rubber', 'Pallet Jack', 'butter knife', 'Sticker', 'lace dress', 'wind vane', 'Mouse', 'fudge brownie', 'Sippy cup', 'loose change', 'baggage conveyor', 'zinc', 'Screwdriver', 'binder clip', 'Matches', 'pumpkin', 'medication dispenser', 'electronic piano', 'Dish soap', 'tea cup', 'Air Conditioner', 'Reptile', 'Pressure gauge', 'Toiletries bag', 'safety glasses', 'fishing net', 'Swing set', 'Grapefruit', 'neon light', 'Stain Remover', 'Screwdriver', 'game board', 'climbing rope', 'Ventilation System', 'Pumpkin pie', 'Olive Oil', 'infrared sensor', 'petunia', 'anchovy', 'user manual', 'upright piano', 'lifting crane', 'reading pillow', 'Spatula', 'Stain Remover', 'Komodo dragon', 'Floor jacks', 'sweetgum tree', 'grape juice', 'ultrasound', 'plastic baggie', 'Soldering iron', 'Reptile', 'caribou']

In [78]:
from tqdm.notebook import tqdm
import numpy as np

trees_map = {'gpt_tree_v2': gpt_tree_v2, 'gpt_tree_v3': gpt_tree_v3, 'gpt_tree_v4': gpt_tree_v4, 'MC_tree': MC_tree}
test_results = {'gpt_tree_v2': {}, 'gpt_tree_v3': {}, 'gpt_tree_v4': {}, 'MC_tree': {}}
    

trees:   0%|          | 0/4 [00:00<?, ?it/s]

keywords:   0%|          | 0/100 [00:00<?, ?it/s]

keywords:   0%|          | 0/100 [00:00<?, ?it/s]

keywords:   0%|          | 0/100 [00:00<?, ?it/s]

keywords:   0%|          | 0/100 [00:00<?, ?it/s]

Tree: gpt_tree_v2
Number of keywords guessed correctly: 19
Average length of games won: 19
Keywords guessed correctly:
Ointment
butter knife
oatmeal cookie
Mouse
fudge brownie
Screwdriver
Dish soap
Air Conditioner
safety glasses
fishing net
neon light
game board
climbing rope
petunia
lifting crane
Spatula
Komodo dragon
grape juice
plastic baggie

Tree: gpt_tree_v3
Number of keywords guessed correctly: 30
Average length of games won: 30
Keywords guessed correctly:
weasel
Protein bar
amazon echo
dresser
Golf Cart
Tweezers
cane
king snake
espresso machine
fossil
oatmeal cookie
Sticker
Mouse
fudge brownie
Screwdriver
binder clip
electronic piano
Dish soap
Air Conditioner
Reptile
Swing set
Grapefruit
game board
Pumpkin pie
petunia
anchovy
lifting crane
Spatula
grape juice
caribou

Tree: gpt_tree_v4
Number of keywords guessed correctly: 25
Average length of games won: 25
Keywords guessed correctly:
weasel
Protein bar
amazon echo
Tweezers
palmetto tree
Sprinkler
king snake
desk lamp
oatmeal c

In [83]:
import random

test_words = random.choices([keyword for keyword in keyword_things if keyword not in test_results['gpt_tree_v2']], k = 150)

In [86]:
print(test_words)

['groundhog', 'Utility Cart', 'Pamphlet', 'cockroach', 'bank statement', 'vegetarian chili', 'Luggage', 'oxygen', 'planter', 'butterfly net', 'parrot', 'smoke alarm', 'hand drill', 'peony', 'fire helmet', 'cupcake', 'hot tea', 'Hairbrush', 'Lathe', 'meatloaf', 'dust jacket', 'medical chart', 'bald eagle', 'Rat', 'pipe', 'hazelnut tree', 'pineapple', 'cable tie', 'steam iron', 'Ultraviolet Lamp', 'Phone charger', 'hair elastic', 'wine decanter', 'Umbrella stand', 'Ear protection', 'pillow sham', 'volleyball', 'metal file', 'earring', 'charger', 'honeydew', 'relief valve', 'Bulb', 'teddy bear', 'silicon', 'Ironing Board', 'chair cushion', 'microphone stand', 'hamster', 'wire hanger', 'jacket', 'Mushroom', 'elastic bandage', 'Muffin Tin', 'doughnut', 'postage stamp', 'Fungi', 'picture frame', 'vest', 'cooler', 'perfume bottle', 'foil paper', 'Ocarina', 'Index Card', 'steak', 'Ear protection', 'Radio scanner', 'chicken wire', 'Floor Mats', 'rice krispie', 'moving walkway', 'Disinfectant', 

In [88]:
for tree in tqdm(trees_map, desc = 'trees'):
    for keyword in tqdm(test_words, leave = False, desc = 'keywords'):
        game_result = simulate_game(keyword, trees_map[tree], print_game = False)
        test_results[tree][keyword] = game_result
        
for tree in test_results:
    keywords_guessed_correctly = [keyword for keyword in test_results[tree] if test_results[tree][keyword]["win"] == True]
    print(f"Tree: {tree}")
    print(f"Number of keywords guessed correctly: {len(keywords_guessed_correctly)}")
    keywords_print = "\n".join(keywords_guessed_correctly)
    avg_len_games_won = np.mean([len(test_results[tree][keyword]['questions']) for keyword in keywords_guessed_correctly])
    print(f"Average length of games won: {avg_len_games_won}")
    print(f"Keywords guessed correctly:\n{keywords_print}\n")

with open("20qs-data/final_games/results_by_tree.jsonl", "w") as f:
    json.dump(test_results, f)

trees:   0%|          | 0/4 [00:00<?, ?it/s]

keywords:   0%|          | 0/150 [00:00<?, ?it/s]

keywords:   0%|          | 0/150 [00:00<?, ?it/s]

keywords:   0%|          | 0/150 [00:00<?, ?it/s]

keywords:   0%|          | 0/150 [00:00<?, ?it/s]

Tree: gpt_tree_v2
Number of keywords guessed correctly: 52
Average length of games won: 14.076923076923077
Keywords guessed correctly:
Ointment
butter knife
oatmeal cookie
Mouse
fudge brownie
Screwdriver
Dish soap
Air Conditioner
safety glasses
fishing net
neon light
game board
climbing rope
petunia
lifting crane
Spatula
Komodo dragon
grape juice
plastic baggie
Luggage
cupcake
hot tea
Lathe
Rat
volleyball
charger
honeydew
Bulb
jacket
Mushroom
doughnut
Fungi
picture frame
cooler
steak
Disinfectant
Hazelnut
Agave
Sleeping bag
Milling machine
garden shovel
Gym Mats
mop
coffee machine
Stair Stepper
lime
impact crusher
strainer
Stove
Oxygen tank
beer glass
Pants

Tree: gpt_tree_v3
Number of keywords guessed correctly: 67
Average length of games won: 14.656716417910447
Keywords guessed correctly:
weasel
Protein bar
amazon echo
dresser
Golf Cart
Tweezers
cane
king snake
espresso machine
fossil
oatmeal cookie
Sticker
Mouse
fudge brownie
Screwdriver
binder clip
electronic piano
Dish soap
Air Co

## Generate all the games

In [15]:
import json

def read_jsonl_and_transform(file_path):
    result_dict = {}

    with open(file_path, 'r') as file:
        for line in file:
            record = json.loads(line)
            keyword = record.pop('keyword')
            result_dict[keyword] = record

    return result_dict

def read_json_to_dict(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

keyword_labels, results = read_jsonl_and_transform('20qs-data/keywords_data/labeled_keywords_v2.jsonl'), read_json_to_dict('20qs-data/keywords_data/labeling_results_v2.jsonl')

def read_file_to_list(file_path):
    with open(file_path, 'r') as file:
        lines = file.read().splitlines()
    return lines

keywords_list = read_file_to_list('20qs-data/keywords_data/keywords.txt')
similar_keywords_list = read_file_to_list('20qs-data/keywords_data/similar_keywords.txt')

keywords_list = keywords_list[1:]
print(f"Number of keywords in data: {len(keywords_list)}")

keyword_places = [keyword for keyword in keywords_list if keyword_labels[keyword]['place'] == 'yes']
keyword_things = [keyword for keyword in keywords_list if keyword_labels[keyword]['place'] == 'no']

print(f"Number of keywords labeled as things: {len(keyword_things)}.")

Number of keywords in data: 2046
Number of keywords labeled as things: 1347.


Read current games:

In [121]:
import json

def read_jsonl_and_transform(file_path):
    result_dict = {}

    with open(file_path, 'r') as file:
        for line in file:
            if line.strip():  # Skip empty lines
                try:
                    record = json.loads(line)
                    keyword = record.pop('keyword')
                    result_dict[keyword] = record
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON for line: {line}")
                    print(f"Error message: {e}")

    return result_dict

# Path to the uploaded file
#file_path = '20qs-data/real_keyword_games_extended.jsonl'

# Read and transform the JSONL file
#result_dict = read_jsonl_and_transform(file_path)

# Get the current keywords with games
current_keywords_with_games = read_jsonl_and_transform('20qs-data/final_games/gpt-4o-400-mc-tree-games.jsonl')

In [123]:
keywords_won = [keyword for keyword in current_keywords_with_games if current_keywords_with_games[keyword]['win'] == True]

KeyError: 'win'

In [117]:
game_results = []

In [119]:
import random
from tqdm import tqdm

dict_keywords = {d['keyword'] for d in game_results}

keywords_left = [keyword for keyword in keyword_things if (keyword not in dict_keywords) and (keyword not in current_keywords_with_games)]
similar_keywords_left = [keyword for keyword in similar_keywords_list if (keyword not in dict_keywords) and (keyword not in current_keywords_with_games)]

print(f"Keywords left: {len(keywords_left)}\n")
print(f"Similar Keywords left: {len(similar_keywords_left)}\n")

generated_keywords = random.choices(keywords_left, k=100)
generated_similar_keywords = random.choices(similar_keywords_left, k=100)
generated_keywords_combined = list(set(generated_keywords + generated_similar_keywords))

for generated_keyword in tqdm(generated_keywords_combined, desc = 'games generated'):
    print("Generated keyword:", generated_keyword)
    game_result = simulate_game(generated_keyword, MC_tree)
    game_results.append(game_result)

Keywords left: 1248

Similar Keywords left: 1206



games generated:   0%|          | 0/190 [00:00<?, ?it/s]

Generated keyword: Hose nozzle
USING MODEL: gpt-4o
KEYWORD: Hose nozzle

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Pencil
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Balloon
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it something that is commonly used in offices or schools?
answerer: No.
gue

games generated:   1%|          | 1/190 [00:40<2:07:42, 40.54s/it]

evaluator: False

Generated keyword: Champagne flute
USING MODEL: gpt-4o
KEYWORD: Champagne flute

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it an item commonly found in the kitchen?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a utensil?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Cutting board
evaluator: False

questioner: Is it related to food storage?
answerer: No.
guesser: Measuring cup
evaluator: False

questioner: Is it used for cooking or food preparation?
answerer: No.
guesser: Pla

games generated:   1%|          | 2/190 [01:11<1:48:58, 34.78s/it]

evaluator: True
Generated keyword: ghost crab
USING MODEL: gpt-4o
KEYWORD: ghost crab

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a bird?
answerer: No.
guesser: Snake
evaluator: False

questioner: Is it a reptile?
answerer: No.
guesser: Fish
evaluator: False

questioner: Is it a fish?
answerer: No.
guesser: Frog
evaluator: False

questi

games generated:   2%|▏         | 3/190 [01:55<2:01:51, 39.10s/it]

evaluator: False

Generated keyword: laboratory manual
USING MODEL: gpt-4o
KEYWORD: laboratory manual

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it commonly used for writing or drawing?


games generated:   2%|▏         | 4/190 [02:33<1:59:35, 38.58s/it]

evaluator: False

Generated keyword: Loudspeaker
USING MODEL: gpt-4o
KEYWORD: Loudspeaker

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing card
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it used primarily for playing video games?
answerer: No.
guesser: Television remote
evaluator: False

questioner: Is it commonly used for listening to music or audio?
answerer: Yes.
guesser: Headphones
evaluator: False

questioner: Is it wearable?
answerer: No.
guesser: Speaker
evaluator: False

questioner: Is it typica

games generated:   3%|▎         | 5/190 [03:11<1:58:04, 38.29s/it]

evaluator: False

Generated keyword: Briefcase
USING MODEL: gpt-4o
KEYWORD: Briefcase

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shoes
evaluator: False

questioner: Is it something that is typically worn on the upper body?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it something that typically covers or is worn on the feet?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it typically worn around the neck or head?
answerer: No.
guesser: Belt
evaluator: False

questioner: Is it an item commonly used in beauty routines, such as makeup or skincare products?
answerer: No.
guesser: Ri

games generated:   3%|▎         | 6/190 [03:53<2:02:01, 39.79s/it]

evaluator: True
Generated keyword: flea
USING MODEL: gpt-4o
KEYWORD: flea

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a type of domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a type of wild animal that primarily lives in water?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it a type of wild animal that primarily lives on land?
answerer: Yes.
guesser: Tiger
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: Fa

games generated:   4%|▎         | 7/190 [04:29<1:57:31, 38.53s/it]

evaluator: True
Generated keyword: squash
USING MODEL: gpt-4o
KEYWORD: squash

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Recipe
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a type of fruit?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it commonly grown in tropical climates?
answerer: No.
guesser: Strawberry
evaluator: False

questioner: Is it typically consumed raw?
answerer: No.
guesser: Pumpkin
evaluator: False

questioner: Is it a type of berry?
answerer: No.
guesser: Potato
evaluator: False

questioner: Is it commonly used in baking or cooking?
answerer: Yes.
guesser: Pear
evaluator: False

questioner: Is it commonly associated with desserts or sweet dishes?
answerer: No.
guesser: Tomato
evaluator: False

questioner: Is it a type of citrus fruit?
answerer: No.
guesser: Pum

games generated:   4%|▍         | 8/190 [05:04<1:52:53, 37.22s/it]

evaluator: True
Generated keyword: wrist rest
USING MODEL: gpt-4o
KEYWORD: wrist rest

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Camera
evaluator: False

questioner: Is it a device used primarily for communication?
answerer: No.
guesser: Headphones
evaluator: False

questioner: Is it a device used for entertainment purpo

games generated:   5%|▍         | 9/190 [05:45<1:55:43, 38.36s/it]

evaluator: False

Generated keyword: Lab coat
USING MODEL: gpt-4o
KEYWORD: Lab coat

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Automobile
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Lipstick
evaluator: False

questioner: Is it something that is typically worn on the upper body?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it a type of garment normally worn by both men and women?
answerer: Yes.
guesser: T-shirt
evaluator: False

questioner: Is it typically worn in casual settings?
answerer: No.
guesser: Blouse
evaluator: False

questioner: Is it commonly worn in professional or formal settings?
answerer: Yes.
guesser: Suit
evaluator:

games generated:   5%|▌         | 10/190 [06:17<1:49:50, 36.61s/it]

evaluator: True
Generated keyword: Turnstile
USING MODEL: gpt-4o
KEYWORD: Turnstile

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Planet
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it 

games generated:   6%|▌         | 11/190 [07:00<1:54:56, 38.53s/it]

evaluator: False

Generated keyword: Tracksuit
USING MODEL: gpt-4o
KEYWORD: Tracksuit

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Video game
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Soccer ball
evaluator: False

questioner: Is it something that primarily involves physical activity?
answerer: Yes.
guesser: Bicycle
evaluator: False

questioner: Is it typically played or performed outdoors?
answerer: No.
guesser: Treadmill
evaluator: False

questioner: Is it an individual activity rather than a team activity?
answerer: No.
guesser: Basketball
evaluator: False

questioner: Is it commonly associated with the use of a specific type of ball?
answerer: No.
guesser: D

games generated:   6%|▋         | 12/190 [07:46<2:01:06, 40.82s/it]

evaluator: False

Generated keyword: delivery van
USING MODEL: gpt-4o
KEYWORD: delivery van

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Clock
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Car.
evaluator: False

questi

games generated:   7%|▋         | 13/190 [08:29<2:02:39, 41.58s/it]

evaluator: True
Generated keyword: plastic bottle
USING MODEL: gpt-4o
KEYWORD: plastic bottle

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it commonly used in the kitchen?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Cutting board
evaluator: False

questioner: Is it a utensil?
answerer: No.
guesser: Measuring cup
evaluator: False

questioner: Is it a type of cookware?
answerer: No.
guesser: Salt shaker
evaluator: False

questioner: Is it an ingredient used in cooking?
answerer: No.
guesser: Oven mitt
evaluator: F

games generated:   7%|▋         | 14/190 [09:10<2:01:19, 41.36s/it]

evaluator: False

Generated keyword: Dryer
USING MODEL: gpt-4o
KEYWORD: Dryer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it an item primarily worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it an accessory?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it related to personal grooming or hygiene?
answerer: No.
guesser: Belt
evaluator: False

questioner: Is it something that is worn on the feet?
answerer: No.
guesser: 

games generated:   8%|▊         | 15/190 [09:54<2:02:21, 41.95s/it]

evaluator: False

Generated keyword: Electrical panel
USING MODEL: gpt-4o
KEYWORD: Electrical panel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Helmet
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it a type of protective gear or equipment used by workers?
answerer: No.
guesser: Scaffolding
evaluator: False

questioner: Is it an integral part of a building's structural design?
answerer: No.
guesser: Fire alarm
evaluator: False

questioner: Is it a device or system used in ensuring safety on a construction site?

games generated:   8%|▊         | 16/190 [10:28<1:54:48, 39.59s/it]

evaluator: True
Generated keyword: Notepad
USING MODEL: gpt-4o
KEYWORD: Notepad

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Tablet
evaluator: False

questioner: Is it a device primarily used for communication?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a device used for entertainment purposes?
an

games generated:   9%|▉         | 17/190 [11:11<1:57:33, 40.77s/it]

evaluator: False

Generated keyword: grape juice
USING MODEL: gpt-4o
KEYWORD: grape juice

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee maker
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a tool used in farming?
answerer: No.
guesser: Irrigation system
evaluator: False

questioner: Is it a type of beverage?
answerer: Yes.
guesser: Wine
evaluator: False

questioner: Is it an alcoholic beverage?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it commonly consumed for breakfast?
answerer: Yes.
guesser: Orange juice
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Milk
evaluator: False

questioner: Is it a dairy product?
answerer: No.
guesse

games generated:   9%|▉         | 18/190 [11:34<1:41:39, 35.46s/it]

evaluator: True
Generated keyword: carbon
USING MODEL: gpt-4o
KEYWORD: carbon

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Robot
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it something that can be used in a household setting?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it a type of device commonly used for entertainment purposes?
answerer: No.
guesser: Vacuum cleaner
evaluator: False

questioner: Is it typically used for communication?
answerer: No.
guesser: Washing machine
evaluator: False

questioner: Is it primarily used for household chores or maintenance?
answerer: No.
guesser: Light bulb
evaluator: False

q

games generated:  10%|█         | 19/190 [12:16<1:46:30, 37.37s/it]

evaluator: False

Generated keyword: wisteria
USING MODEL: gpt-4o
KEYWORD: wisteria

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it a living thing?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Cactus
evaluator: False

questioner: Is it typically found outdoors?
answerer: Yes.
guesser: Grass
evaluator: False

questioner: Is

games generated:  11%|█         | 20/190 [12:59<1:50:20, 38.95s/it]

evaluator: False

Generated keyword: book cover
USING MODEL: gpt-4o
KEYWORD: book cover

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Basketball
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it typically used in music or musical performances?
answerer: No.
guesser: Paintbrush
evaluator: False

questioner: Is it often associated with visual arts, such as painting or drawing?
answerer: No.
guesser: Camera
evaluator: False

games generated:  11%|█         | 21/190 [13:53<2:02:49, 43.60s/it]

evaluator: False

Generated keyword: weather vane
USING MODEL: gpt-4o
KEYWORD: weather vane

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car.
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Road
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Water
evaluator: False

questione

games generated:  12%|█▏        | 22/190 [14:34<1:59:46, 42.78s/it]

evaluator: True
Generated keyword: Jalapeño pepper
USING MODEL: gpt-4o
KEYWORD: Jalapeño pepper

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Carrot
evaluator: False

questioner: Is it a type of fruit?
answerer: Yes.
guesser: Orange
evaluator: False

questioner: Is it commonly eaten raw?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it typically sweet in taste?
answerer: No.
guesser: Tomato
evaluator: False

questioner: Is it commonly eaten in salads?
answerer: Yes.
guesser: Cucumber
evaluator: False

questioner: Is it a type of vegetable-fruit, like tomatoes or cucumbers?
answerer: Yes.
guesser: Bell pepper
evaluator: False

questioner: Is it commonly green when ripe?
answerer: Yes.
guesser: Avocado
evaluator: False

questioner: Is it known for having a high water conten

games generated:  12%|█▏        | 23/190 [14:58<1:43:17, 37.11s/it]

evaluator: True
Generated keyword: Journal
USING MODEL: gpt-4o
KEYWORD: Journal

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it usually found in an office or school environment?
answerer: Yes.


games generated:  13%|█▎        | 24/190 [15:39<1:45:50, 38.26s/it]

evaluator: False

Generated keyword: rose bush
USING MODEL: gpt-4o
KEYWORD: rose bush

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it a living thing?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Grass
evaluator: False

questioner: Does it primarily grow outdoors?
answerer: Yes.
guesser: Bush
evaluator: False

questioner: Is it

games generated:  13%|█▎        | 25/190 [16:14<1:42:12, 37.16s/it]

evaluator: True
Generated keyword: Backpack
USING MODEL: gpt-4o
KEYWORD: Backpack

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it something typically worn on the upper body?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it considered a type of outerwear or jacket?
answerer: No.
guesser: Blouse
evaluator: False

questioner: Is it typically worn as formal attire?
answerer: No.
guesser: T-shirt
evaluator: False

questioner: Is it often made of cotton?
answerer: Yes.
guesser: Sweater
evaluator: False

questioner: Is it commonly worn as casual clothing?
answer

games generated:  14%|█▎        | 26/190 [16:58<1:47:13, 39.23s/it]

evaluator: False

Generated keyword: guitar
USING MODEL: gpt-4o
KEYWORD: guitar

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Remote control
evaluator: False

questioner: Is it primarily used for playing games?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a device used for communication?
answerer: No.
guesser: Headphones
evaluator: False

questioner: Is it something that can display visual content?
answerer: No.
guesser: Digital watch
evaluator: False

questioner: Is it used for

games generated:  14%|█▍        | 27/190 [17:40<1:48:59, 40.12s/it]

evaluator: False

Generated keyword: Hot and sour soup
USING MODEL: gpt-4o
KEYWORD: Hot and sour soup

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Pasta
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Sandwich
evaluator: False

questioner: Is it typically served as a main course?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it commonly considered a dessert?
answerer: No.
guesser: Soup
evaluator: False

questioner: Is it typically served as an appetizer or snack?
answerer: Yes.
guesser: Nachos
evaluator: False

questioner: Is it usually eaten hot or warm?
answerer: Yes.
guesser: Spring rolls
evaluator: False

questioner: Does it often contain meat?
answerer: Yes.
guesser: Chicken wings
evaluator: False

questioner: Is it commonly associa

games generated:  15%|█▍        | 28/190 [18:15<1:44:20, 38.65s/it]

evaluator: True
Generated keyword: luggage cart
USING MODEL: gpt-4o
KEYWORD: luggage cart

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Planet
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Building
evaluator: False

questioner

games generated:  15%|█▌        | 29/190 [18:54<1:44:14, 38.85s/it]

evaluator: False

Generated keyword: Staircase
USING MODEL: gpt-4o
KEYWORD: Staircase

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Cloud
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Planet
evaluator: False

questioner: 

games generated:  16%|█▌        | 30/190 [19:35<1:45:02, 39.39s/it]

evaluator: False

Generated keyword: First Aid Kit
USING MODEL: gpt-4o
KEYWORD: First Aid Kit

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: First Aid Kit


games generated:  16%|█▋        | 31/190 [19:48<1:23:34, 31.53s/it]

evaluator: True
Generated keyword: Stapler
USING MODEL: gpt-4o
KEYWORD: Stapler

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Plant
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it used for writing or drawing?
answerer: No.
guesser: Key
evaluat

games generated:  17%|█▋        | 32/190 [20:20<1:23:07, 31.57s/it]

evaluator: True
Generated keyword: rubber
USING MODEL: gpt-4o
KEYWORD: rubber

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it a part or accessory commonly used in automobiles?
answerer: Yes.
guesser: Airbag
evaluator: False

questioner: Is it an internal component of a vehicle's safety system?
answerer: No.
guesser: Rearview mirror
evaluator: False

questioner: Is it primarily used to enhance visibility or alert others?
answerer: No.
guesser: Bumper
evaluator: False

questioner: Is it typically worn by the occupant of the vehicle?
answerer: No.
guesser: Safety cone
evaluator: False

ques

games generated:  17%|█▋        | 33/190 [21:05<1:33:16, 35.64s/it]

evaluator: False

Generated keyword: Washing machine
USING MODEL: gpt-4o
KEYWORD: Washing machine

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it something you wear on your body?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it something you use on your body?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it something you carry or use frequently?
answerer: Yes.
guesser: Handbag
evaluator: False

questioner: Is it commonly considered a fashion accessory?
answerer: No.
guesser: Mirror
evaluator: False

questioner: Is it something that is oft

games generated:  18%|█▊        | 34/190 [21:46<1:36:58, 37.30s/it]

evaluator: True
Generated keyword: training cone
USING MODEL: gpt-4o
KEYWORD: training cone

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Football
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing card
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Musical instrument
evaluator: False

questioner: Is it used in a type of sport?
answerer: Yes.
guesser: Tennis racket
evaluator: False

questioner: Is it primarily used in outdoor sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it typical

games generated:  18%|█▊        | 35/190 [22:28<1:39:49, 38.64s/it]

evaluator: False

Generated keyword: Nebulizer
USING MODEL: gpt-4o
KEYWORD: Nebulizer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Key
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it used for writing or drawing?
answerer: No.
guesser: Coin
evaluat

games generated:  19%|█▉        | 36/190 [23:11<1:42:32, 39.95s/it]

evaluator: False

Generated keyword: steering wheel
USING MODEL: gpt-4o
KEYWORD: steering wheel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it commonly used in automobiles?
answerer: Yes.
guesser: Airbag
evaluator: False

questioner: Is it primarily used by the driver?
answerer: Yes.
guesser: Steering wheel


games generated:  19%|█▉        | 37/190 [23:21<1:19:18, 31.10s/it]

evaluator: True
Generated keyword: Shipping label
USING MODEL: gpt-4o
KEYWORD: Shipping label

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it considered a tool or an implement used to perform ta

games generated:  20%|██        | 38/190 [24:01<1:25:12, 33.63s/it]

evaluator: False

Generated keyword: Frisbee
USING MODEL: gpt-4o
KEYWORD: Frisbee

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Tennis racket
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing card
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Basketball
evaluator: False

questioner: Is it used in playing a sport or game?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it typically used indoors?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it commonly associated wit

games generated:  21%|██        | 39/190 [24:17<1:11:08, 28.27s/it]

evaluator: True
Generated keyword: peach tree
USING MODEL: gpt-4o
KEYWORD: peach tree

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a type of beverage?
answerer: No.
guesser: Tractor
evaluator: False

questioner: Is it a type of farming equipment?
answerer: No.
guesser: Pesticide
evaluator: False

questioner: Is it something that can be grown?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a type of plant or crop?
answerer: Yes.
guesser: Rice
evaluator: False

questioner: Is it commonly consumed by humans?
answerer: Yes.
guesser: Tomato
evaluator: False

questioner: Is it a type of fruit?
answerer: Yes.
guesser: Banana
eval

games generated:  21%|██        | 40/190 [25:05<1:25:52, 34.35s/it]

evaluator: False

Generated keyword: Oil filter
USING MODEL: gpt-4o
KEYWORD: Oil filter

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a type of machinery or equipment used in manufacturing?
answerer: Yes.
guesser: Conveyor belt
evaluator: False

questioner: Is it used for assembly or construction?
answerer: No.
guesser: Lathe
evaluator: False

questioner: Is it used for material handling or transportation within a facility?
answerer: No.
guesser: 3D printer
evaluator: False

questioner: Is

games generated:  22%|██▏       | 41/190 [25:48<1:31:45, 36.95s/it]

evaluator: False

Generated keyword: maple tree
USING MODEL: gpt-4o
KEYWORD: maple tree

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it a living thing?
answerer: Yes.
guesser: Tree
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it commonly found in a garden?
answerer: No.
guesser: Cactus
evaluator: False

questioner: I

games generated:  22%|██▏       | 42/190 [26:12<1:21:18, 32.96s/it]

evaluator: True
Generated keyword: White wine
USING MODEL: gpt-4o
KEYWORD: White wine

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Tea
evaluator: False

questioner: Is it typically consumed hot?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Milk
evaluator: False

questioner: Is it primarily consumed by children?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it an alcoholic beverage?
answerer: Yes.
guesser: Beer
evaluator: False

questioner: Is it made from grapes?
answerer: Yes.
guesser: Champagne
evaluator: False

questioner: Is it co

games generated:  23%|██▎       | 43/190 [26:32<1:11:25, 29.15s/it]

evaluator: True
Generated keyword: lime zest
USING MODEL: gpt-4o
KEYWORD: lime zest

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a type of plant or crop?
answerer: No.
guesser: Plow
evaluator: False

questioner: Is it a tool or equipment used in agriculture?
answerer: No.
guesser: Pesticide
evaluator: False

questioner: Is it related to agricultural products or commodities?
answerer: Yes.
guesser: Milk
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Grain
evaluator: False

questioner: Is it an animal product?
answerer: No.
guesser: Honey
evaluator: False

questioner: Is it a byproduct or derivative of a crop or plant?

games generated:  23%|██▎       | 44/190 [27:10<1:17:14, 31.75s/it]

evaluator: False

Generated keyword: DVD
USING MODEL: gpt-4o
KEYWORD: DVD

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing cards
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for playing video games?
answerer: No.
guesser: Television remote
evaluator: False

questioner: Is it commonly used for listening to or playing music?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it mainly used for communication purposes?
answerer: No.
guesser: Virtual reality headset
evaluator: Fal

games generated:  24%|██▎       | 45/190 [27:52<1:24:09, 34.82s/it]

evaluator: False

Generated keyword: water snake
USING MODEL: gpt-4o
KEYWORD: water snake

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a bird?
answerer: No.
guesser: Fish
evaluator: False

questioner: Is it an aquatic animal?
answerer: Yes.
guesser: Shark
evaluator: False

questioner: Is it a type of fish?
answerer: No.
guesser: Octopus


games generated:  24%|██▍       | 46/190 [28:18<1:17:22, 32.24s/it]

evaluator: True
Generated keyword: watch
USING MODEL: gpt-4o
KEYWORD: watch

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Factory设备
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it a type of device commonly used by consumers?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it a portable device?
answerer: Yes.
guesser: Laptop
evaluator: False

questioner: Does it primarily serve as a communication tool?
answerer: No.
guesser: Tablet
evaluator: False

questioner: Is it used for entertainment purposes?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it often used for productivity or work-related tasks?
answe

games generated:  25%|██▍       | 47/190 [28:45<1:13:02, 30.64s/it]

evaluator: True
Generated keyword: tire inflator
USING MODEL: gpt-4o
KEYWORD: tire inflator

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book.
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Flashlight
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it typically used for communication?
answerer: No.
guesser: Headphones
evaluator: False

questioner: Is it primarily used for entertainment purpos

games generated:  25%|██▌       | 48/190 [29:22<1:16:43, 32.42s/it]

evaluator: False

Generated keyword: Surveillance camera
USING MODEL: gpt-4o
KEYWORD: Surveillance camera

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Laptop
evaluator: False

questioner: Is it a device primarily used for communication?
answerer: No.
guesser: Calculator
evaluator: False

questioner: Is it a device primarily used f

games generated:  26%|██▌       | 49/190 [29:59<1:19:54, 34.00s/it]

evaluator: False

Generated keyword: zebra
USING MODEL: gpt-4o
KEYWORD: zebra

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Shirt
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a type of mammal?
answerer: Yes.
guesser: Cat
evaluator: False

questioner: Is it a commonly domesticated animal?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Is it a wild animal that can be found in forests or jungles?
answerer: No.
guesser: Dolphin
evaluator: False

questioner: Is it an animal that lives primarily in water?
answerer: No.
guesser: Bat
evaluator: F

games generated:  26%|██▋       | 50/190 [30:35<1:20:45, 34.61s/it]

evaluator: True
Generated keyword: Bouncer stand
USING MODEL: gpt-4o
KEYWORD: Bouncer stand

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Video Game
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it an activity or event?
answerer: No.
guesser: Soccer Ball
evaluator: False

questioner: Is it a form of media, such as a movie, TV show, or video game?
answerer: No.
guesser: Concert
evaluator: False

questioner: Is it related to a physical location or venue used for entertainment or sports?
answerer: Yes.
guesser: Theater
evaluator: False

questioner: Is it primarily used for watching or viewing events?
answerer: No.
guesser: Scoreb

games generated:  27%|██▋       | 51/190 [31:15<1:23:40, 36.12s/it]

evaluator: False

Generated keyword: Tissue box
USING MODEL: gpt-4o
KEYWORD: Tissue box

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Balloon
evaluator: False

questioner: Is it related to writing or office supplies?
answerer: No.
gue

games generated:  27%|██▋       | 52/190 [31:54<1:25:03, 36.99s/it]

evaluator: False

Generated keyword: Basketball
USING MODEL: gpt-4o
KEYWORD: Basketball

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Basketball


games generated:  28%|██▊       | 53/190 [32:00<1:02:57, 27.57s/it]

evaluator: True
Generated keyword: PVC fitting
USING MODEL: gpt-4o
KEYWORD: PVC fitting

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Engine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a tool or piece of machinery used in manufacturing?
answerer: No.
guesser: Product
evaluator: False

questioner: Is it a type of product produced through manufacturing?
answerer: Yes.
guesser: Furniture
evaluator: False

questioner: Is it related to textiles or clothing?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it used in construction o

games generated:  28%|██▊       | 54/190 [32:39<1:10:17, 31.01s/it]

evaluator: False

Generated keyword: polar bear
USING MODEL: gpt-4o
KEYWORD: polar bear

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a mammal?
answerer: Yes.
guesser: Cat
evaluator: False

questioner: Is it typically domesticated?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it larger than a human on average?
answerer: Yes.
guesser: Elephant
evaluator: False

questioner: Does it primarily live on land?
answerer: Yes.
guesser: Bear
evaluator: False

questioner: Is it a herbivore?
an

games generated:  29%|██▉       | 55/190 [33:01<1:03:54, 28.41s/it]

evaluator: True
Generated keyword: salt shaker
USING MODEL: gpt-4o
KEYWORD: salt shaker

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Salt shaker


games generated:  29%|██▉       | 56/190 [33:08<49:20, 22.10s/it]  

evaluator: True
Generated keyword: Radio receiver
USING MODEL: gpt-4o
KEYWORD: Radio receiver

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Remote control
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a type of device used for communication?
answerer: Yes.
guesser: Walkie-talkie
evaluator: False

questioner: Is it a device that can access the internet?
answerer: No.
guesser: Radio
evaluator: False

questioner: Is it pri

games generated:  30%|███       | 57/190 [33:37<53:29, 24.13s/it]

evaluator: True
Generated keyword: elbow pad
USING MODEL: gpt-4o
KEYWORD: elbow pad

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it something typically found indoors?
answerer: Yes.
guesser: Movie
evaluator: False

questioner: Is it primarily used for watching or listening?
answerer: No.
guesser: Board game
evaluator: False

questioner: Is it related to physical activity or exercise?
answerer: Yes.
guesser: Treadmill
evaluator: False

questioner: Is it a piece of equipment or apparatus?
answerer: Yes.
guesser: Exercise bike
evaluator: False

questioner: Is it typically found 

games generated:  31%|███       | 58/190 [34:17<1:03:28, 28.86s/it]

evaluator: True
Generated keyword: microphone stand
USING MODEL: gpt-4o
KEYWORD: microphone stand

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it a musical instrument?
answerer: No.
guesser: Paintbrush
evaluator: False

questioner: Is it commonly used in visual art, such as painting or drawing?
answerer: No.
guesser: Camera
evaluator: False

questio

games generated:  31%|███       | 59/190 [34:55<1:08:37, 31.43s/it]

evaluator: False

Generated keyword: Lemon zest
USING MODEL: gpt-4o
KEYWORD: Lemon zest

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Potato
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Tractor
evaluator: False

questioner: Is it something that is grown or produced on a farm?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it used in the process of farming or agriculture?
answerer: No.
guesser: Pesticide
evaluator: False

questioner: Is it a type of beverage?
answerer: No.
guesser: Cooking oil
evaluator: False

questioner: Is it an agricultural product that is not food?
answerer: No.
guesser: Cookbook
evaluator: False

questioner: Is it something that is used primarily indoors?
answerer: Yes.
guesser: Cookware
evaluato

games generated:  32%|███▏      | 60/190 [35:34<1:13:31, 33.93s/it]

evaluator: False

Generated keyword: Handbag
USING MODEL: gpt-4o
KEYWORD: Handbag

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it typically worn on the upper part of the body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower part of the body?
answerer: No.
guesser: Bracelet
evaluator: False

questioner: Is it an accessory?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it commonly worn on the hands or wrists?
answerer: No.
guesser: Belt
evaluator: False

questioner: Is it typically worn on or around the neck?
answerer

games generated:  32%|███▏      | 61/190 [35:56<1:05:06, 30.28s/it]

evaluator: True
Generated keyword: pine
USING MODEL: gpt-4o
KEYWORD: pine

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it used for writing or drawing?
answerer: No.
guesser: Flashlight
evaluator:

games generated:  33%|███▎      | 62/190 [38:44<2:32:29, 71.48s/it]

evaluator: False

Generated keyword: metal lid
USING MODEL: gpt-4o
KEYWORD: metal lid

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it an everyday kitchen utensil?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it an appliance used in cooking or food preparation?
answerer: No.
guesser: Bottle opener
evaluator: False

questioner: Is it a container used for storing food or beverages?
answerer: No.
guesser: Spice jar
evaluator: False

questioner: Is it typically found in a kitchen?
answerer: Yes.
guesser: Salt shaker
evaluator: False

questioner: Is it an 

games generated:  33%|███▎      | 63/190 [39:30<2:15:06, 63.83s/it]

evaluator: False

Generated keyword: picnic basket
USING MODEL: gpt-4o
KEYWORD: picnic basket

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Milk
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Knife
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: No.
guesser: Salt shaker
evaluator: False

questioner: Is it used as a container?
answerer: Yes.
guesser: Jar
evaluator: False

questioner: Is it commonly made of glass?
answerer: No.
guesser: Plastic bottle
evaluator: False

questioner: Is it made of plastic?
answerer: No.
guesser: Metal can
evaluator: False

questioner: Is it made of metal?
answerer: No.
guesser: Ceramic bowl
evaluator: False



games generated:  34%|███▎      | 64/190 [40:08<1:57:49, 56.10s/it]

evaluator: False

Generated keyword: seltzer
USING MODEL: gpt-4o
KEYWORD: seltzer

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Tea
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Soda
evaluator: False

questioner: Is it carbonated?
answerer: Yes.
guesser: Cola
evaluator: False

questioner: Is it a soft drink?
answerer: Yes.
guesser: Sprite
evaluator: False

questioner: Is it typically caffeinated?
answerer: No.
guesser: Root beer
evaluator: False

questioner: Is it a clear beverage?
answerer: Yes.
guesser: 7-Up
evaluator: False

questioner: Is it a type of flavored wate

games generated:  34%|███▍      | 65/190 [40:26<1:33:21, 44.81s/it]

evaluator: True
Generated keyword: Earrings
USING MODEL: gpt-4o
KEYWORD: Earrings

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it primarily worn on a specific part of the body?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it typically worn on the upper body?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it usually considered outerwear?
answerer: No.
guesser: T-shirt
evaluator: False

questioner: Is it an item of clothing that covers both arms?
answerer: No.
guesser: Bra
evaluator: False

questioner: Is it typically worn as an undergarment

games generated:  35%|███▍      | 66/190 [41:05<1:28:49, 42.98s/it]

evaluator: True
Generated keyword: Linksys
USING MODEL: gpt-4o
KEYWORD: Linksys

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it primarily used for communication?
answerer: Yes.
guesser: Tablet
evaluator: False

questioner: Is it a type of handheld device?
answerer: No.
guesser: Telephone
e

games generated:  35%|███▌      | 67/190 [41:44<1:25:58, 41.94s/it]

evaluator: False

Generated keyword: gel
USING MODEL: gpt-4o
KEYWORD: gel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it primarily worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower body?
answerer: No.
guesser: Ring
evaluator: False

questioner: Is it commonly worn on the feet?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it an accessory rather than a piece of clothing?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it commonly worn on the head?
answerer: No.
guesser: Brace

games generated:  36%|███▌      | 68/190 [42:24<1:23:56, 41.28s/it]

evaluator: False

Generated keyword: Shorts
USING MODEL: gpt-4o
KEYWORD: Shorts

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Clock
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shoes
evaluator: False

questioner: Is it something that is typically worn on the body?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it primarily worn on the upper half of the body?
answerer: No.
guesser: Pants
evaluator: False

questioner: Is it primarily worn on the lower half of the body?
answerer: Yes.
guesser: Skirt
evaluator: False

questioner: Is it typically considered a type of footwear?
answerer: No.
guesser: Shorts


games generated:  36%|███▋      | 69/190 [42:38<1:06:38, 33.05s/it]

evaluator: True
Generated keyword: Reading glasses
USING MODEL: gpt-4o
KEYWORD: Reading glasses

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it something that is typically worn on the upper body?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it typically considered a type of outerwear?
answerer: No.
guesser: T-shirt
evaluator: False

questioner: Is it usually worn by both men and women?
answerer: Yes.
guesser: Sweater
evaluator: False

questioner: Is it a type of garment that is commonly worn in a professional or formal setting?
answerer: No.
guesser: T

games generated:  37%|███▋      | 70/190 [43:21<1:12:04, 36.04s/it]

evaluator: False

Generated keyword: Forklift
USING MODEL: gpt-4o
KEYWORD: Forklift

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to machinery or mechanical parts?
answerer: Yes.
guesser: Engine
evaluator: False

questioner: Is it a type of tool or instrument?
answerer: No.
guesser: Gear
evaluator: False

questioner: Is it a component commonly found in vehicles?
answerer: No.
guesser: Conveyor belt
evaluator: False

questioner: Is it a type of industrial machinery?
answerer: Yes.
guesser

games generated:  37%|███▋      | 71/190 [43:47<1:05:17, 32.92s/it]

evaluator: True
Generated keyword: snake
USING MODEL: gpt-4o
KEYWORD: snake

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a type of domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a type of wild animal commonly found in forests or jungles?
answerer: Yes.
guesser: Tiger
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Snake


games generated:  38%|███▊      | 72/190 [43:58<51:55, 26.40s/it]  

evaluator: True
Generated keyword: lemon juice
USING MODEL: gpt-4o
KEYWORD: lemon juice

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a type of machinery?
answerer: No.
guesser: Irrigation system
evaluator: False

questioner: Is it a consumable product?
answerer: Yes.
guesser: Honey
evaluator: False

questioner: Is it a type of beverage?
answerer: Yes.
guesser: Juice
evaluator: False

questioner: Is it an alcoholic beverage?
answerer: No.
guesser: Milk
evaluator: False

questioner: Is it commonly consumed for its health benefits?
answerer: Yes.
guesser: Tea
evaluator: False

questioner: Is it typically consumed hot?
answerer: No.
guesser: Wa

games generated:  38%|███▊      | 73/190 [44:20<48:56, 25.09s/it]

evaluator: True
Generated keyword: apple
USING MODEL: gpt-4o
KEYWORD: apple

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple


games generated:  39%|███▉      | 74/190 [44:22<34:54, 18.06s/it]

evaluator: True
Generated keyword: Wrench
USING MODEL: gpt-4o
KEYWORD: Wrench

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it used in construction or building materials?
answerer: Yes.
guesser: Brick
evaluator: False

questioner: Is it used primarily for structural purposes?
answerer: No.
guesser: Paint
evaluator: False

questioner: Is it used for finishing or decorative purposes in construction?
answerer: No.
guesser: Insulation
evaluator: False

questioner: Is it used in the plumbing or pipi

games generated:  39%|███▉      | 75/190 [45:09<51:36, 26.92s/it]

evaluator: False

Generated keyword: Sewer line
USING MODEL: gpt-4o
KEYWORD: Sewer line

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Cloud
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Star
evaluator: False

questioner:

games generated:  40%|████      | 76/190 [45:55<1:01:43, 32.49s/it]

evaluator: False

Generated keyword: Water dish
USING MODEL: gpt-4o
KEYWORD: Water dish

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Scissors
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Hammer
evaluator: False

questioner: Is it commonly found in an office or workspace enviro

games generated:  41%|████      | 77/190 [46:38<1:07:07, 35.64s/it]

evaluator: False

Generated keyword: plate
USING MODEL: gpt-4o
KEYWORD: plate

questioner: Is it related to food, beverages or cooking?
answerer: Yes
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No
guesser: Cooking pot
evaluator: False

questioner: Is it related to agriculture?
answerer: No
guesser: Recipe
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No
guesser: Fork
evaluator: False

questioner: Is it a tool or utensil used in cooking?
answerer: Yes
guesser: Knife
evaluator: False

questioner: Is it primarily made of metal?
answerer: No
guesser: Spatula
evaluator: False

questioner: Is it used for mixing or stirring?
answerer: No
guesser: Cutting board
evaluator: False

questioner: Is it used for cutting or slicing?
answerer: No
guesser: Measuring cup
evaluator: False

questioner: Is it primarily made of plastic?
answerer: No
guesser: Rolling

games generated:  41%|████      | 78/190 [47:18<1:08:56, 36.93s/it]

evaluator: True
Generated keyword: Chafing dishes
USING MODEL: gpt-4o
KEYWORD: Chafing dishes

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: Yes.
guesser: Fork
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it primarily used for mixing or stirring?
answerer: No.
guesser: Cutting board
evaluator: False

questioner: Is it used for measuring ingredients?
answerer: No.
guesser: Peeler
evaluator: False

questioner: Is it used for cooking or bakin

games generated:  42%|████▏     | 79/190 [47:57<1:09:49, 37.74s/it]

evaluator: True
Generated keyword: Space probe
USING MODEL: gpt-4o
KEYWORD: Space probe

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: House
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Cloud
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Star
evaluator: False

questioner: I

games generated:  42%|████▏     | 80/190 [48:30<1:06:16, 36.15s/it]

evaluator: False

Generated keyword: Tablet
USING MODEL: gpt-4o
KEYWORD: Tablet

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Football
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Remote control
evaluator: False

questioner: Is it a device commonly used for listening to or playing music?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it something that can be used for communication?
answerer: Yes.
guesser: Smartphone
evaluator: False

questione

games generated:  43%|████▎     | 81/190 [48:41<52:03, 28.65s/it]  

evaluator: True
Generated keyword: Turn signal
USING MODEL: gpt-4o
KEYWORD: Turn signal

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: House
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Sky
evaluator: False

questioner: Is it 

games generated:  43%|████▎     | 82/190 [49:10<51:40, 28.71s/it]

evaluator: False

Generated keyword: CT scan
USING MODEL: gpt-4o
KEYWORD: CT scan

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Cell phone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Cloud
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Star
evaluator: False

questioner: Is it a 

games generated:  44%|████▎     | 83/190 [49:38<50:51, 28.52s/it]

evaluator: False

Generated keyword: eruption
USING MODEL: gpt-4o
KEYWORD: eruption

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Sun
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it u

games generated:  44%|████▍     | 84/190 [50:05<49:58, 28.29s/it]

guesser: Geyser
evaluator: False

Generated keyword: Freezer
USING MODEL: gpt-4o
KEYWORD: Freezer

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Salt
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Frying pan
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it an appliance or tool used in the kitchen?
answerer: Yes.
guesser: Microwave
evaluator: False

questioner: Is it primarily used for cooking or preparing food?
answerer: No.
guesser: Dishwasher
evaluator: False

questioner: Is it used for cleaning purposes in the kitchen?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it used for storing food or beverages?
answerer: Yes.
guesser: Pantry
evaluator: False

questioner: Is it primarily used to keep items cold or frozen?
answerer: 

games generated:  45%|████▍     | 85/190 [50:20<42:32, 24.31s/it]

evaluator: True
Generated keyword: Ceiling fan
USING MODEL: gpt-4o
KEYWORD: Ceiling fan

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: House
evaluator: False

questioner:

games generated:  45%|████▌     | 86/190 [50:47<43:30, 25.11s/it]

evaluator: False

Generated keyword: Mug
USING MODEL: gpt-4o
KEYWORD: Mug

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a kitchen utensil?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it used for cutting or chopping food?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it typically used for cooking?
answerer: No.
guesser: Measuring cup
evaluator: False

questioner: Is it used for measuring ingredients?
answerer: No.
guesser: Tongs
evaluator: False

questioner: Is it primarily used for serving food or beverages?
answerer: Yes.
guesser: P

games generated:  46%|████▌     | 87/190 [51:06<39:32, 23.04s/it]

evaluator: True
Generated keyword: ruler
USING MODEL: gpt-4o
KEYWORD: ruler

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Hammer
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Balloon
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it used for writing or drawing?
answerer: Yes.
guesser: Pencil


games generated:  46%|████▋     | 88/190 [51:24<36:49, 21.66s/it]

evaluator: True
Generated keyword: Resistance Bands
USING MODEL: gpt-4o
KEYWORD: Resistance Bands

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Board game piece
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it used primarily in a specific sport or game?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it something that is typically used indoors?
answerer: Yes.
guesser: Musical inst

games generated:  47%|████▋     | 89/190 [51:44<35:47, 21.27s/it]

evaluator: True
Generated keyword: Yoga block
USING MODEL: gpt-4o
KEYWORD: Yoga block

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Basketball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Television remote
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Playing card
evaluator: False

questioner: Is it something typically used outdoors?
answerer: No.
guesser: Board game
evaluator: False

questioner: Is it primarily used in sports?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it a type of game or toy?
answerer: N

games generated:  47%|████▋     | 90/190 [52:16<40:22, 24.22s/it]

evaluator: False

Generated keyword: bear
USING MODEL: gpt-4o
KEYWORD: bear

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it a mammal?
answerer: Yes.
guesser: Lion
evaluator: False

questioner: Is it commonly found in water or aquatic environments?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Is it known for its ability to fly?
answerer: No.


games generated:  48%|████▊     | 91/190 [52:26<33:14, 20.15s/it]

guesser: Bear
evaluator: True
Generated keyword: Energy drink
USING MODEL: gpt-4o
KEYWORD: Energy drink

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Teapot
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it typically consumed hot?
answerer: No.
guesser: Soda
evaluator: False

questioner: Is it carbonated?
answerer: Yes.
guesser: Cola
evaluator: False

questioner: Is it commonly caffeinated?
answerer: Yes.
guesser: Energy drink


games generated:  48%|████▊     | 92/190 [52:38<28:54, 17.69s/it]

evaluator: True
Generated keyword: tire
USING MODEL: gpt-4o
KEYWORD: tire

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Robot
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it primarily used for protecting people during a vehicle operation?
answerer: No.
guesser: Airbag
evaluator: False

questioner: Is it related to the maintenance or repair of vehicles?
answerer: No.
guesser: Traffic cone
evaluator: False

questioner: Is it used primarily for the safety of goods or cargo during transportation?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it associated with the structural integrity of a vehicle?
answerer: Yes.

games generated:  49%|████▉     | 93/190 [53:12<36:16, 22.44s/it]

evaluator: False

Generated keyword: livestock trailer
USING MODEL: gpt-4o
KEYWORD: livestock trailer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mobile phone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Pen
evaluator: Fal

games generated:  49%|████▉     | 94/190 [53:41<39:02, 24.40s/it]

evaluator: False

Generated keyword: Coral
USING MODEL: gpt-4o
KEYWORD: Coral

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Is it a type of wild mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a type of bird?
answerer: No.
guesser: Snake
evaluator: False

questioner: Is it a type of reptile?
answerer: No.
guesser: Fish
evaluator: False

questioner: Is it a type of aquatic animal?
answerer

games generated:  50%|█████     | 95/190 [54:07<39:26, 24.91s/it]

evaluator: True
Generated keyword: metal file
USING MODEL: gpt-4o
KEYWORD: metal file

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it primarily composed of metal?
answerer: Yes.
guesser: Engine
evaluator: False

questioner: Is it a tool or instrument used in construction or maintenance?
answerer: Yes.
guesser: Hammer
evaluator: False

questioner: Is it powered by electricity or batteries?
answerer: No.
guesser: Wrench
evaluator: False

questioner: Is it primarily used for measuring or marking?
an

games generated:  51%|█████     | 96/190 [54:37<41:30, 26.50s/it]

evaluator: False

Generated keyword: Graphic Novel
USING MODEL: gpt-4o
KEYWORD: Graphic Novel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tennis racket
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it commonly used in visual arts, such as painting or drawing?
answerer: Yes.
guesser: Paintbrush
evaluator: False

questioner: Is it typically used to apply color to a surface?
answerer: No.
guesser: Pencil
eval

games generated:  51%|█████     | 97/190 [55:07<42:33, 27.46s/it]

evaluator: False

Generated keyword: fuji apple
USING MODEL: gpt-4o
KEYWORD: fuji apple

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it a type of fruit?
answerer: Yes.
guesser: Orange
evaluator: False

questioner: Is it typically eaten raw?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it commonly red in color?
answerer: Yes.
guesser: Strawberry
evaluator: False

questioner: Does it have a single seed or pit?
answerer: No.
guesser: Raspberry
evaluator: False

questioner: Is it often used in making desserts?
answerer: Yes.
guesser: Cherry
evaluator: False

questioner: Is it commonly grown on a bush or vine?
answerer: No.
guesser: Strawberry
evaluator: False

questioner: Does it have a smooth skin?
answerer: Yes.
guesser: Tomato
evalu

games generated:  52%|█████▏    | 98/190 [55:43<45:58, 29.98s/it]

evaluator: False

Generated keyword: cocktail glass
USING MODEL: gpt-4o
KEYWORD: cocktail glass

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: No.
guesser: Spice
evaluator: False

questioner: Is it something used for seasoning or flavoring?
answerer: No.
guesser: Recipe book
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Plate
evaluator: False

questioner: Is it used for serving or presenting food?
answerer: Yes.
guesser: Bowl
evaluator: False

questioner: Is it typically made of glass, ceramic, or metal?
an

games generated:  52%|█████▏    | 99/190 [56:09<43:58, 28.99s/it]

evaluator: True
Generated keyword: electric saw
USING MODEL: gpt-4o
KEYWORD: electric saw

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it a consumer electronic device?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it primarily used for communication?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it primarily used for entertainment?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a household appliance?
answerer: No.
guesser: Tablet
evaluator: False

questioner: Is it primarily used for personal or health care?
answerer: No.
gues

games generated:  53%|█████▎    | 100/190 [56:38<43:36, 29.07s/it]

evaluator: False

Generated keyword: shower chair
USING MODEL: gpt-4o
KEYWORD: shower chair

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it furniture?
answerer: Yes.
guesser: Chair
evaluator: False

questioner: Is it typically found in a living room?
answerer: No.
guesser: Bed
evaluator: False

questioner: Is 

games generated:  53%|█████▎    | 101/190 [57:07<42:55, 28.93s/it]

evaluator: False

Generated keyword: candle holder
USING MODEL: gpt-4o
KEYWORD: candle holder

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pencil
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Scissors
evaluator: False

questioner: Is it something commonly found in an office or 

games generated:  54%|█████▎    | 102/190 [57:36<42:13, 28.79s/it]

evaluator: False

Generated keyword: Shelving unit
USING MODEL: gpt-4o
KEYWORD: Shelving unit

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it furniture?
answerer: Yes.
guesser: Table
evaluator: False

questioner: Is it primarily used for sitting?
answerer: No.
guesser: Bed
evaluator: False

questioner: I

games generated:  54%|█████▍    | 103/190 [58:05<41:53, 28.90s/it]

evaluator: False

Generated keyword: plastic pipe
USING MODEL: gpt-4o
KEYWORD: plastic pipe

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to machinery or mechanical components?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it a product used in construction or building?
answerer: Yes.
guesser: Cement
evaluator: False

questioner: Is it a material used in construction?
answerer: Yes.
guesser: Steel
evaluator: False

questioner: Is it a type of metal?
answerer: No.
gue

games generated:  55%|█████▍    | 104/190 [58:29<39:22, 27.47s/it]

evaluator: True
Generated keyword: Growth agar
USING MODEL: gpt-4o
KEYWORD: Growth agar

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it a piece of machinery or mechanical equipment?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it related to packaging or containing products?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it a type of tool or instrument used in manufacturing?
answerer: No.
guesser: Product
evaluator: False

questioner: Is it related to text

games generated:  55%|█████▌    | 105/190 [58:59<40:01, 28.25s/it]

evaluator: False

Generated keyword: Stun gun
USING MODEL: gpt-4o
KEYWORD: Stun gun

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Laptop
evaluator: False

questioner: Is it commonly used for communication purposes?
answerer: No.
guesser: Calculator
evaluator: False

questioner: Is it primarily used for entertainment or leisure activ

games generated:  56%|█████▌    | 106/190 [59:26<39:07, 27.94s/it]

evaluator: False

Generated keyword: Plastic hanger
USING MODEL: gpt-4o
KEYWORD: Plastic hanger

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it used in construction or building?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it related to paper or packaging products?
answerer: No.
guesser: Textile
evaluator: False

questioner: Is it used in transportation or vehicles?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a type of machinery or machine component?

games generated:  56%|█████▋    | 107/190 [59:55<39:02, 28.22s/it]

evaluator: False

Generated keyword: lemon balm
USING MODEL: gpt-4o
KEYWORD: lemon balm

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Salt
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a type of tool or machinery used in agriculture?
answerer: No.
guesser: Pesticide
evaluator: False

questioner: Is it a naturally occurring substance or product from agriculture?
answerer: Yes.
guesser: Honey
evaluator: False

questioner: Is it a plant-based product?
answerer: Yes.
guesser: Cotton
evaluator: False

questioner: Is it a type of crop?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it commonly grown for human consumption?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is

games generated:  57%|█████▋    | 108/190 [1:00:29<40:44, 29.82s/it]

evaluator: False

Generated keyword: thyroid
USING MODEL: gpt-4o
KEYWORD: thyroid

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Star
evaluator: False

questioner: 

games generated:  57%|█████▋    | 109/190 [1:00:57<39:34, 29.31s/it]

evaluator: False

Generated keyword: Board Games
USING MODEL: gpt-4o
KEYWORD: Board Games

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Football
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Chess piece
evaluator: False

questioner: Is it something used in a sport or physical activity?
answerer: No.
guesser: Board game


games generated:  58%|█████▊    | 110/190 [1:01:07<31:24, 23.56s/it]

evaluator: True
Generated keyword: upholstered sofa
USING MODEL: gpt-4o
KEYWORD: upholstered sofa

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Factory machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Clothing
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Packaging material
evaluator: False

questioner: Is it primarily used as a tool or instrument?
answerer: No.
guesser: Machine part
evaluator: False

questioner: Is it something that is commonly found in construction?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it related to packaging or storage?
answerer: No.
guesser: Furniture
evaluator: False

questioner: Is i

games generated:  58%|█████▊    | 111/190 [1:01:39<34:22, 26.11s/it]

evaluator: False

Generated keyword: Kiwi
USING MODEL: gpt-4o
KEYWORD: Kiwi

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a type of produce, such as a fruit or vegetable?
answerer: Yes.
guesser: Carrot
evaluator: False

questioner: Is it typically eaten raw?
answerer: Yes.
guesser: Lettuce
evaluator: False

questioner: Is it classified as a fruit?
answerer: Yes.
guesser: Strawberry
evaluator: False

questioner: Is it commonly eaten as a snack?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it typically grown on trees?
answerer: No.
guesser: Grape
evaluator: False

questioner: Is it a type of berry?
answerer: Yes.
guesser: Blueberry
evaluator: False

questioner: Is it commonly red in color?
answerer: No.
guesser: Raspberry
evaluator:

games generated:  59%|█████▉    | 112/190 [1:01:58<31:09, 23.97s/it]

evaluator: True
Generated keyword: emergency light
USING MODEL: gpt-4o
KEYWORD: emergency light

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Camera
evaluator: False

questioner: Is it a handheld communication device?
answerer: No.
guesser: Tablet
evaluator: False

questioner: Is it primarily used for entertainment or leisure pur

games generated:  59%|█████▉    | 113/190 [1:02:28<33:02, 25.75s/it]

evaluator: False

Generated keyword: whisk
USING MODEL: gpt-4o
KEYWORD: whisk

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Cup
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it a cooking utensil or tool?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Whisk


games generated:  60%|██████    | 114/190 [1:02:37<26:14, 20.71s/it]

evaluator: True
Generated keyword: infrared detector
USING MODEL: gpt-4o
KEYWORD: infrared detector

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it a component used in the construction or maintenance of vehicles?
answerer: No.
guesser: Airbag
evaluator: False

questioner: Is it a type of personal protective equipment?
answerer: No.
guesser: Traffic cone
evaluator: False

questioner: Is it used in the operation or functioning of vehicles?
answerer: Yes.
guesser: Traffic light
evaluator: False

questioner: Is it an electronic device or system?
answerer: Yes.
guesser: GPS system
evaluator: 

games generated:  61%|██████    | 115/190 [1:03:09<30:15, 24.20s/it]

evaluator: False

Generated keyword: Nail file
USING MODEL: gpt-4o
KEYWORD: Nail file

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it something primarily worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it something worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it something that can be worn on the head?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it primarily an accessory rather than a piece of clothing?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it commonly used t

games generated:  61%|██████    | 116/190 [1:03:32<29:16, 23.73s/it]

evaluator: True
Generated keyword: archery target
USING MODEL: gpt-4o
KEYWORD: archery target

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Football
evaluator: False

questioner: Is it something mainly found outdoors?
answerer: Yes.
guesser: Trampoline
evaluator: False

questioner: Is it related to a specific sport?
answerer: Yes.
guesser: Golf course
evaluator: False

questioner: Is it commonly associated with a team sport?
answerer: No.
guesser: Tennis court
evaluator: False

questioner: Is it associated with a sport that primarily uses a ball or similar object?
answerer: No.
guesser: Surfboard
evaluator: False


games generated:  62%|██████▏   | 117/190 [1:03:58<29:43, 24.44s/it]

evaluator: True
Generated keyword: cold pack
USING MODEL: gpt-4o
KEYWORD: cold pack

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it typically worn on the body?
answerer: No.
guesse

games generated:  62%|██████▏   | 118/190 [1:04:26<30:37, 25.52s/it]

evaluator: True
Generated keyword: metal foil
USING MODEL: gpt-4o
KEYWORD: metal foil

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a tool or utensil used in cooking or food preparation?
answerer: Yes.
guesser: Knife
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it used for measuring?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it used for mixing or blending?
answerer: No.
guesser: Peeler
evaluator: False

questioner: Is it primarily used for serving food?
answe

games generated:  63%|██████▎   | 119/190 [1:04:57<32:08, 27.16s/it]

evaluator: False

Generated keyword: oyster
USING MODEL: gpt-4o
KEYWORD: oyster

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Cheeseburger
evaluator: False

questioner: Is it a prepared food or dish?
answerer: No.
guesser: Chocolate
evaluator: False

questioner: Is it a type of ingredient?
answerer: Yes.
guesser: Salt
evaluator: False

questioner: Is it commonly used in baking?
answerer: No.
guesser: Pepper
evaluator: False

questioner: Is it a type of spice or seasoning?
answerer: No.
guesser: Butter
evaluator: False

questioner: Is it a type of fruit or vegetable?
answerer: No.
guesser: Sugar
evaluator: False

questioner: Is it a type of dairy product?
answerer: No.
guesser: Honey
evaluator: False

questioner: Is it a type of meat or protein source?
answerer: Yes.
guesser: Eggs
evaluator:

games generated:  63%|██████▎   | 120/190 [1:05:15<28:27, 24.39s/it]

evaluator: True
Generated keyword: lace dress
USING MODEL: gpt-4o
KEYWORD: lace dress

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shoes
evaluator: False

questioner: Is it something typically worn on the upper part of the body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it primarily worn on the lower part of the body?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it an accessory?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it something that is typically worn on the feet?
answerer: No.
guesser: Belt
evaluator: False

questioner: Is it related to beauty produc

games generated:  64%|██████▎   | 121/190 [1:05:47<30:37, 26.62s/it]

evaluator: False

Generated keyword: Spring roll
USING MODEL: gpt-4o
KEYWORD: Spring roll

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Sandwich
evaluator: False

questioner: Is it typically served hot?
answerer: Yes.
guesser: Soup
evaluator: False

questioner: Is it a main course?
answerer: No.
guesser: French fries
evaluator: False

questioner: Is it typically served as an appetizer or starter?
answerer: Yes.
guesser: Spring rolls


games generated:  64%|██████▍   | 122/190 [1:05:57<24:36, 21.72s/it]

evaluator: True
Generated keyword: ham
USING MODEL: gpt-4o
KEYWORD: ham

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a fruit or vegetable?
answerer: No.
guesser: Rice
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: Egg
evaluator: False

questioner: Is it a type of meat or animal product?
answerer: Yes.
guesser: Milk
evaluator: False

questioner: Is it a dairy product?
answerer: No.
guesser: Chicken
evaluator: False

questioner: Is it a type of meat?
answerer: Yes.
guesser: Beef
evaluator: False

questioner: Is it poultry?
answerer: No.
guesser: Pork
evaluator: False

questioner: Is it red meat?
answerer: No.
guesser: Fish
evaluator: False

questioner: Is it a type of seafood?
answerer: No.
guesser: Bacon
evalu

games generated:  65%|██████▍   | 123/190 [1:06:16<23:17, 20.86s/it]

evaluator: True
Generated keyword: clay
USING MODEL: gpt-4o
KEYWORD: clay

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machine tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: Yes.
guesser: Steel
evaluator: False

questioner: Is it mined from the earth?
answerer: Yes.
guesser: Coal
evaluator: False

questioner: Is it a type of metal?
answerer: No.
guesser: Diamond
evaluator: False

questioner: Is it a type of mineral or gemstone?
answerer: Yes.
guesser: Quartz
evaluator: False

questioner: Is it commonly used in jewelry?
answerer: No.
guesser: Granite
evaluator: False

questioner: Is it used primaril

games generated:  65%|██████▌   | 124/190 [1:06:37<23:03, 20.97s/it]

evaluator: True
Generated keyword: Exhaust pipe
USING MODEL: gpt-4o
KEYWORD: Exhaust pipe

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Cloud
evaluator: False

questio

games generated:  66%|██████▌   | 125/190 [1:07:10<26:42, 24.65s/it]

evaluator: False

Generated keyword: Tile scale
USING MODEL: gpt-4o
KEYWORD: Tile scale

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it a tool or piece of equipment used in manufacturing?
answerer: Yes.
guesser: Wrench
evaluator: False

questioner: Is it a power-operated tool?
answerer: No.
guesser: Hammer
evaluator: False

questioner: Is it commonly used for measuring or marking?
answerer: Yes.
guesser: Caliper
evaluator: False

questioner: Is it typically used by hand rather than being part of 

games generated:  66%|██████▋   | 126/190 [1:07:39<27:45, 26.02s/it]

evaluator: False

Generated keyword: chestnut
USING MODEL: gpt-4o
KEYWORD: chestnut

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it commonly grown in tropical climates?
answerer: No.
guesser: Strawberry
evaluator: False

questioner: Is it a type of berry?
answerer: No.
guesser: Pear
evaluator: False

questioner: Is it typically consumed raw?
answerer: No.
guesser: Pumpkin
evaluator: False

questioner: Is it commonly used in pies or desserts?
answerer: Yes.
guesser: Cherry
evaluator: False

questioner: Is it typically harvested in the autumn?
answerer: Yes.
guesser: Blueberry
evaluator: False

questioner: Is it a type of pome fruit (e.g., apple, pear)?
answerer: No.
guesser: 

games generated:  67%|██████▋   | 127/190 [1:08:09<28:36, 27.25s/it]

evaluator: False

Generated keyword: compass
USING MODEL: gpt-4o
KEYWORD: compass

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Fire extinguisher
evaluator: False

questioner: Is it something typically used in a medical context?
an

games generated:  67%|██████▋   | 128/190 [1:08:27<25:15, 24.45s/it]

evaluator: True
Generated keyword: Rowing machines
USING MODEL: gpt-4o
KEYWORD: Rowing machines

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Movie
evaluator: False

questioner: Is it primarily used indoors?
answerer: Yes.
guesser: Video game console
evaluator: False

questioner: Is it related to a form of digital entertainment?
answerer: No.
guesser: Board game
evaluator: False

questioner: Is it typically enjoyed by a group of people rather than alone?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to a physical activity?
answerer: Yes.
guesser: Treadmill
evaluator: False

questioner: Do

games generated:  68%|██████▊   | 129/190 [1:08:47<23:28, 23.09s/it]

guesser: Rowing machine
evaluator: True
Generated keyword: penguin
USING MODEL: gpt-4o
KEYWORD: penguin

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domestic animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it an aquatic animal?
answerer: Yes.
guesser: Shark
evaluator: False

questioner: Is it a fish?
answerer: No.
guesser: Octopus
evaluator: False

questioner: Is it an amphibian?
answerer: No.
guesser: Jel

games generated:  68%|██████▊   | 130/190 [1:09:08<22:28, 22.48s/it]

evaluator: True
Generated keyword: shirt
USING MODEL: gpt-4o
KEYWORD: shirt

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it a type of machine or mechanical device?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it a type of tool or instrument?
answerer: No.
guesser: Clothing
evaluator: False

questioner: Is it related to packaging or containers?
answerer: No.
guesser: Industrial robot
evaluator: False

questioner: Is it related to construction or building materials?
answerer: N

games generated:  69%|██████▉   | 131/190 [1:09:38<24:17, 24.70s/it]

evaluator: False

Generated keyword: sewing needle
USING MODEL: gpt-4o
KEYWORD: sewing needle

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it something typically worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it something that is generally worn on the feet?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it a type of accessory?
answerer: No.
guesser: Makeup
evaluator: False

questioner: Is it generally used for personal 

games generated:  69%|██████▉   | 132/190 [1:10:09<25:42, 26.60s/it]

evaluator: False

Generated keyword: Soda
USING MODEL: gpt-4o
KEYWORD: Soda

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it typically consumed hot?
answerer: No.
guesser: Soda


games generated:  70%|███████   | 133/190 [1:10:16<19:43, 20.76s/it]

evaluator: True
Generated keyword: oak tree
USING MODEL: gpt-4o
KEYWORD: oak tree

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: Yes.
guesser: Plant
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it typically found outdoors?
answerer: Yes.
guesser: Grass
evaluator: False

questioner: Is it larger t

games generated:  71%|███████   | 134/190 [1:10:28<16:45, 17.95s/it]

evaluator: True
Generated keyword: fossilized remains
USING MODEL: gpt-4o
KEYWORD: fossilized remains

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it commonly found in an office setting?
an

games generated:  71%|███████   | 135/190 [1:11:07<22:15, 24.27s/it]

evaluator: False

Generated keyword: Honey
USING MODEL: gpt-4o
KEYWORD: Honey

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it a type of fruit?
answerer: No.
guesser: Potato
evaluator: False

questioner: Is it a type of vegetable?
answerer: No.
guesser: Rice
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: Chicken
evaluator: False

questioner: Is it of animal origin?
answerer: Yes.
guesser: Milk
evaluator: False

questioner: Is it a type of meat?
answerer: No.
guesser: Egg
evaluator: False

questioner: Is it a type of dairy product?
answerer: No.
guesser: Honey


games generated:  72%|███████▏  | 136/190 [1:11:20<18:44, 20.83s/it]

evaluator: True
Generated keyword: Bread pudding
USING MODEL: gpt-4o
KEYWORD: Bread pudding

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Bread
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Pasta
evaluator: False

questioner: Is it typically served hot?
answerer: Yes.
guesser: Soup
evaluator: False

questioner: Is it commonly considered a main course?
answerer: No.
guesser: French fries
evaluator: False

questioner: Is it commonly considered an appetizer?
answerer: No.
guesser: Lasagna
evaluator: False

questioner: Is it typically considered a dessert?
answerer: Yes.
guesser: Chocolate cake
evaluator: False

questioner: Is it usually baked?
answerer: Yes.
guesser: Brownies
evaluator: False

questioner: Is it typically associated with a specific holiday

games generated:  72%|███████▏  | 137/190 [1:11:50<20:54, 23.68s/it]

evaluator: True
Generated keyword: kelp
USING MODEL: gpt-4o
KEYWORD: kelp

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Bread
evaluator: False

questioner: Is it a prepared food or dish?
answerer: No.
guesser: Cheese
evaluator: False

questioner: Is it a type of ingredient commonly used in cooking?
answerer: Yes.
guesser: Salt
evaluator: False

questioner: Is it typically found in the pantry rather than the refrigerator or freezer?
answerer: Yes.
guesser: Sugar
evaluator: False

questioner: Is it a type of spice or seasoning?
answerer: No.
guesser: Flour
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: Olive oil
evaluator: False

questioner: Is it a type of dried food?
answerer: Yes.
guesser: Pasta
evaluator: False

questioner: Is it a type of legume or 

games generated:  73%|███████▎  | 138/190 [1:12:17<21:22, 24.66s/it]

evaluator: True
Generated keyword: Packing slip
USING MODEL: gpt-4o
KEYWORD: Packing slip

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Candle
evaluator: False

questioner: Is it used for writing or drawing?
answerer: No.
g

games generated:  73%|███████▎  | 139/190 [1:12:45<21:43, 25.57s/it]

evaluator: False

Generated keyword: enamel
USING MODEL: gpt-4o
KEYWORD: enamel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it a machine or mechanical device?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it related to packaging or containers?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it related to construction or building materials?
answerer: No.
guesser: Textile
evaluator: False

questioner: Is it related to chemical processes or products?
answerer: Yes

games generated:  74%|███████▎  | 140/190 [1:13:15<22:35, 27.11s/it]

evaluator: False

Generated keyword: Fishing rod
USING MODEL: gpt-4o
KEYWORD: Fishing rod

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it used in a specific sport or game?
answerer: Yes.
guesser: Tennis racket
evaluator: False

questioner: Is it commonly used outdoors?
answerer: Yes.
guesser: Frisbee
evaluator: False

questioner: Is it typical

games generated:  74%|███████▍  | 141/190 [1:13:31<19:24, 23.77s/it]

evaluator: True
Generated keyword: seam
USING MODEL: gpt-4o
KEYWORD: seam

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it related to construction or building materials?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Packaging
evaluator: False

questioner: Is it used in textiles or clothing manufacturing?
answerer: Yes.
guesser: Sewing machine
evaluator: False

questioner: Is it a type of fabric or material?
an

games generated:  75%|███████▍  | 142/190 [1:14:02<20:42, 25.89s/it]

evaluator: False

Generated keyword: Taser
USING MODEL: gpt-4o
KEYWORD: Taser

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Camera
evaluator: False

questioner: Is it used primarily for communication purposes?
answerer: No.
guesser: Calculator
evaluator: False

questioner: Is it used primarily for entertainment purposes?
an

games generated:  75%|███████▌  | 143/190 [1:14:31<20:56, 26.74s/it]

evaluator: False

Generated keyword: salt scrub
USING MODEL: gpt-4o
KEYWORD: salt scrub

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it something typically worn on the upper half of the body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it something typically worn on the lower half of the body?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it an accessory rather than an article of clothing?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it related to beauty products or personal care?
answerer: Yes.
guesser: Perfume
evaluator: 

games generated:  76%|███████▌  | 144/190 [1:15:04<21:54, 28.58s/it]

evaluator: False

Generated keyword: Jewelry chest
USING MODEL: gpt-4o
KEYWORD: Jewelry chest

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it commonly worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it usually worn on the head or face?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it an item that is often worn on the hands or arms?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it typically worn on the feet or le

games generated:  76%|███████▋  | 145/190 [1:15:37<22:28, 29.97s/it]

evaluator: False

Generated keyword: Spider plant
USING MODEL: gpt-4o
KEYWORD: Spider plant

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Balloon
evaluator: False

questioner: Is it commonly found in an office or workspace?
answerer: Y

games generated:  77%|███████▋  | 146/190 [1:16:08<22:14, 30.34s/it]

evaluator: False

Generated keyword: Training pants
USING MODEL: gpt-4o
KEYWORD: Training pants

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Perfume
evaluator: False

questioner: Is it something that is typically worn?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it primarily worn on the upper half of the body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it primarily worn on the lower half of the body?
answerer: Yes.
guesser: Pants
evaluator: False

questioner: Is it typically considered everyday casual wear?
answerer: No.
guesser: Skirt
evaluator: False

questioner: 

games generated:  77%|███████▋  | 147/190 [1:16:32<20:17, 28.31s/it]

evaluator: True
Generated keyword: Bunsen burner
USING MODEL: gpt-4o
KEYWORD: Bunsen burner

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it a type of tool or instrument used for a s

games generated:  78%|███████▊  | 148/190 [1:17:05<20:49, 29.74s/it]

evaluator: True
Generated keyword: Google Nest
USING MODEL: gpt-4o
KEYWORD: Google Nest

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Laptop
evaluator: False

questioner: Is it primarily used for communication?
answerer: No.
guesser: Remote control
evaluator: False

questioner: Is it primarily used for entertainment or me

games generated:  78%|███████▊  | 149/190 [1:17:41<21:42, 31.76s/it]

guesser: Infrared thermometer
evaluator: False

Generated keyword: backgammon board
USING MODEL: gpt-4o
KEYWORD: backgammon board

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Basketball
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Deck of cards
evaluator: False

questioner: Is it commonly used in sports activities?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something typically associated with games or gaming?
answerer: Yes.

games generated:  79%|███████▉  | 150/190 [1:18:17<21:52, 32.82s/it]

evaluator: False

Generated keyword: Coffee beans
USING MODEL: gpt-4o
KEYWORD: Coffee beans

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Irrigation
evaluator: False

questioner: Is it a type of machinery used in agriculture?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it something produced by plants?
answerer: Yes.
guesser: Fruit
evaluator: False

questioner: Is it a type of crop?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it primarily used as a staple food?
answerer: No.
guesser: Soybeans
evaluator: False

questioner: Is it commonly used as a seasoning or flavoring?
answerer: No.
guesser: Cotton
evaluator: False

questioner: Is it primarily used 

games generated:  79%|███████▉  | 151/190 [1:18:46<20:45, 31.95s/it]

evaluator: True
Generated keyword: ketchup
USING MODEL: gpt-4o
KEYWORD: ketchup

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a kitchen utensil?
answerer: No.
guesser: Plate
evaluator: False

questioner: Is it something that is typically used to store food or ingredients?
answerer: No.
guesser: Microwave
evaluator: False

questioner: Is it an appliance or a tool used in cooking or food preparation?
answerer: No.
guesser: Recipe book
evaluator: False

questioner: Is it something edible?
answerer: Yes.
guesser: Candy
evaluator: False

questioner: Is it a spice or herb?
a

games generated:  80%|████████  | 152/190 [1:19:04<17:25, 27.51s/it]

evaluator: True
Generated keyword: Telescope
USING MODEL: gpt-4o
KEYWORD: Telescope

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Tablet
evaluator: False

questioner: Is it primarily used for communication?
answerer: No.
guesser: Headphones
evaluator: False

questioner: Is it primarily used for entertainment?
answerer: No.
guesser: Fl

games generated:  81%|████████  | 153/190 [1:19:33<17:22, 28.17s/it]

evaluator: False

Generated keyword: Concert pass
USING MODEL: gpt-4o
KEYWORD: Concert pass

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it typically used in music or performance?
answerer: Yes.
guesser: Microphone
evaluator: False

questioner: Is it a musical instrument?
answerer: No.
guesser: Microphone
evaluator: False

questioner: Is it pri

games generated:  81%|████████  | 154/190 [1:20:06<17:47, 29.66s/it]

evaluator: False

Generated keyword: martini
USING MODEL: gpt-4o
KEYWORD: martini

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Juicer
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Bottle
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Soda
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it alcoholic?
answerer: Yes.
guesser: Wine
evaluator: False

questioner: Is it made from grapes?
answerer: No.
guesser: Beer
evaluator: False

questioner: Is it typically consumed with meals?
answerer: No.
guesser: Whiskey
evaluator: False

questioner: Is it distilled?
answerer: Yes.


games generated:  82%|████████▏ | 155/190 [1:20:41<18:07, 31.07s/it]

evaluator: False

Generated keyword: dolphin
USING MODEL: gpt-4o
KEYWORD: dolphin

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it larger than a human?
answerer: Yes.
guesser: Elephant
evaluator: False

questioner: Is it primarily found in the ocean?
answerer: Yes.
guesser: Whale
evaluator: False

questioner: Is it a mammal?
answerer: Yes.
guesser: Dolphin


games generated:  82%|████████▏ | 156/190 [1:20:53<14:22, 25.38s/it]

evaluator: True
Generated keyword: Teething ring
USING MODEL: gpt-4o
KEYWORD: Teething ring

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Light bulb
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Mug
evaluator: False

questioner: Is it commonly used for writing or drawing?
an

games generated:  83%|████████▎ | 157/190 [1:21:22<14:32, 26.42s/it]

evaluator: False

Generated keyword: Horse trailer
USING MODEL: gpt-4o
KEYWORD: Horse trailer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: House
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Bridge
evaluator: False

qu

games generated:  83%|████████▎ | 158/190 [1:21:53<14:47, 27.72s/it]

evaluator: False

Generated keyword: Sparrow
USING MODEL: gpt-4o
KEYWORD: Sparrow

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Does it live primarily in water?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it capable of flight?
answerer: Yes.
guesser: Parrot
evaluator: False

questioner: Is it a bird of prey, such as an eagl

games generated:  84%|████████▎ | 159/190 [1:22:11<12:49, 24.83s/it]

evaluator: True
Generated keyword: gold
USING MODEL: gpt-4o
KEYWORD: gold

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it commonly used for communication purposes?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it primarily used for computing or data processing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it a type of household appliance?
answerer: No.
guesser: Solar Panel
evaluator: False

questioner: Is it used in the field of transportation or vehicles?
answerer: No.
guesser: Microchip
evaluator: False

questioner: Is it related to entertain

games generated:  84%|████████▍ | 160/190 [1:22:40<13:07, 26.26s/it]

evaluator: False

Generated keyword: Smartwatch
USING MODEL: gpt-4o
KEYWORD: Smartwatch

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Toy
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it a device primarily used for gaming?
answerer: No.
guesser: Remote control
evaluator: False

questioner: Is it a device used primarily for communication?
answerer: Yes.
guesser: Tablet
evaluator: False

questioner: Is it a mobile or handheld device?
answerer: Yes.
guesser: Walkie-talkie
evaluator: False

questioner: Is it capa

games generated:  85%|████████▍ | 161/190 [1:22:54<10:50, 22.43s/it]

evaluator: True
Generated keyword: Cactus
USING MODEL: gpt-4o
KEYWORD: Cactus

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it commonly found in an office or study environment?
answerer: No.


games generated:  85%|████████▌ | 162/190 [1:23:22<11:16, 24.17s/it]

evaluator: False

Generated keyword: Fungi
USING MODEL: gpt-4o
KEYWORD: Fungi

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a tool or equipment used in farming?
answerer: No.
guesser: Irrigation
evaluator: False

questioner: Is it a natural product produced through agriculture, such as a plant or crop?
answerer: No.
guesser: Pesticide
evaluator: False

questioner: Is it an agricultural structure or building?
answerer: No.
guesser: Compost
evaluator: False

questioner: Is it a byproduct of an agricultural process?
answerer: No.
guesser: Farming Calendar
evaluator: False

questioner: Does it involve the storage of agricultural products?
ans

games generated:  86%|████████▌ | 163/190 [1:23:56<12:08, 26.99s/it]

evaluator: False

Generated keyword: aerator
USING MODEL: gpt-4o
KEYWORD: aerator

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Pan
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Fork
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: Yes.
guesser: Knife
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it used for cooking or preparing food?
answerer: No.
guesser: Measuring cup
evaluator: False

questioner: Is it used for eating food?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it used for measuring or weighing materials?
answerer: N

games generated:  86%|████████▋ | 164/190 [1:24:25<11:59, 27.69s/it]

guesser: Colander
evaluator: False

Generated keyword: Glass cleaner
USING MODEL: gpt-4o
KEYWORD: Glass cleaner

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it typically found indoo

games generated:  87%|████████▋ | 165/190 [1:24:47<10:52, 26.11s/it]

evaluator: True
Generated keyword: Hand Sanitizer
USING MODEL: gpt-4o
KEYWORD: Hand Sanitizer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it primarily used for personal protection?
answere

games generated:  87%|████████▋ | 166/190 [1:25:18<11:00, 27.52s/it]

evaluator: False

Generated keyword: common milkweed
USING MODEL: gpt-4o
KEYWORD: common milkweed

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it typically used in an office or workplace settin

games generated:  88%|████████▊ | 167/190 [1:25:45<10:31, 27.45s/it]

evaluator: False

Generated keyword: Tote bag
USING MODEL: gpt-4o
KEYWORD: Tote bag

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it something that is typically worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it an item that is commonly worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it something that is typically worn on the head or face?
answerer: No.
guesser: Ring
evaluator: False

questioner: Is it an accessory that is commonly worn on or used with the hands or arms?
answerer: Yes.
guesser: 

games generated:  88%|████████▊ | 168/190 [1:26:11<09:54, 27.01s/it]

evaluator: True
Generated keyword: migraine
USING MODEL: gpt-4o
KEYWORD: migraine

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: House
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Star
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it 

games generated:  89%|████████▉ | 169/190 [1:26:39<09:33, 27.32s/it]

evaluator: False

Generated keyword: Sandstone
USING MODEL: gpt-4o
KEYWORD: Sandstone

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: Yes.
guesser: Wood
evaluator: False

questioner: Is it commonly used in construction?
answerer: Yes.
guesser: Concrete
evaluator: False

questioner: Is it a type of stone or rock?
answerer: Yes.
guesser: Granite
evaluator: False

questioner: Is it used primarily for decorative purposes?
answerer: No.
guesser: Marble
evaluator: False

questioner: Is it typically used as a foundational or structural material in buildings?


games generated:  89%|████████▉ | 170/190 [1:26:52<07:39, 22.97s/it]

evaluator: True
Generated keyword: B B gun ammo
USING MODEL: gpt-4o
KEYWORD: B B gun ammo

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it related to construction or building materials?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a machine or equipment used in manufacturing processes?
answerer: No.
guesser: Packaging
evaluator: False

questioner: Is it a type of product produced in a manufacturing setting?
answerer: Yes.
guesser: Clothing
evaluator: False

questioner: Is it

games generated:  90%|█████████ | 171/190 [1:27:26<08:19, 26.28s/it]

evaluator: False

Generated keyword: Bubble wrap
USING MODEL: gpt-4o
KEYWORD: Bubble wrap

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Robot
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it a type of personal protective equipment (PPE)?
answerer: No.
guesser: Smoke detector
evaluator: False

questioner: Is it an electronic device or equipment?
answerer: No.
guesser: Safety harness
evaluator: False

questioner: Is it related to fire prevention or firefighting?
answerer: No.
guesser: Safety signs
evalua

games generated:  91%|█████████ | 172/190 [1:27:58<08:23, 28.00s/it]

evaluator: True
Generated keyword: Stock
USING MODEL: gpt-4o
KEYWORD: Stock

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: No.
guesser: Plate
evaluator: False

questioner: Is it something used to serve food or beverages?
answerer: No.
guesser: Spices
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Cookbook
evaluator: False

questioner: Is it something primarily used for storage?
answerer: No.
guesser: Cutting board
evaluator: False

questioner: Is it an ingredient?
answerer: Yes.
guesser: Salt
evaluator: Fals

games generated:  91%|█████████ | 173/190 [1:28:28<08:04, 28.52s/it]

evaluator: False

Generated keyword: DVD Player
USING MODEL: gpt-4o
KEYWORD: DVD Player

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Remote control
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a device primarily used for listening to audio or music?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it something commonly used for communication?
answerer: No.
guesser: VR headset
evaluator: False


games generated:  92%|█████████▏| 174/190 [1:28:58<07:44, 29.04s/it]

guesser: Photo printer
evaluator: False

Generated keyword: Firecracker
USING MODEL: gpt-4o
KEYWORD: Firecracker

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing card
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Baseball
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Basketball
evaluator: False

questioner: Is it used in playing a specific sport or game?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it typically used indoors?
answerer: No.
guesser: Frisbee
evaluator: False

questioner: Is it rela

games generated:  92%|█████████▏| 175/190 [1:29:32<07:35, 30.36s/it]

evaluator: False

Generated keyword: gown
USING MODEL: gpt-4o
KEYWORD: gown

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it something that is typically worn on the upper part of the body?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it generally considered an item of outerwear?
answerer: No.
guesser: Blouse
evaluator: False

questioner: Is it an item typically associated with formal or professional attire?
answerer: Yes.
guesser: Tie
evaluator: False

questioner: Is it an item that is commonly worn by both men and women?
answerer: No.
guesser: Blazer
evaluator

games generated:  93%|█████████▎| 176/190 [1:29:56<06:39, 28.53s/it]

evaluator: True
Generated keyword: juice box
USING MODEL: gpt-4o
KEYWORD: juice box

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Juice
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Soda
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Tea
evaluator: False

questioner: Is it commonly consumed at breakfast?
answerer: Yes.
guesser: Milk
evaluator: False

questioner: Is it a dairy product?
answerer: No.
guesser: Orange juice
evaluator: False

questioner: Is it a type of juice?
answerer: Yes.
guesser: Apple juice
evaluator: False

questioner: Is it made from 

games generated:  93%|█████████▎| 177/190 [1:30:27<06:20, 29.23s/it]

evaluator: False

Generated keyword: Fringe
USING MODEL: gpt-4o
KEYWORD: Fringe

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it primarily used indoors?
answerer: No.
guesser: Soccer ball
evaluator: False

questioner: Is it associated with a specific sport?
answerer: No.
guesser: Playground
evaluator: False

questioner: Is it something attended by large groups of people?
answerer: Yes.
guesser: Concert
evaluator: False

questioner: Is it a live event or performance?
answerer: Yes.
guesser: Festival
evaluator: False

questioner: Is it a type of music concert?
answerer: No.
guesser: 

games generated:  94%|█████████▎| 178/190 [1:30:58<05:59, 29.97s/it]

evaluator: False

Generated keyword: Garter snake
USING MODEL: gpt-4o
KEYWORD: Garter snake

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domestic animal?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it a type of wild mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a type of bird?
answerer: No.
guesser: Frog
evaluator: False

questioner: Is it a type of reptile?
answerer: Yes.
guesser: Lizard
evaluator: False

questioner: Is it commonly found in water enviro

games generated:  94%|█████████▍| 179/190 [1:31:30<05:34, 30.45s/it]

evaluator: True
Generated keyword: sweetgum tree
USING MODEL: gpt-4o
KEYWORD: sweetgum tree

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: House
evaluator: False

questioner: Is it a living thing?
answerer: Yes.
guesser: Plant
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it commonly found in a household garden?
answerer: No.
guesser: Cactus
evaluator: False

questio

games generated:  95%|█████████▍| 180/190 [1:32:01<05:05, 30.56s/it]

evaluator: False

Generated keyword: Dental floss
USING MODEL: gpt-4o
KEYWORD: Dental floss

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Light bulb
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it commonly used in arts and crafts?
answerer: No

games generated:  95%|█████████▌| 181/190 [1:32:29<04:29, 29.90s/it]

evaluator: False

Generated keyword: wheel
USING MODEL: gpt-4o
KEYWORD: wheel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Steel
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Automobile tire
evaluator: False

questioner: Is it typically used in the construction or building industry?
answerer: No.
guesser: Machine tool
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer: Yes.
guesser: Conveyor belt
evaluator: False

questioner: Is it typically used for transportation purposes?
answerer: Yes.
guesser: Forklift
evaluator: False

questioner: Is it commo

games generated:  96%|█████████▌| 182/190 [1:33:00<04:02, 30.25s/it]

evaluator: False

Generated keyword: Cardboard box
USING MODEL: gpt-4o
KEYWORD: Cardboard box

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it used primarily in construction or building?
answerer: No.
guesser: Packaging
evaluator: False

questioner: Is it a type of machinery or equipment used in manufacturing?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Textile
evaluator: False

questioner: Is it related to packag

games generated:  96%|█████████▋| 183/190 [1:33:27<03:23, 29.05s/it]

evaluator: True
Generated keyword: Golf Cart
USING MODEL: gpt-4o
KEYWORD: Golf Cart

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Television show
evaluator: False

questioner: Is it something that is typically used indoors?
answerer: No.
guesser: Soccer ball
evaluator: False

questioner: Is it associated with sports or physical activities?
answerer: Yes.
guesser: Stadium
evaluator: False

questioner: Is it a location or venue where sports are played?
answerer: No.
guesser: Running track
evaluator: False

questioner: Is it an item that helps athletes train or compete?
answerer: Yes.
guesser: Treadmill
evaluator: 

games generated:  97%|█████████▋| 184/190 [1:33:59<03:01, 30.20s/it]

evaluator: False

Generated keyword: Surge protector
USING MODEL: gpt-4o
KEYWORD: Surge protector

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it commonly used for communication?
answerer: No.
guesser: Headphones
evaluator: False

questioner: Is it primarily used for entertainment purposes?

games generated:  97%|█████████▋| 185/190 [1:34:28<02:29, 29.86s/it]

evaluator: False

Generated keyword: blueberry
USING MODEL: gpt-4o
KEYWORD: blueberry

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Frying pan
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it typically grown in tropical or subtropical regions?
answerer: No.
guesser: Grapes
evaluator: False

questioner: Is it commonly grown in temperate climates?
answerer: Yes.
guesser: Pear
evaluator: False

questioner: Is it typically eaten raw?
answerer: Yes.
guesser: Cherry
evaluator: False

questioner: Is it commonly associated with a specific season?
answerer: Yes.
guesser: Strawberry
evaluator: False

questioner: Is it commonly associated with summer?
answerer: Yes.
guesser: Watermelon
evaluator: False

questioner: Is it typically red in co

games generated:  98%|█████████▊| 186/190 [1:34:44<01:41, 25.45s/it]

evaluator: True
Generated keyword: ostrich
USING MODEL: gpt-4o
KEYWORD: ostrich

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a common household pet?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it a wild animal?
answerer: Yes.
guesser: Tiger
evaluator: False

questioner: Is it primarily found in forests?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a marine animal?
answerer: No.
guesser: Camel
evaluator: False

questioner: Is it primarily found in deserts?

games generated:  98%|█████████▊| 187/190 [1:35:13<01:20, 26.77s/it]

evaluator: True
Generated keyword: Adhesive
USING MODEL: gpt-4o
KEYWORD: Adhesive

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Robot
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it used in construction or building materials?
answerer: Yes.
guesser: Concrete
evaluator: False

questioner: Is it commonly used as a structural element or support in buildings?
answerer: No.
guesser: Brick
evaluator: False

questioner: Is it typically used for finishing or decorative purposes in construction?
answerer: Yes.
guesser: Paint
evaluator: False

questioner:

games generated:  99%|█████████▉| 188/190 [1:35:39<00:52, 26.48s/it]

evaluator: True
Generated keyword: yogurt container
USING MODEL: gpt-4o
KEYWORD: yogurt container

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: No.
guesser: Spice jar
evaluator: False

questioner: Is it something typically used in cooking?
answerer: No.
guesser: Cookbook
evaluator: False

questioner: Is it something one might find in a food storage area, like a pantry or refrigerator?
answerer: Yes.
guesser: Food container
evaluator: False

questioner: Is it typically used as an ingredient in recipes?
answerer: No.
guesser: Can open

games generated:  99%|█████████▉| 189/190 [1:36:10<00:27, 27.79s/it]

evaluator: False

Generated keyword: marigold
USING MODEL: gpt-4o
KEYWORD: marigold

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lamp
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it something typically found in a home or office environment?
ans

games generated: 100%|██████████| 190/190 [1:36:40<00:00, 30.53s/it]

evaluator: False






In [124]:
keywords_left = [keyword for keyword in keyword_things if  (keyword not in current_keywords_with_games)]
similar_keywords_left = [keyword for keyword in similar_keywords_list if (keyword not in current_keywords_with_games)]

print(f"Keywords left: {len(keywords_left)}\n")
print(f"Similar Keywords left: {len(similar_keywords_left)}\n")

generated_keywords = random.choices(keywords_left, k=100)
generated_similar_keywords = random.choices(similar_keywords_left, k=100)

print(generated_keywords + generated_similar_keywords)

Keywords left: 1143

Similar Keywords left: 1077

['Conveyor belt', 'picket', 'dishcloth', 'coffee bag', 'boa constrictor', 'concrete wall', 'ottoman', 'mri machine', 'boa constrictor', 'Truck', 'punching bag', 'rhubarb', 'climbing rope', 'ballpoint pen', 'park map', 'Heart Rate Monitors', 'weight plate', 'bookend', 'calcite', 'vacuum cleaner', 'medallion', 'swedish fish', 'staircase', 'cypress knee', 'beer tap', 'hospital bed', 'pillowcase', 'ramen', 'autograph', 'upright piano', 'woodpecker', 'honeydew', 'medallion', 'wiper', 'trumpet', 'Water', 'pea plant', 'plastic tub', 'ballpoint pen', 'water well', 'dye', 'brush', 'Blinds', 'valve', 'maxi dress', 'lint brush', 'Suitcase', 'honeydew melon', 'catalytic converter', 'tupperware', 'paint thinner', 'power adapter', 'rhubarb', 'Wheelchair ramp', 'battery', 'Air compressor', 'bandana', 'compactor', 'duct', 'guitar pick', 'Parallel bars', 'Stove', 'leopard', 'grape', 'spare bulb', 'Headlamp', 'bamboo', 'juniper bush', 'drapery', 'christm

## Save as JSONL

In [120]:
with open("20qs-data/final_games/gpt-4o-200-more-games.jsonl", "w") as f:
    for i in game_results:
        json.dump(i, f)
        f.write('\n')