In [1]:
import litellm
import time
import sys, os

if not os.environ["OPENAI_API_KEY"]: 
    os.environ["OPENAI_API_KEY"]= '<REDACTED>'

In [2]:
MODEL = "gpt-4o-mini"
# MODEL = "gpt-4o"
#MODEL = "claude-3-5-sonnet-20240620"
litellm.modify_params = True

In [3]:
def completion(model, messages):
    result = litellm.completion(model, messages)
    if "claude" in model:
        time.sleep(60/50) # Adjust based on rate limit (https://console.anthropic.com/settings/limits)
    return result

In [4]:
class DecisionNode:
    def __init__(self, query = None, yes_branch=None, no_branch=None):
        self.query = query
        self.yes_branch = yes_branch
        self.no_branch = no_branch
        
    def print_tree(self, level=0, prefix=""):
        if self.query:
            print(f"{prefix}-- {self.query}")
            if self.yes_branch:
                self.yes_branch.print_tree(level + 1, prefix + "  |")
            if self.no_branch:
                self.no_branch.print_tree(level + 1, prefix + "   ")
        else:
            print(f"{prefix}-- *")
            
    def write_tree_to_file(self, filename):
        with open(filename, 'w') as f:
            self._write_tree(f)

    def _write_tree(self, file, level=0, prefix=""):
        if self.query:
            file.write(f"{prefix}-- {self.query}\n")
            if self.yes_branch:
                self.yes_branch._write_tree(file, level + 1, prefix + "  |")
            if self.no_branch:
                self.no_branch._write_tree(file, level + 1, prefix + "   ")
        else:
            file.write(f"{prefix}-- *\n")

    @staticmethod
    def read_tree_from_file(filename):
        with open(filename, 'r') as f:
            lines = f.readlines()
        return DecisionNode._read_tree(lines, 0, "")

    @staticmethod
    def _read_tree(lines, indent_level, current_prefix):
        if not lines:
            return None

        line = lines.pop(0).rstrip()
        expected_prefix = current_prefix + "-- "

        if not line.startswith(expected_prefix):
            return None

        content = line[len(expected_prefix):]
        if content == "*":
            return DecisionNode()

        yes_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + "  |")
        no_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + "   ")

        return DecisionNode(content, yes_branch, no_branch)

# Creating the tree

In [9]:
things_tree = DecisionNode.read_tree_from_file('20qs-data/decision_trees/decision_tree_v2.txt')
things_tree.print_tree()

-- food, beverage, cooking
  |-- food
  |  |-- agriculture
  |  |  |-- *
  |  |   -- prepared food
  |  |     |-- *
  |  |      -- *
  |   -- agriculture
  |     |-- safety
  |     |  |-- *
  |     |   -- *
  |      -- handheld
  |        |-- beverage
  |        |  |-- *
  |        |   -- *
  |         -- *
   -- industry, manufacturing
     |-- safety
     |  |-- transportation, vehicles
     |  |  |-- *
     |  |   -- architecture, construction
     |   -- electronics
     |     |-- *
     |      -- natural material, resource
     |        |-- *
     |         -- *
      -- sports, entertainment
        |-- handheld
        |  |-- electronics
        |  |  |-- *
        |  |   -- arts, media
        |  |     |-- *
        |  |      -- *
        |   -- *
         -- clothing, accessories, beauty
           |-- *
            -- animal
              |-- *
               -- handheld
                 |-- electronics
                 |  |-- *
                 |   -- safety
                

In [11]:
query_map = {
    'food': "Is it a food?",
    'beverage': "Is it a beverage?",
    'living': "Is it a living thing?",
    'plant': "Is it a plant?",
    'animal': 'Is it an animal?',
    'electronics': "Is it related to electronics or technology?",
    'furniture': 'Is it furniture?',
    'transportation, vehicles': 'Is it related to transportation or vehicles?',
    'man-made': "Is it a man-made thing?",
    'architecture, construction': 'Is it related to architectural structures or construction?',
    'natural material, resource': 'Is it a natural material or resource?',
    'natural phenomenon': 'Is it a natural phenomenon?',
    'industry, manufacturing': "Is it related to industrial production or manufacturing?",
    'food, beverage, cooking': 'Is it related to food, beverages or cooking?',
    'handheld': 'Is it something a person can hold in their hand?',
    'agriculture': "Is it related to agriculture?",
    'indoors': 'Is it something that can be found indoors?',
    'arts, media': "Is it broadly related to the arts or media?",
    'safety': "Is it related to safety or safety equipment?",
    'medicine': "Is it broadly related to medicine or healthcare?",
    'clothing, accessories, beauty': "Is it related to clothing, accessories or beauty products?",
    'sports, entertainment': "Is it broadly related to entertainment or sports?",
    'prepared food': "Is it a prepared food or dish?"
}


In [12]:
def game_to_string(game):
    return '\n'.join([f'{step["role"]}: {step["content"]}' for step in game])

def construct_messages(game, role, keyword=None, guesses=[]):
    if role == "questioner":
        prompt = {
            "role": "system",
            "content": (
                "You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                "Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person.\n\n "
                "You are participating in a new game of 20 Questions. Your role is to be the Questioner. You will ask successive yes-or-no questions to determine the keyword. "
                "You have a limited number of questions to ask, so choose a question that will eliminate half of the possible keywords to maximize efficiency. "
                "Avoid asking questions that are too specific too early on. Be as vague as possible while still eliminating half of the remaining possibilities. "
                "DO NOT ask if the keyword is a specific thing, rather ask something about the keyword.\n"
                "Example 1: DO NOT ASK: 'Is the keyword car?', INSTEAD ASK: 'Is it a specific type of car, such as a sedan?\n"
                "Example 2: DO NOT ASK: 'Is the keyword cow?', INSTEAD ASK: 'Is it a specific type of cow?\n"
                "Example 3: DO NOT ASK: 'Is the keyword bottle?', INSTEAD ASK: 'Is it a bottle made of a specific material?\n"
                "Example 4: DO NOT ASK: 'Is the keyword lamp?', INSTEAD ASK: 'Is it a type of lamp?\n"
                "Do NOT assume the game has ended, the game will determine when to stop. Do not output any text other than the question."
            ),
        }
        messages = [prompt]
        for message in game:
            if message["role"] == "questioner":
                messages.append({
                    "role": "assistant",
                    "content": message["content"]
                })
            else:
                messages.append({
                    "role": "user",
                    "content": message["content"]
                })
    elif role == "answerer":
        prompt = {
            "role": "system",
            "content": (
                f"You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                "Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person. "
                "You are participating in a new game of 20 Questions. Your role is to be the Answerer. "
                f"The keyword is {keyword}. Answer only Yes or No based on the keyword. Do not output any other text."
            ),
        }
        messages = [prompt]
        for message in game:
            if message["role"] == "questioner":
                messages.append({
                    "role": "user",
                    "content": f"Answer the following question about the keyword: '{keyword}'. " + message["content"]
                })
            else:
                messages.append({
                    "role": "assistant",
                    "content": message["content"]
                })
    elif role == "guesser":
        prompt = {
            "role": "system",
            "content": (
                "You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. "
                "The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. "
                "Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific thing, NOT a place and NOT a person. "
                "You are participating in a new game of 20 Questions. Your role is to be the Guesser. Based on the given questions and answers, guess the keyword at this point. "
                "Even if information is limited, guess a keyword. Do not ask a question, just state the guessed keyword with no other text except the keyword itself. "
                "DO NOT output any other text other than the guessed keyword. DO NOT refuse to guess. DO NOT REPEAT A PREVIOUS GUESS."
            ),
        }
        messages = [prompt, {
            "role": "user",
            "content": game_to_string(game) + "\nPrevious guesses:\n" + '\n'.join(guesses) + "\nYour guess: "
        }]
    elif role == "evaluator":
        prompt = {
            "role": "system",
            "content": (
                "You are an evaluator for the game 20 Questions. Given a keyword and a guess, return True if the guess was correct and False otherwise. "
                "The guess should refer to the same thing as the keyword but the guess should NOT be more vague or general than the keyword. DO NOT output anything except 'True' or 'False'."
                "Here are some guiding examples: \n\n"
                "Example 1:\nKeyword: USA\n Guess: United States\nEvaluation: True\n\n"
                "Example 2:\nKeyword: Peking duck\n Guess: duck\nEvaluation: False\n\n"
                "Example 3:\nKeyword: iPad\n Guess: tablet\nEvaluation: False\n\n"
                "Example 4:\nKeyword: pretoria south africa\n Guess: Pretoria\nEvaluation: True\n\n"
                "Example 5:\nKeyword: Bed and Breakfast\n Guess: BnB\nEvaluation: True\n\n"
                "Example 6:\nKeyword: Diet coke\n Guess: coca-cola\nEvaluation: False\n\n"
            ),
        }
        messages = [prompt, {
            "role": "user",
            "content": f"Keyword: {keyword}\nGuess: {guesses[-1]}\nEvaluation: "
        }]
    return messages


In [13]:
construct_messages([{"role": "questioner", "content": "Is it a place?"}], "answerer", "chair")

[{'role': 'system',
  'content': 'You are an AI assistant playing the 20 Questions game. In this game the Answerer is given a secret keyword. The Questioner then asks yes-or-no questions regarding the keyword, and the Answerer answers them accurately. Then the Guesser tries to guess the keyword based on the questions and answers in the game. The keyword is a specific place or thing. You are participating in a new game of 20 Questions. Your role is to be the Answerer. The keyword is chair. Answer only Yes or No based on the keyword. Do not output any other text.'},
 {'role': 'user',
  'content': "Answer the following question about the keyword: 'chair'. Is it a place?"}]

## Game Simulator

In [14]:
tree = things_tree

In [15]:
def simulate_game(keyword):
    print(f"USING MODEL: {MODEL}")
    print(f"KEYWORD: {keyword}")
    print()
    
    game = []

    questions = [step["content"] for step in game if step["role"] == "questioner"]
    answers = [step["content"] for step in game if step["role"] == "answerer"]
    guesses = []

    decision_node = tree
    
    for _ in range(20):
        if decision_node and decision_node.query:
            question = query_map[decision_node.query]
        else:
            question = completion(model=MODEL, messages=construct_messages(game, "questioner", keyword)).choices[0].message.content
    
        game.append({
            "role": "questioner",
            "content": question
        })

        print(f"questioner: {question}")

        if "OVER" in question:
            break
        
        questions.append(question)

        answer = completion(model=MODEL, messages=construct_messages(game, "answerer", keyword)).choices[0].message.content
    
        game.append({
            "role": "answerer",
            "content": answer
        })

        answers.append(answer)
        print(f"answerer: {answer}")

        if decision_node and decision_node.yes_branch != None and "yes" in answer.lower():
            decision_node = decision_node.yes_branch
        elif decision_node and decision_node.no_branch != None and "no" in answer.lower():
            decision_node = decision_node.no_branch
        else:
            decision_node = None
            
        guess = completion(model=MODEL, messages=construct_messages(game, "guesser", keyword, guesses)).choices[0].message.content

        guesses.append(guess)
        print(f"guesser: {guess}")

        evaluation = completion(model=MODEL, messages=construct_messages(game, "evaluator", keyword, guesses)).choices[0].message.content
        print(f"evaluator: {evaluation}")
        if "true" in evaluation.lower(): break

        print()

    return {
        "keyword": keyword,
        "questions": questions,
        "answers": answers,
        "guesses": guesses
    }

## Generate all the games

In [17]:
import json

def read_jsonl_and_transform(file_path):
    result_dict = {}

    with open(file_path, 'r') as file:
        for line in file:
            record = json.loads(line)
            keyword = record.pop('keyword')
            result_dict[keyword] = record

    return result_dict

def read_json_to_dict(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

keyword_labels, results = read_jsonl_and_transform('20qs-data/keywords_data/labeled_keywords_v2.jsonl'), read_json_to_dict('20qs-data/keywords_data/labeling_results_v2.jsonl')

def read_file_to_list(file_path):
    with open(file_path, 'r') as file:
        lines = file.read().splitlines()
    return lines

keywords_list = read_file_to_list('20qs-data/keywords_data/keywords.txt')
similar_keywords_list = read_file_to_list('20qs-data/keywords_data/similar_keywords.txt')

keywords_list = keywords_list[1:]
print(f"Number of keywords in data: {len(keywords_list)}")

keyword_places = [keyword for keyword in keywords_list if keyword_labels[keyword]['place'] == 'yes']
keyword_things = [keyword for keyword in keywords_list if keyword_labels[keyword]['place'] == 'no']

print(f"Number of keywords labeled as things: {len(keyword_things)}.")

Number of keywords in data: 2046
Number of keywords labeled as things: 1347.


Read current games:

In [18]:
import json

def read_jsonl_and_transform(file_path):
    result_dict = {}

    with open(file_path, 'r') as file:
        for line in file:
            if line.strip():  # Skip empty lines
                try:
                    record = json.loads(line)
                    keyword = record.pop('keyword')
                    result_dict[keyword] = record
                except json.JSONDecodeError as e:
                    print(f"Error decoding JSON for line: {line}")
                    print(f"Error message: {e}")

    return result_dict

# Path to the uploaded file
#file_path = '20qs-data/real_keyword_games_extended.jsonl'

# Read and transform the JSONL file
#result_dict = read_jsonl_and_transform(file_path)

# Get the current keywords with games
current_keywords_with_games = read_jsonl_and_transform('20qs-data/final_games/gpt-4o-200games-cleaned.jsonl')

In [19]:
game_results = []

In [20]:
import random
from tqdm import tqdm

dict_keywords = {d['keyword'] for d in game_results}

keywords_left = [keyword for keyword in keyword_things if (keyword not in dict_keywords) and (keyword not in current_keywords_with_games)]
similar_keywords_left = [keyword for keyword in similar_keywords_list if (keyword not in dict_keywords) and (keyword not in current_keywords_with_games)]

print(f"Keywords left: {len(keywords_left)}\n")
print(f"Similar Keywords left: {len(similar_keywords_left)}\n")

generated_keywords = random.choices(keywords_left, k=100)
generated_similar_keywords = random.choices(similar_keywords_left, k=100)

for generated_keyword in tqdm(generated_keywords+generated_similar_keywords, desc = 'games generated'):
    print("Generated keyword:", generated_keyword)
    game_result = simulate_game(generated_keyword)
    game_results.append(game_result)

Keywords left: 1347

Keywords left: 1324



games generated:   0%|          | 0/200 [00:00<?, ?it/s]

Generated keyword: tape measure
USING MODEL: gpt-4o
KEYWORD: tape measure

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Textile
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tools
evaluator: False

questioner: Is it a machine or mechanical device?
answerer: No.
guesser: Raw materials
evaluator: False

questioner: Is it a tool or instrument?
answerer: Yes.
guesser: Drill
evaluator: False

questioner: Is it commonly used in construction or carpentry?
answerer: Yes.
guesser: Hammer
evaluator: False

questioner: Is it a hand tool?
answerer: Yes.
guesser: Saw
evaluator: False

questioner: I

games generated:   0%|          | 1/200 [00:32<1:46:29, 32.11s/it]

evaluator: True
Generated keyword: climber
USING MODEL: gpt-4o
KEYWORD: climber

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Football
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Baseball field
evaluator: False

questioner: Is it primarily used outdoors?
answerer: Yes.
guesser: Soccer field
evaluator: False

questioner: Is it something used in a team sport?
answerer: No.
guesser: Golf course
evaluator: False

questioner: Is it associated with an individual sport or activity?
answerer: Yes.
guesser: Tennis court
evaluator: False

questioner: Does it require any special gear or equipment to use?
answerer: No.
guesser: Running track
evaluator: False

questioner: Is it an ob

games generated:   1%|          | 2/200 [01:33<2:42:30, 49.24s/it]

evaluator: False

Generated keyword: paint roller
USING MODEL: gpt-4o
KEYWORD: paint roller

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Telephone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machines
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tools
evaluator: False

questioner: Is it a type of tool or machinery?
answerer: Yes.
guesser: Conveyor belt
evaluator: False

questioner: Is it commonly used in construction?
answerer: Yes.
guesser: Crane
evaluator: False

questioner: Is it powered by electricity?
answerer: No.
guesser: Hammer
evaluator: False

questioner: Is it used primarily for lifting or moving heavy objects?
answe

games generated:   2%|▏         | 3/200 [02:30<2:53:43, 52.91s/it]

evaluator: False

Generated keyword: empty can
USING MODEL: gpt-4o
KEYWORD: empty can

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Oven
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Stove
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a tool or utensil used in cooking?
answerer: No.
guesser: Plate
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Cookbook
evaluator: False

questioner: Is it typically found in a kitchen?
answerer: Yes.
guesser: Salt shaker
evaluator: False

questioner: Is it used for storage?
answerer: No.
guesser: Pot holder
evaluator: False

questioner: Is it a product used for cleaning?
answerer: No.
guesser: Measuring c

games generated:   2%|▏         | 4/200 [03:19<2:47:41, 51.33s/it]

evaluator: False

Generated keyword: Chandelier
USING MODEL: gpt-4o
KEYWORD: Chandelier

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: House
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: River
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Galaxy
evaluator: False

question

games generated:   2%|▎         | 5/200 [04:05<2:40:54, 49.51s/it]

evaluator: False

Generated keyword: Air Conditioner
USING MODEL: gpt-4o
KEYWORD: Air Conditioner

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: River
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Planet
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Desert
evaluator: False

questi

games generated:   3%|▎         | 6/200 [04:59<2:44:42, 50.94s/it]

evaluator: False

Generated keyword: Puzzle
USING MODEL: gpt-4o
KEYWORD: Puzzle

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer Ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing Cards
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Tennis Racket
evaluator: False

questioner: Is it typically used outdoors?
answerer: No.
guesser: Board Game
evaluator: False

questioner: Is it used in a specific type of sport?
answerer: No.
guesser: Toy Blocks
evaluator: False

questioner: Is it a type of game or toy

games generated:   4%|▎         | 7/200 [05:19<2:11:47, 40.97s/it]

evaluator: True
Generated keyword: merchandise rack
USING MODEL: gpt-4o
KEYWORD: merchandise rack

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Perfume
evaluator: False

questioner: Is it something typically worn on the body?
answerer: No.
guesser: Handbag
evaluator: False

questioner: Is it commonly used as an accessory?
answerer: No.
guesser: Makeup
evaluator: False

questioner: Is it a type of beauty product or cosmetic?
answerer: No.
guesser: Sewing machine
evaluator: False

questioner: Is it something used for maintaining or caring for clothing?
answerer: No.
guesser: Hanger
evaluato

games generated:   4%|▍         | 8/200 [06:11<2:21:44, 44.29s/it]

evaluator: False

Generated keyword: tea bag
USING MODEL: gpt-4o
KEYWORD: tea bag

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee machine
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Wine glass
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a kitchen tool or utensil?
answerer: No.
guesser: Oven mitt
evaluator: False

questioner: Is it a container or packaging for food?
answerer: Yes.
guesser: Tupperware
evaluator: False

questioner: Is it made primarily of plastic?
answerer: No.
guesser: Glass jar
evaluator: False

questioner: Is it made primarily of glass?
answerer: No.
guesser: Tin can
evaluator: False

questioner: Is it made primarily of metal?
answerer: N

games generated:   4%|▍         | 9/200 [06:59<2:24:55, 45.52s/it]

evaluator: False

Generated keyword: hole puncher
USING MODEL: gpt-4o
KEYWORD: hole puncher

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Mobile phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it commonly used in a household?
answere

games generated:   5%|▌         | 10/200 [07:36<2:16:03, 42.97s/it]

evaluator: True
Generated keyword: Violin Strings
USING MODEL: gpt-4o
KEYWORD: Violin Strings

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it related to construction or building materials?
answerer: No.
guesser: Machine tool
evaluator: False

questioner: Is it a type of machinery or equipment used in manufacturing?
answerer: No.
guesser: Raw material
evaluator: False

questioner: Is it related to packaging or shipping?
answerer: No.
guesser: Product
evaluator: False

questioner: I

games generated:   6%|▌         | 11/200 [08:32<2:27:51, 46.94s/it]

evaluator: False

Generated keyword: Joystick
USING MODEL: gpt-4o
KEYWORD: Joystick

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: remote control
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: video game controller
evaluator: False

questioner: Is it used primarily for gaming or playing videos?
answerer: Yes.
guesser: television
evaluator: False

questioner: Is it a gaming console or device used for gaming?
answerer: No.
guesser: tablet
evaluator: False

questioner: Is it used primarily for streaming or watching videos?
answerer: No.
guesser: VR h

games generated:   6%|▌         | 12/200 [08:53<2:02:32, 39.11s/it]

evaluator: True
Generated keyword: power pole
USING MODEL: gpt-4o
KEYWORD: power pole

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Star
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Car
evaluator: False

questioner: I

games generated:   6%|▋         | 13/200 [09:44<2:12:15, 42.43s/it]

evaluator: False

Generated keyword: unripe fruit
USING MODEL: gpt-4o
KEYWORD: unripe fruit

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it commonly eaten raw?
answerer: No.
guesser: Pumpkin
evaluator: False

questioner: Is it commonly used in cooking or baking?
answerer: No.
guesser: Olive
evaluator: False

questioner: Is it usually processed or preserved in some way before consumption?
answerer: Yes.
guesser: Raisin
evaluator: False

questioner: Is it a dried fruit?
answerer: No.
guesser: Cranberry
evaluator: False

questioner: Is it commonly made into a jam or jelly?
answerer: No.
guesser: Coconut
evaluator: False

questioner: Is it often used to produce juice or a beverage

games generated:   7%|▋         | 14/200 [10:43<2:27:47, 47.67s/it]

evaluator: False

Generated keyword: hose line
USING MODEL: gpt-4o
KEYWORD: hose line

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it typically used by workers on construction sites?
answerer: Yes.
guesser: Safety harness
evaluator: False

questioner: Is it a type of protective clothing or gear?
answerer: No.
guesser: Scaffolding
evaluator: False

questioner: Is it a tool or device used for construction purposes?
answerer: Yes.
guesser: Jackhammer
evaluato

games generated:   8%|▊         | 15/200 [11:33<2:28:28, 48.15s/it]

evaluator: False

Generated keyword: water chestnut
USING MODEL: gpt-4o
KEYWORD: water chestnut

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tomato
evaluator: False

questioner: Is it a fruit?
answerer: No.
guesser: Carrot
evaluator: False

questioner: Is it a vegetable?
answerer: Yes.
guesser: Potato
evaluator: False

questioner: Is it typically consumed raw?
answerer: No.
guesser: Broccoli
evaluator: False

questioner: Is it a root vegetable?
answerer: No.

guesser: Corn
evaluator: False

questioner: Is it green in color?
answerer: No.
guesser: Pumpkin
evaluator: False

questioner: Is it commonly used in soups or stews?
answerer: Yes.
guesser: Onion
evaluator: False

questioner: Is it a member of the nightshade family (like tomatoes or peppers)?
answerer: No.
guesser: Celery
evaluator: 

games generated:   8%|▊         | 16/200 [12:24<2:30:45, 49.16s/it]

evaluator: False

Generated keyword: car seat
USING MODEL: gpt-4o
KEYWORD: car seat

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it a part of a vehicle?
answerer: Yes.
guesser: Airbag
evaluator: False

questioner: Is it primarily used in automobiles (cars, trucks, etc.)?
answerer: Yes.
guesser: Brake pad
evaluator: False

questioner: Is it an internal component of a vehicle?
answerer: Yes.
guesser: Engine
evaluator: False

questioner: Is it related to the vehicle's engine or transmission?
answerer: No.
guesser: Steering wheel
evaluator: False

questioner: Is it r

games generated:   8%|▊         | 17/200 [12:52<2:10:49, 42.89s/it]

evaluator: True
Generated keyword: support beam
USING MODEL: gpt-4o
KEYWORD: support beam

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Safety harness
evaluator: False

questioner: Is it a type of personal protective equipment (PPE)?
answerer: No.
guesser: Scaffolding
evaluator: False

questioner: Is it used to prevent structural damage?
answerer: Yes.
guesser: Earthquake-resistant building retrofit
evaluator: False

questioner: Is it related to fire protection?
answerer: No.
guesser: S

games generated:   9%|▉         | 18/200 [13:21<1:56:55, 38.55s/it]

evaluator: True
Generated keyword: fallen tree
USING MODEL: gpt-4o
KEYWORD: fallen tree

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Sahara Desert
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Sun
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Moon
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Mount Rushmore
eval

games generated:  10%|▉         | 19/200 [14:06<2:02:11, 40.51s/it]

evaluator: False

Generated keyword: Bug spray
USING MODEL: gpt-4o
KEYWORD: Bug spray

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Taj Mahal
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Fire extinguisher
evaluator: False

questioner: Is it t

games generated:  10%|█         | 20/200 [14:56<2:09:50, 43.28s/it]

evaluator: False

Generated keyword: curling iron
USING MODEL: gpt-4o
KEYWORD: curling iron

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Dress
evaluator: False

questioner: Is it something you wear on your body?
answerer: No.
guesser: Mirror
evaluator: False

questioner: Is it something you wear on your head?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it something you wear on your feet?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it something you carry with you or use as an accessory?
answerer: No.
guesser: Makeup set
evaluator: False

questioner: Is it related to 

games generated:  10%|█         | 21/200 [15:35<2:05:32, 42.08s/it]

evaluator: True
Generated keyword: Rags
USING MODEL: gpt-4o
KEYWORD: Rags

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it commonly found in a household?
answerer: Yes.
guesser: Chair
evaluat

games generated:  11%|█         | 22/200 [16:24<2:11:26, 44.30s/it]

evaluator: False

Generated keyword: wastewater
USING MODEL: gpt-4o
KEYWORD: wastewater

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it a type of machine or mechanical device?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it used in construction or building?
answerer: No.
guesser: Raw materials
evaluator: False

questioner: Is it a chemical or chemical compound?
answerer: No.
guesser: Product
evaluator: False

questioner: Is it related to textiles or clothing manuf

games generated:  12%|█▏        | 23/200 [17:14<2:15:43, 46.01s/it]

evaluator: False

Generated keyword: Garbage bag
USING MODEL: gpt-4o
KEYWORD: Garbage bag

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: School
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Hospital
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it a tool or used for a specific task?
answerer

games generated:  12%|█▏        | 24/200 [18:01<2:15:28, 46.19s/it]

evaluator: False

Generated keyword: unripe fruit
USING MODEL: gpt-4o
KEYWORD: unripe fruit

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tomato
evaluator: False

questioner: Is it a type of fruit?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it typically grown on trees?
answerer: Yes.
guesser: Orange
evaluator: False

questioner: Is it a tropical fruit?
answerer: Yes.
guesser: Mango
evaluator: False

questioner: Is it larger than a grapefruit?
answerer: No.
guesser: Pineapple
evaluator: False

questioner: Is it commonly yellow when ripe?
answerer: Yes.
guesser: Lemon
evaluator: False

questioner: Is it a type of banana?
answerer: Yes.
guesser: Plantain
evaluator: False

questioner: Is it commonly found in supermarkets?
answerer: Yes.
guesser: Cavendish banana
evaluator:

games generated:  12%|█▎        | 25/200 [18:55<2:21:31, 48.52s/it]

evaluator: False

Generated keyword: Glove
USING MODEL: gpt-4o
KEYWORD: Glove

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Disneyland
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Tennis racket
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing card
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it used in sports or physical activities?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it a piece of sports equipment that you wear on your body?
answerer: Yes.
guesser: Helmet
evaluator: Fal

games generated:  13%|█▎        | 26/200 [19:18<1:58:22, 40.82s/it]

evaluator: True
Generated keyword: hanging hook
USING MODEL: gpt-4o
KEYWORD: hanging hook

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it commonly used for writing or

games generated:  14%|█▎        | 27/200 [20:08<2:06:02, 43.71s/it]

evaluator: False

Generated keyword: Tomato
USING MODEL: gpt-4o
KEYWORD: Tomato

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it commonly eaten raw?
answerer: Yes.
guesser: Grapes
evaluator: False

questioner: Is it smaller than a grapefruit?
answerer: Yes.
guesser: Orange
evaluator: False

questioner: Is it typically grown on a tree?
answerer: No.
guesser: Strawberry
evaluator: False

questioner: Is it a type of berry?
answerer: Yes.
guesser: Blueberry
evaluator: False

questioner: Is it commonly used in desserts?
answerer: No.
guesser: Raspberry
evaluator: False

questioner: Is it predominantly red in color?
answerer: Yes.
guesser: Cranberry
evaluator: False

questioner: Is i

games generated:  14%|█▍        | 28/200 [20:40<1:54:48, 40.05s/it]

evaluator: True
Generated keyword: raccoon
USING MODEL: gpt-4o
KEYWORD: raccoon

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: School
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a common pet?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it a wild animal?
answerer: Yes.
guesser: Lion
evaluator: False

questioner: Is it primarily found in the ocean?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Is it primarily a land animal?
answerer: Yes.
guesser: Giraffe
evaluator: False

questioner: Is it a mammal?
answerer: Yes.
gues

games generated:  14%|█▍        | 29/200 [21:16<1:50:39, 38.83s/it]

evaluator: True
Generated keyword: pine cone
USING MODEL: gpt-4o
KEYWORD: pine cone

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Taj Mahal
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it a household item?
a

games generated:  15%|█▌        | 30/200 [22:01<1:55:01, 40.60s/it]

evaluator: False

Generated keyword: fresheners
USING MODEL: gpt-4o
KEYWORD: fresheners

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it commonly used in household activities?
answerer: Yes.


games generated:  16%|█▌        | 31/200 [22:44<1:56:49, 41.48s/it]

evaluator: False

Generated keyword: reading glasses
USING MODEL: gpt-4o
KEYWORD: reading glasses

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it something that is typically worn on the body?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it primarily used as outerwear?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it an item of clothing that is most commonly worn above the waist?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it typically worn on the upper torso?
answerer: No.
guesser: Scarf
evaluato

games generated:  16%|█▌        | 32/200 [23:40<2:08:27, 45.88s/it]

evaluator: False

Generated keyword: cilantro
USING MODEL: gpt-4o
KEYWORD: cilantro

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Grain
evaluator: False

questioner: Is it a fruit?
answerer: No.
guesser: Vegetable
evaluator: False

questioner: Is it a vegetable?
answerer: No.
guesser: Meat
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: Dairy
evaluator: False

questioner: Is it a type of meat?
answerer: No.
guesser: Egg
evaluator: False

questioner: Is it a dairy product?
answerer: No.
guesser: Sugar
evaluator: False

questioner: Is it commonly processed or refined?
answerer: No.
guesser: Honey
evaluator: False

questioner: Is it a type of seafood?
answerer: No.
guesser: Nut
evaluator: False

questioner: Is it a type of nut or seed?
answerer: No.


games generated:  16%|█▋        | 33/200 [24:16<1:58:58, 42.74s/it]

evaluator: True
Generated keyword: smoothie
USING MODEL: gpt-4o
KEYWORD: smoothie

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Bottle
evaluator: False

questioner: Is it non-alcoholic?
answerer: Yes.
guesser: Tea
evaluator: False

questioner: Is it a type of hot beverage?
answerer: No.
guesser: Soda
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it a fruit juice or related to fruit juice?
answerer: Yes.
guesser: Orange juice
evaluator: False

questioner: Is it commonly consumed as a breakfast drink?
answerer: Yes.
guesser: Apple juice
evalua

games generated:  17%|█▋        | 34/200 [25:06<2:04:59, 45.18s/it]

evaluator: False

Generated keyword: Rice pudding
USING MODEL: gpt-4o
KEYWORD: Rice pudding

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Ice cream
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Pasta
evaluator: False

questioner: Is it typically served hot?
answerer: Yes.
guesser: Soup
evaluator: False

questioner: Is it a main course?
answerer: No.
guesser: French fries
evaluator: False

questioner: Is it typically served as a side dish?
answerer: No.
guesser: Cake
evaluator: False

questioner: Is it a type of appetizer?
answerer: No.
guesser: Pie
evaluator: False

questioner: Is it a type of dessert?
answerer: Yes.
guesser: Brownie
evaluator: False

questioner: Is it typically baked?
answerer: Yes.
guesser: Cookies
evaluator: False

questioner: Is it

games generated:  18%|█▊        | 35/200 [25:55<2:07:09, 46.24s/it]

evaluator: False

Generated keyword: utility belt
USING MODEL: gpt-4o
KEYWORD: utility belt

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Football stadium
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Movie theater
evaluator: False

questioner: Is it primarily used outdoors?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it electronic?
answerer: No.
guesser: Board game
evaluator: False

questioner: Is it something you sit on?
answerer: No.
guesser: Concert hall
evaluator: False

questioner: Is it something you watch?
answerer: No.
guesser: Musical instrument
evaluator: False

questioner: Is it something related to games?
answerer: No.
g

games generated:  18%|█▊        | 36/200 [26:45<2:08:56, 47.17s/it]

evaluator: False

Generated keyword: junction box
USING MODEL: gpt-4o
KEYWORD: junction box

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it part of a building's structural integrity, such as beams or support columns?
answerer: No.
guesser: Scaffolding
evaluator: False

questioner: Is it commonly used for personal protection, like helmets or gloves?
answerer: No.
guesser: Fire alarm
evaluator: False

questioner: Is it related to fire safety?
answerer: No.
g

games generated:  18%|█▊        | 37/200 [27:36<2:11:34, 48.43s/it]

evaluator: False

Generated keyword: Medication
USING MODEL: gpt-4o
KEYWORD: Medication

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Car factory
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it commonly used in construction or building?
answerer: No.
guesser: Textiles
evaluator: False

questioner: Is it a tool or piece of machinery?
answerer: No.
guesser: Packaging
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it a type of packaging or con

games generated:  19%|█▉        | 38/200 [28:32<2:16:39, 50.62s/it]

evaluator: False

Generated keyword: Lathe
USING MODEL: gpt-4o
KEYWORD: Lathe

questioner: Is it related to food, beverages or cooking?
answerer: No
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No
guesser: Robot
evaluator: False

questioner: Is it a tool or piece of equipment?
answerer: Yes
guesser: Welding machine
evaluator: False

questioner: Is it commonly used in construction or building?
answerer: No
guesser: Lathe


games generated:  20%|█▉        | 39/200 [28:49<1:49:20, 40.75s/it]

evaluator: True
Generated keyword: Wine Opener
USING MODEL: gpt-4o
KEYWORD: Wine Opener

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Kitchen
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Coffee machine
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Knife
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: Yes.
guesser: Fork
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it typically used for stirring or mixing?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it commonly used for measuring ingredients?
answerer: No.
guesser: Peeler
evaluator: False

questioner: Is it used for cooking 

games generated:  20%|██        | 40/200 [29:43<1:58:35, 44.47s/it]

evaluator: False

Generated keyword: Milling machine
USING MODEL: gpt-4o
KEYWORD: Milling machine

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Robot
evaluator: False

questioner: Is it a type of consumer electronics?
answerer: No.
guesser: Semiconductor manufacturing equipment
evaluator: False

questioner: Is it a component or part used in electronic devices?
answerer: No.
guesser: Industrial robot
evaluator: False

questioner: Is it used in the production process of electronics or technology products?
answerer: Yes.
guesser: 3D printer
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer

games generated:  20%|██        | 41/200 [30:36<2:05:21, 47.30s/it]

evaluator: False

Generated keyword: sock
USING MODEL: gpt-4o
KEYWORD: sock

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it used in construction or building infrastructure?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it a type of machine or machinery?
answerer: No.
guesser: Product
evaluator: False

questioner: Is it related to packaging or storage?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it related to clothing or textiles?
answerer: Yes.
guess

games generated:  21%|██        | 42/200 [31:06<1:50:16, 41.88s/it]

evaluator: True
Generated keyword: Wireless Speaker
USING MODEL: gpt-4o
KEYWORD: Wireless Speaker

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Remote control
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Game controller
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a device used for streaming or watching media?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it primarily used for listening to audio?
answerer: Yes.
guesser: Headphones
evaluator: False

questioner: Is

games generated:  22%|██▏       | 43/200 [31:27<1:33:33, 35.76s/it]

evaluator: True
Generated keyword: dental chair
USING MODEL: gpt-4o
KEYWORD: dental chair

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Mount Rushmore
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Empire State Building
evaluator: False

questioner: Is it furniture?
answerer: Yes.
guesser: Chair
evaluator: False

questioner: Is it typically found in a living r

games generated:  22%|██▏       | 44/200 [32:13<1:40:57, 38.83s/it]

evaluator: False

Generated keyword: Yoga mat
USING MODEL: gpt-4o
KEYWORD: Yoga mat

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing cards
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tennis racket
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Baseball
evaluator: False

questioner: Is it used in playing a specific game or sport?
answerer: No.
guesser: Board game piece
evaluator: False

questioner: Is it commonly used outdoors?
answerer: No.
guesser: Book
evaluator: False

questioner: Is

games generated:  22%|██▎       | 45/200 [32:37<1:28:37, 34.31s/it]

evaluator: True
Generated keyword: Resin
USING MODEL: gpt-4o
KEYWORD: Resin

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it a consumer electronic device?
answerer: No.
guesser: Microchip
evaluator: False

questioner: Is it a component used within electronic devices?
answerer: Yes.
guesser: Transistor
evaluator: False

questioner: Is it a semiconductor component?
answerer: No.
guesser: Capacitor
evaluator: False

questioner: Is it related to data storage or memory?
answerer: No.
guesser: Resistor
evaluator: False

questioner: Is it related to power supply or energy conversion?
answerer: 

games generated:  23%|██▎       | 46/200 [33:27<1:40:25, 39.13s/it]

evaluator: False

Generated keyword: desk lamp
USING MODEL: gpt-4o
KEYWORD: desk lamp

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Laptop
evaluator: False

questioner: Is it primarily used for communication?
answerer: No.
guesser: Television remote
evaluator: False

questioner: 

games generated:  24%|██▎       | 47/200 [34:12<1:44:15, 40.89s/it]

evaluator: False

Generated keyword: Tulip
USING MODEL: gpt-4o
KEYWORD: Tulip

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Notebook
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it commonly f

games generated:  24%|██▍       | 48/200 [35:06<1:52:58, 44.60s/it]

evaluator: False

Generated keyword: pea plant
USING MODEL: gpt-4o
KEYWORD: pea plant

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a type of fruit or vegetable?
answerer: Yes.
guesser: Tomato
evaluator: False

questioner: Is it a fruit?
answerer: No.
guesser: Carrot
evaluator: False

questioner: Is it commonly eaten raw?
answerer: Yes.
guesser: Lettuce
evaluator: False

questioner: Is it a leafy vegetable?
answerer: No.
guesser: Cucumber
evaluator: False

questioner: Is it a root vegetable?
answerer: No.
guesser: Broccoli
evaluator: False

questioner: Is it a type of legume?
answerer: Yes.
guesser: Peas
evaluator: False

questioner: Is it green in color?
answerer: Yes.
guesser: Green beans
evaluator: False

questioner: Is it commonly eaten in pods?

games generated:  24%|██▍       | 49/200 [35:46<1:49:13, 43.40s/it]

evaluator: True
Generated keyword: ribbon
USING MODEL: gpt-4o
KEYWORD: ribbon

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Garden
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it worn on the body?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it primarily worn to cover the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it primarily worn to cover the lower body?
answerer: No.
guesser: Gloves
evaluator: False

questioner: Is it commonly worn on the head?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it a type of hat?
answerer: No.
guesser: Headband
evaluator: False

q

games generated:  25%|██▌       | 50/200 [36:37<1:54:28, 45.79s/it]

evaluator: True
Generated keyword: peace lily
USING MODEL: gpt-4o
KEYWORD: peace lily

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Cactus
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it commonly found in a household?
answerer: Yes

games generated:  26%|██▌       | 51/200 [37:30<1:58:30, 47.72s/it]

evaluator: False

Generated keyword: laundry cart
USING MODEL: gpt-4o
KEYWORD: laundry cart

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Central Park
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Lipstick
evaluator: False

questioner: Is it something that is typically worn?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it related to personal grooming or hygiene?
answerer: No.
guesser: Handbag
evaluator: False

questioner: Is it a type of accessory?
answerer: No.
guesser: Makeup brush
evaluator: False

questioner: Is it related to makeup or skincare?
answerer: No.
guesser: Clothing hanger
evaluator: False

questioner: Is it a 

games generated:  26%|██▌       | 52/200 [38:30<2:07:17, 51.61s/it]

evaluator: False

Generated keyword: highlighter
USING MODEL: gpt-4o
KEYWORD: highlighter

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Taj Mahal
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Mobile phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it us

games generated:  26%|██▋       | 53/200 [39:20<2:05:19, 51.15s/it]

evaluator: True
Generated keyword: parsley
USING MODEL: gpt-4o
KEYWORD: parsley

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Farm
evaluator: False

questioner: Is it a type of tool?
answerer: No.
guesser: Vineyard
evaluator: False

questioner: Is it used in the process of growing crops?
answerer: No.
guesser: Greenhouse
evaluator: False

questioner: Is it used in the process of raising livestock?
answerer: No.
guesser: Barn
evaluator: False

questioner: Is it a product derived from agriculture?
answerer: Yes.
guesser: Milk
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Cotton
evaluator: False

questioner: Is it a raw material?
answerer: Yes.
guesser: Wheat
eva

games generated:  27%|██▋       | 54/200 [40:14<2:06:13, 51.87s/it]

evaluator: False

Generated keyword: fallen tree
USING MODEL: gpt-4o
KEYWORD: fallen tree

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Amazon Rainforest
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Sahara Desert
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it broadly related to the

games generated:  28%|██▊       | 55/200 [41:06<2:05:10, 51.80s/it]

evaluator: False

Generated keyword: Diving board
USING MODEL: gpt-4o
KEYWORD: Diving board

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Stadium
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Theater
evaluator: False

questioner: Is it related to a specific type of sport?
answerer: Yes.
guesser: Basketball Court
evaluator: False

questioner: Is it primarily used outdoors?
answerer: Yes.
guesser: Soccer Field
evaluator: False

questioner: Is it related to a team sport?
answerer: No.
guesser: Golf Course
evaluator: False

questioner: Is it associated with an individual sport that involves physical exertion?
answerer: Yes.
guesser: Tennis Court
evaluator: False

questioner: I

games generated:  28%|██▊       | 56/200 [41:41<1:52:36, 46.92s/it]

evaluator: True
Generated keyword: Ice Water
USING MODEL: gpt-4o
KEYWORD: Ice Water

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Beer
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Wine
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it typically served cold?
answerer: Yes.
guesser: Soda
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it a dairy product?
answerer: No.
guesser: Iced tea
evaluator: False

questioner: Is it a type of juice?
answerer: No.
guesser: Lemonade
evaluator: False

questioner: Is it often consumed for its health benefits?
answerer: Yes.
guesser: Water
evaluator: False

questioner: Is it a typ

games generated:  28%|██▊       | 57/200 [42:31<1:53:33, 47.64s/it]

evaluator: False

Generated keyword: Veggie Burger
USING MODEL: gpt-4o
KEYWORD: Veggie Burger

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a fruit?
answerer: No.
guesser: Carrot
evaluator: False

questioner: Is it a vegetable?
answerer: Yes.
guesser: Potato
evaluator: False

questioner: Is it typically eaten cooked rather than raw?
answerer: Yes.
guesser: Broccoli
evaluator: False

questioner: Is it a root vegetable?
answerer: No.
guesser: Spinach
evaluator: False

questioner: Is it green in color?
answerer: No.
guesser: Tomato
evaluator: False

questioner: Is it a type of squash?
answerer: No.
guesser: Corn
evaluator: False

questioner: Is it a member of the nightshade family (e.g., tomato, eggplant)?
answerer: No.
guesser: Cauliflower
evaluator:

games generated:  29%|██▉       | 58/200 [43:26<1:58:16, 49.97s/it]

evaluator: False

Generated keyword: lug
USING MODEL: gpt-4o
KEYWORD: lug

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: museum
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: tools
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer: No.
guesser: raw materials
evaluator: False

questioner: Is it a product commonly used in construction or building?
answerer: Yes.
guesser: cement
evaluator: False

questioner: Is it primarily used for structural purposes?
answerer: No.
guesser: Paint
evaluator: False

questioner: Is it commonly used for finishing or inte

games generated:  30%|██▉       | 59/200 [44:20<2:00:23, 51.23s/it]

evaluator: False

Generated keyword: Joystick
USING MODEL: gpt-4o
KEYWORD: Joystick

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Beach
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Remote control
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Game console
evaluator: False

questioner: Is it primarily used for playing games?
answerer: Yes.
guesser: Game controller
evaluator: False

questioner: Is it a type of gaming console?
answerer: No.
guesser: Video game
evaluator: False

questioner: Is it a type of handheld gaming device or accessory?
answerer: Yes.
guesser: Handheld gaming device
evaluator: False

questioner

games generated:  30%|███       | 60/200 [44:41<1:38:37, 42.27s/it]

evaluator: True
Generated keyword: Red velvet cake
USING MODEL: gpt-4o
KEYWORD: Red velvet cake

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Bread
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Pasta
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it a type of dessert?
answerer: Yes.
guesser: Ice cream
evaluator: False

questioner: Is it a baked good?
answerer: Yes.
guesser: Cake
evaluator: False

questioner: Is it commonly eaten during breakfast?
answerer: No.
guesser: Cookie
evaluator: False

questioner: Is it often associated with celebrations or special occasions?
answerer: Yes.
guesser: Pie
evaluator: False

questioner: Is it a type of cake?
answerer: Yes.
guesser: Birthday ca

games generated:  30%|███       | 61/200 [45:34<1:44:59, 45.32s/it]

evaluator: False

Generated keyword: Exhaust Fan
USING MODEL: gpt-4o
KEYWORD: Exhaust Fan

questioner: Is it related to food, beverages or cooking?
answerer: No
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No
guesser: Warehouse
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes
guesser: Smartphone
evaluator: False

questioner: Is it a consumer electronic device?
answerer: No
guesser: Computer server
evaluator: False

questioner: Is it used in the production or maintenance of electronic devices?
answerer: No
guesser: Semiconductor
evaluator: False

questioner: Is it a component found within electronic devices?
answerer: No
guesser: Industrial robot
evaluator: False

questioner: Is it related to communication technologies?
answerer: No
guesser: Machine tool
evaluator: False

ques

games generated:  31%|███       | 62/200 [46:25<1:48:07, 47.01s/it]

evaluator: False

Generated keyword: smartboard
USING MODEL: gpt-4o
KEYWORD: smartboard

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Hospital
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: School
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Airport
evaluator: False

questioner

games generated:  32%|███▏      | 63/200 [47:12<1:47:46, 47.20s/it]

evaluator: False

Generated keyword: hydro turbine
USING MODEL: gpt-4o
KEYWORD: hydro turbine

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it primarily used in communication?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it used for computing or data processing?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it related to entertainment or media?
answerer: No.
guesser: Semiconductor
evaluator: False

questioner: Is it used in household appliances?
answerer: No.
guesser: Solar panel
evaluator: False

questioner: Is it used in transportation or au

games generated:  32%|███▏      | 64/200 [48:04<1:49:40, 48.39s/it]

evaluator: False

Generated keyword: telephone
USING MODEL: gpt-4o
KEYWORD: telephone

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Forest
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: School
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it a device primarily used for communication?
answerer: Yes.
guesser: Tablet
evaluator: False

questioner: Is it a smartphone?
answerer: No.
guesser: Lan

games generated:  32%|███▎      | 65/200 [48:30<1:34:00, 41.78s/it]

evaluator: True
Generated keyword: colander
USING MODEL: gpt-4o
KEYWORD: colander

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Blender
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Coffee mug
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it a cooking utensil or tool?
answerer: Yes.
guesser: Knife
evaluator: False

questioner: Is it primarily used for cutting or chopping?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it used for mixing or stirring?
answerer: No.
guesser: Measuring cup
evaluator: False

questioner: Is it a piece of cookware, such as a pan or pot?
answerer: No.
guesser: Rolling pin
evaluator: False

questioner: Is it used for serving food?


games generated:  33%|███▎      | 66/200 [48:58<1:24:02, 37.63s/it]

evaluator: True
Generated keyword: washcloth
USING MODEL: gpt-4o
KEYWORD: washcloth

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Lipstick
evaluator: False

questioner: Is it primarily worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it primarily worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it an accessory?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it related to personal grooming or skincare?
answerer: Yes.
guesser: Lotion
evaluator: False

questioner: Is it a skincare product?
answerer: N

games generated:  34%|███▎      | 67/200 [49:35<1:23:04, 37.47s/it]

evaluator: True
Generated keyword: slit
USING MODEL: gpt-4o
KEYWORD: slit

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Phone
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Planet
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it related to

games generated:  34%|███▍      | 68/200 [50:23<1:29:30, 40.69s/it]

evaluator: False

Generated keyword: spray
USING MODEL: gpt-4o
KEYWORD: spray

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Hospital
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Perfume
evaluator: False

questioner: Is it something typically worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower body?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it an accessory?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it used for grooming or personal care?
answerer: Yes.
guesser: Hairbrush
evaluator: False

questioner: Is it specifically for use on hair?
answerer: Yes.
guesser: S

games generated:  34%|███▍      | 69/200 [51:19<1:38:44, 45.22s/it]

evaluator: False

Generated keyword: curling iron
USING MODEL: gpt-4o
KEYWORD: curling iron

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountains
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it primarily worn on the body?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it primarily used for personal grooming or hygiene?
answerer: Yes.
guesser: Hairbrush
evaluator: False

questioner: Is it an electronic device?
answerer: Yes.
guesser: Electric toothbrush
evaluator: False

questioner: Is it used for hair care?
answerer: Yes.
guesser: Hair dryer
evaluator: False

questioner: Is it commonly found in househo

games generated:  35%|███▌      | 70/200 [51:44<1:24:53, 39.18s/it]

evaluator: True
Generated keyword: cinder block
USING MODEL: gpt-4o
KEYWORD: cinder block

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a product of mechanical engineering?
answerer: No.
guesser: Packaging
evaluator: False

questioner: Is it related to chemicals or chemical processes?
answerer: No.
guesser: Automobile
evaluator: False

questioner: Is it associated with textiles or fabrics?
answerer: No.
guesser: Robot
evaluator: False

questioner: Is it a type of tool or machinery us

games generated:  36%|███▌      | 71/200 [57:05<4:26:00, 123.73s/it]

evaluator: True
Generated keyword: Quesadilla
USING MODEL: gpt-4o
KEYWORD: Quesadilla

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Bread
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Burger
evaluator: False

questioner: Is it typically served hot?
answerer: Yes.
guesser: Soup
evaluator: False

questioner: Is it commonly considered a main course?
answerer: Yes.
guesser: Spaghetti
evaluator: False

questioner: Does it contain meat?
answerer: It can.
guesser: Lasagna
evaluator: False

questioner: Is it commonly associated with a specific cuisine, such as Italian or Chinese?
answerer: Yes.
guesser: Tacos
evaluator: False

questioner: Is it commonly associated with Italian cuisine?
answerer: No.
guesser: Curry
evaluator: False

questioner: Is it commonly a

games generated:  36%|███▌      | 72/200 [57:40<3:27:21, 97.20s/it] 

evaluator: True
Generated keyword: sunglasses
USING MODEL: gpt-4o
KEYWORD: sunglasses

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it something typically worn on the upper body?
answerer: Yes.
guesser: T-shirt
evaluator: False

questioner: Is it an article of clothing rather than an accessory?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it typically worn around the neck?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it typically used to cover or protect the head?
answerer: No.
guesser: Scarf
evaluator: False

questioner: Is it typica

games generated:  36%|███▋      | 73/200 [58:06<2:40:13, 75.70s/it]

evaluator: True
Generated keyword: plastic fork
USING MODEL: gpt-4o
KEYWORD: plastic fork

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Cup
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Fork
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it a kitchen utensil?
answerer: Yes.
guesser: Chef's knife
evaluator: False

questioner: Is it typically used for cutting or chopping?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it used for cooking or baking?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it used for serving or eating food?
answerer: Yes.
guesser: Plate
evaluator: False

questioner: Is it primarily made of metal?
answerer: No.
gue

games generated:  37%|███▋      | 74/200 [58:33<2:08:27, 61.17s/it]

evaluator: True
Generated keyword: bedspread
USING MODEL: gpt-4o
KEYWORD: bedspread

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Statue
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Painting
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Skyscraper
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Park
evaluator: False

questioner

games generated:  38%|███▊      | 75/200 [59:19<1:58:02, 56.66s/it]

evaluator: False

Generated keyword: tile
USING MODEL: gpt-4o
KEYWORD: tile

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Automobile
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Industrial robot
evaluator: False

questioner: Is it a tool or piece of machinery?
answerer: No.
guesser: Product packaging
evaluator: False

questioner: Is it a manufactured product used in construction or housing?
answerer: Yes.
guesser: Brick
evaluator: False

questioner: Is it typically used as a building material?
answerer: Yes.
guesser: Concrete
evaluator: False

questioner: Is it commonly used fo

games generated:  38%|███▊      | 76/200 [59:47<1:38:56, 47.88s/it]

evaluator: True
Generated keyword: orange slice
USING MODEL: gpt-4o
KEYWORD: orange slice

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tomato
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it typically sweet?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it commonly eaten in its raw form?
answerer: Yes.
guesser: Grapes
evaluator: False

questioner: Is it a citrus fruit?
answerer: Yes.
guesser: Orange
evaluator: False

questioner: Is it orange in color?
answerer: Yes.
guesser: Mandarin
evaluator: False

questioner: Is it an orange?
answerer: No.
guesser: Tangerine
evaluator: False

questioner: Is it smaller than an orange?
answerer: No.
guesser: Grapefruit
evaluator: False

questioner: Is it a grapefruit?
a

games generated:  38%|███▊      | 77/200 [1:00:35<1:38:17, 47.95s/it]

evaluator: False

Generated keyword: Whistle
USING MODEL: gpt-4o
KEYWORD: Whistle

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Central Park
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Basketball
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it used in a specific sport or recreational activity?
answerer: Yes.
guesser: Tennis ball
evaluator: False

questioner: Is it used in a team sport?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questione

games generated:  39%|███▉      | 78/200 [1:00:55<1:20:17, 39.48s/it]

evaluator: True
Generated keyword: Oxygen tank
USING MODEL: gpt-4o
KEYWORD: Oxygen tank

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard Hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it a type of wearable safety gear?
answerer: No.
guesser: Fire alarm
evaluator: False

questioner: Is it an electronic safety device?
answerer: No.
guesser: First aid kit
evaluator: False

questioner: Is it used in firefighting or fire prevention?
answerer: Yes.
guesser: Fire blanket
evaluator: False

questioner: Is

games generated:  40%|███▉      | 79/200 [1:01:45<1:25:58, 42.63s/it]

evaluator: False

Generated keyword: bumper
USING MODEL: gpt-4o
KEYWORD: bumper

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: House
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Star
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it somethin

games generated:  40%|████      | 80/200 [1:02:19<1:20:09, 40.08s/it]

evaluator: True
Generated keyword: Relish
USING MODEL: gpt-4o
KEYWORD: Relish

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Chocolate
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Pasta
evaluator: False

questioner: Is it typically served as a main course?
answerer: No.
guesser: Cake
evaluator: False

questioner: Is it typically a dessert?
answerer: No.
guesser: Soup
evaluator: False

questioner: Is it commonly eaten as a snack?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it typically served as an appetizer or side dish?
answerer: Yes.
guesser: French fries
evaluator: False

questioner: Is it a type of salad?
answerer: No.
guesser: Mashed potatoes
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Sushi
e

games generated:  40%|████      | 81/200 [1:03:11<1:26:30, 43.62s/it]

evaluator: True
Generated keyword: cowboy boot
USING MODEL: gpt-4o
KEYWORD: cowboy boot

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Toolbox
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer: No.
guesser: Raw materials
evaluator: False

questioner: Is it a product used in construction or building?
answerer: No.
guesser: Packaging materials
evaluator: False

questioner: Is it a product used in transportation or logistics?
answerer: No.
guesser: Automobile
evaluator: False

questioner:

games generated:  41%|████      | 82/200 [1:03:59<1:28:36, 45.06s/it]

evaluator: False

Generated keyword: silicon
USING MODEL: gpt-4o
KEYWORD: silicon

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it commonly used in households?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it primarily used for entertainment purposes?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it primarily used for communication purposes?
answerer: No.
guesser: Microwave oven
evaluator: False

questioner: Is it used for cleaning or maintenance?
answerer: No.
guesser: Washing machine
evaluator: False

questioner: Is it used for temperature c

games generated:  42%|████▏     | 83/200 [1:04:49<1:30:46, 46.55s/it]

evaluator: False

Generated keyword: electric scooter
USING MODEL: gpt-4o
KEYWORD: electric scooter

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Taj Mahal
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Sphinx
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Big Ben
evaluator: False

questioner: Is it broadly related to the arts or media?


games generated:  42%|████▏     | 84/200 [1:05:39<1:31:56, 47.56s/it]

evaluator: False

Generated keyword: Pliers
USING MODEL: gpt-4o
KEYWORD: Pliers

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Warehouse
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it commonly made of metal?
answerer: Yes.
guesser: Robot
evaluator: False

questioner: Is it primarily used in construction or building?
answerer: Yes.
guesser: Steel beams
evaluator: False

questioner: Is it a type of tool or equipment?
answerer: Yes.
guesser: Hammer
evaluator: False

questioner: Is it handheld?
answerer: Yes.
guesser: Wrench
evalu

games generated:  42%|████▎     | 85/200 [1:06:07<1:19:38, 41.56s/it]

evaluator: True
Generated keyword: Protein Shakes
USING MODEL: gpt-4o
KEYWORD: Protein Shakes

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Bread
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Pasta
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it a type of dessert?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it commonly eaten for breakfast?
answerer: Yes.
guesser: Cereal
evaluator: False

questioner: Is it bread or a bread-based product?
answerer: No.
guesser: Yogurt
evaluator: False

questioner: Is it dairy-based?
answerer: Yes.
guesser: Cheese
evaluator: False

questioner: Is it a type of yogurt or yogurt-based?
answerer: No.
guesser: Milk
evaluator: Fals

games generated:  43%|████▎     | 86/200 [1:06:54<1:22:20, 43.34s/it]

evaluator: True
Generated keyword: lemon zest
USING MODEL: gpt-4o
KEYWORD: lemon zest

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Kitchen
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Beverage
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cookbook
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Utensil
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: No.
guesser: Spice rack
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Recipe card
evaluator: False

questioner: Is it something that can be used for food storage?
answerer: No.
guesser: Cutting board
evaluator: False

questioner: Is it something that is used to enhance the flavor of food?
answerer: Yes.
guesser: Spice
evaluator: False

questioner: Is it a type 

games generated:  44%|████▎     | 87/200 [1:07:44<1:25:27, 45.38s/it]

evaluator: False

Generated keyword: wind vane
USING MODEL: gpt-4o
KEYWORD: wind vane

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Beach
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Forest
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Lake
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Statue
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Park
evaluator: False

questioner: Is 

games generated:  44%|████▍     | 88/200 [1:08:37<1:28:50, 47.60s/it]

evaluator: False

Generated keyword: entrance door
USING MODEL: gpt-4o
KEYWORD: entrance door

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Pacific Ocean
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Sahara Desert
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Mount Kilimanjaro
evaluator: False

questioner: Is it broadly related to the 

games generated:  44%|████▍     | 89/200 [1:09:21<1:25:57, 46.47s/it]

evaluator: False

Generated keyword: Protein Shakes
USING MODEL: gpt-4o
KEYWORD: Protein Shakes

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Spaghetti
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Burger
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it a type of dessert?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it a type of salad?
answerer: No.
guesser: Sandwich
evaluator: False

questioner: Is it commonly served as an appetizer?
answerer: No.
guesser: Cereal
evaluator: False

questioner: Is it a type of sandwich?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it commonly found in fast food restaurants?
answerer: No.
guesser: Cheese Plate
eval

games generated:  45%|████▌     | 90/200 [1:10:10<1:26:46, 47.33s/it]

evaluator: False

Generated keyword: trouser
USING MODEL: gpt-4o
KEYWORD: trouser

questioner: Is it related to food, beverages or cooking?
answerer: No
guesser: Airplane
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No
guesser: Automobile
evaluator: False

questioner: Is it something that is used in construction or building?
answerer: No
guesser: Machine tool
evaluator: False

questioner: Is it a tool or equipment used in manufacturing processes?
answerer: No
guesser: Textile
evaluator: False

questioner: Is it a product or item that is commonly used in everyday life?
answerer: Yes
guesser: Furniture
evaluator: False

questi

games generated:  46%|████▌     | 91/200 [1:10:54<1:23:58, 46.22s/it]

evaluator: True
Generated keyword: Insulation
USING MODEL: gpt-4o
KEYWORD: Insulation

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Safety harness
evaluator: False

questioner: Is it used primarily for personal protection rather than structural integrity?
answerer: No.
guesser: Scaffolding
evaluator: False

questioner: Is it used in maintaining the stability or support of a structure?
answerer: No.
guesser: Steel beams
evaluator: False

questioner: Is it related to fire safety or prevention?
ans

games generated:  46%|████▌     | 92/200 [1:11:39<1:22:42, 45.95s/it]

evaluator: False

Generated keyword: Piping tips
USING MODEL: gpt-4o
KEYWORD: Piping tips

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Stove
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Beverage
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Knife
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: Yes.
guesser: Spoon
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it used for serving or eating food directly?
answerer: No.
guesser: Measuring cup
evaluator: False

questioner: Is it used for cooking or preparing food?
answerer: Yes.
guesser: Grater
evaluator: False

questioner: Is it an appliance that r

games generated:  46%|████▋     | 93/200 [1:12:34<1:26:55, 48.75s/it]

evaluator: False

Generated keyword: tile
USING MODEL: gpt-4o
KEYWORD: tile

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Warehouse
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer: No.
guesser: Product
evaluator: False

questioner: Is it a finished product used by consumers?
answerer: Yes.
guesser: Car
evaluator: False

questioner: Is it commonly used in households?
answerer: Yes.
guesser: Furniture
evaluator: False

questioner: Is it related to cleaning or maintenance?
answerer: No.
gue

games generated:  47%|████▋     | 94/200 [1:13:28<1:28:47, 50.26s/it]

evaluator: False

Generated keyword: nintendo switch
USING MODEL: gpt-4o
KEYWORD: nintendo switch

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Football
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing card
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for playing video games?
answerer: Yes.
guesser: Game console
evaluator: False

questioner: Is it a specific type of video game controller?
answerer: No.
guesser: Handheld gaming console
evaluator: False

questioner: Is it a handheld gaming device?
answerer: Yes.
guesser: Nintend

games generated:  48%|████▊     | 95/200 [1:13:50<1:12:57, 41.69s/it]

evaluator: True
Generated keyword: library stamp
USING MODEL: gpt-4o
KEYWORD: library stamp

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Rock
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it used for writ

games generated:  48%|████▊     | 96/200 [1:14:44<1:18:53, 45.51s/it]

evaluator: False

Generated keyword: rice krispie
USING MODEL: gpt-4o
KEYWORD: rice krispie

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Kitchen
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Chocolate
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Sandwich
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it a dessert?
answerer: Yes.
guesser: Ice cream
evaluator: False

questioner: Is it commonly baked?
answerer: No.
guesser: Pudding
evaluator: False

questioner: Is it served frozen?
answerer: No.
guesser: Cake
evaluator: False

questioner: Is it a type of candy?
answerer: No.
guesser: Fruit salad
evaluator: False

questioner: Is it a dairy product?
answerer: No.
guesser: Jelly
evaluator: False

questioner: Is it a fruit-based dessert

games generated:  48%|████▊     | 97/200 [1:15:38<1:22:36, 48.12s/it]

evaluator: False

Generated keyword: hanging basket
USING MODEL: gpt-4o
KEYWORD: hanging basket

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Beach
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: School
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it generally used indoors?
answerer: Yes.
guesser

games generated:  49%|████▉     | 98/200 [1:16:32<1:24:27, 49.68s/it]

evaluator: False

Generated keyword: piano
USING MODEL: gpt-4o
KEYWORD: piano

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it a type of equipment or structure used in sports?
answerer: No.
guesser: Amusement Park
evaluator: False

questioner: Is it an electronic device?
answerer: No.
guesser: Movie
evaluator: False

questioner: Is it related to an indoor activity?
answerer: Yes.
guesser: Theater
evaluator: False

questioner: Is it used primarily for watching or listening to something?
answerer: No.
guesser: Board Game
evaluator: False

questioner: Is it a game or toy?
answere

games generated:  50%|████▉     | 99/200 [1:17:28<1:26:42, 51.51s/it]

evaluator: False

Generated keyword: Pliers
USING MODEL: gpt-4o
KEYWORD: Pliers

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer: Yes.
guesser: CNC machine
evaluator: False

questioner: Is it used in construction or building-related tasks?
answerer: Yes.
guesser: Excavator
evaluator: False

questioner: Is it a hand-held tool?
answerer: Yes.
guesser: Hammer
evaluator: False

questioner: Does it require electricity to operate?
answerer: No

games generated:  50%|█████     | 100/200 [1:18:20<1:26:28, 51.88s/it]

evaluator: False

Generated keyword: Plunger
USING MODEL: gpt-4o
KEYWORD: Plunger

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Paris
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it typically found in an 

games generated:  50%|█████     | 101/200 [1:18:48<1:13:24, 44.49s/it]

evaluator: True
Generated keyword: Dimmer switch
USING MODEL: gpt-4o
KEYWORD: Dimmer switch

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: River
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: House
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Pen
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it a device you would commonly use at home?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it used for communication purposes?
answere

games generated:  51%|█████     | 102/200 [1:19:39<1:16:07, 46.61s/it]

evaluator: False

Generated keyword: thermal printer
USING MODEL: gpt-4o
KEYWORD: thermal printer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it a consumer electronic device?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it primarily used for communication?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it typically used for entertainment purposes?
answerer: No.
guesser: Washing machine
evaluator: False

questioner: Is it used in a professional or work-related environment?
answerer: Yes.
guesser: Printer
evaluator: False

questioner: Is it a t

games generated:  52%|█████▏    | 103/200 [1:20:24<1:14:43, 46.22s/it]

evaluator: True
Generated keyword: Bow
USING MODEL: gpt-4o
KEYWORD: Bow

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: baseball bat
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: Yes.
guesser: paintbrush
evaluator: False

questioner: Is it commonly used in visual arts, such as painting or drawing?
answerer: No.
guesser: musical instrument
evaluator: False

questioner: Is it related to music?
answerer: Yes.
guesser: guitar
evaluator: False

questioner: Is it an in

games generated:  52%|█████▏    | 104/200 [1:21:16<1:16:45, 47.97s/it]

evaluator: False

Generated keyword: Lug wrench
USING MODEL: gpt-4o
KEYWORD: Lug wrench

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: School
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it something typically found in a home?
answerer:

games generated:  52%|█████▎    | 105/200 [1:22:12<1:19:37, 50.29s/it]

evaluator: False

Generated keyword: golden eagle
USING MODEL: gpt-4o
KEYWORD: golden eagle

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: School
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Hospital
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Elephant
evaluator: False

questioner: Is it commonly kept as a pet?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a wild animal?
answerer: Yes.
guesser: Tiger
evaluator: False

questioner: Is it a type of mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a type of bird?
answerer: Yes.
guesser: Owl
evaluator: False

questioner: Does it primarily inhabit aquat

games generated:  53%|█████▎    | 106/200 [1:22:46<1:11:14, 45.47s/it]

evaluator: True
Generated keyword: Load-bearing wall
USING MODEL: gpt-4o
KEYWORD: Load-bearing wall

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Lake
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Star
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: River
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Desert
evaluator: False

questio

games generated:  54%|█████▎    | 107/200 [1:23:34<1:11:29, 46.13s/it]

evaluator: False

Generated keyword: corn snake
USING MODEL: gpt-4o
KEYWORD: corn snake

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Elephant
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Tiger
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a type of bird?
answerer: No.
guesser: Shark
evaluator: False

questioner: Is it a type of reptile?
answerer: Yes.
guesser: Crocodile
evaluator: False

questioner: Is it primarily f

games generated:  54%|█████▍    | 108/200 [1:24:22<1:11:26, 46.59s/it]

evaluator: True
Generated keyword: Lemon
USING MODEL: gpt-4o
KEYWORD: Lemon

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Kitchen
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it a type of fruit?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it commonly consumed raw?
answerer: No.
guesser: Banana
evaluator: False

questioner: Is it typically used in cooking or baking?
answerer: Yes.
guesser: Pumpkin
evaluator: False

questioner: Is it often used in sweet dishes or desserts?
answerer: Yes.
guesser: Cherry
evaluator: False

questioner: Is it a type of fruit that grows on a tree?
answerer: Yes.
guesser: Peach
evaluator: False

questioner: Is it a type of stone fruit?
answerer: No.
guesser: Pear
evaluator: False

questioner: Is it an apple?
answerer: No.
guesser: Fig
evaluator: False

ques

games generated:  55%|█████▍    | 109/200 [1:24:49<1:02:04, 40.93s/it]

evaluator: True
Generated keyword: Nightstand
USING MODEL: gpt-4o
KEYWORD: Nightstand

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Moon
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Pacific Ocean
evaluator: False

questioner: Is it furniture?
answerer: Yes.
guesser: Chair
evaluator: False

questioner: Is it typically found in a living room?
answerer: No.

games generated:  55%|█████▌    | 110/200 [1:25:17<55:30, 37.01s/it]  

evaluator: True
Generated keyword: Board game
USING MODEL: gpt-4o
KEYWORD: Board game

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Baseball bat
evaluator: False

questioner: Is it used in sports or physical activities?
answerer: No.
guesser: Board game piece
evaluator: False

questioner: Is it commonly found indoors?
answerer: Yes.
guesser: Book
evaluator: False

questioner: I

games generated:  56%|█████▌    | 111/200 [1:25:36<46:50, 31.58s/it]

evaluator: True
Generated keyword: Jeep
USING MODEL: gpt-4o
KEYWORD: Jeep

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Car factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Steel mill
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Warehouse
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Textile factory
evaluator: False

questioner: Is it a machine or a tool?
answerer: Yes.
guesser: Lathe
evaluator: False

questioner: Is it typically used in construction or building?
answerer: No.
guesser: 3D printer
evaluator: False

questioner: Is it commonly found in a factory setting?
answerer: No.
guesser: Drill Press
evaluator: False

questioner: Is it used for crafting or small-scale producti

games generated:  56%|█████▌    | 112/200 [1:26:25<53:51, 36.72s/it]

evaluator: False

Generated keyword: Cargo crate
USING MODEL: gpt-4o
KEYWORD: Cargo crate

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it used primarily in construction or building?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it a machine or machinery component?
answerer: No.
guesser: Textile
evaluator: False

questioner: Is it a product used for packaging or transporting goods?
answerer: Yes.
guesser: Box
evaluator: False

questioner: Is it primarily made of plasti

games generated:  56%|█████▋    | 113/200 [1:27:07<55:42, 38.42s/it]

evaluator: True
Generated keyword: Milkshake
USING MODEL: gpt-4o
KEYWORD: Milkshake

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Cooking utensils
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Beverage
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Spice jar
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it a non-alcoholic beverage?
answerer: Yes.
guesser: Tea
evaluator: False

questioner: Is it commonly served hot?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Water
evaluator: False

questioner: Is it a type of fruit juice?
answerer: No.
guesser: Milk
evaluator: False

questioner: Is it primarily consumed for hydration?
answerer: No.
guesser: Soda
evaluator: False

ques

games generated:  57%|█████▋    | 114/200 [1:27:46<55:18, 38.59s/it]

evaluator: True
Generated keyword: Area rug
USING MODEL: gpt-4o
KEYWORD: Area rug

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Moon
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Pacific Ocean
evaluator: False

questioner: Is it broadly related to the arts or media?
answere

games generated:  57%|█████▊    | 115/200 [1:28:30<57:01, 40.25s/it]

evaluator: False

Generated keyword: butterfly weed
USING MODEL: gpt-4o
KEYWORD: butterfly weed

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car.
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree.
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book.
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: Yes.
guesser: Human
evaluator: False

questioner: Is it a plant?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it a type of tree?
answerer: No.
guesser: Cactus
evaluator: False

questioner: Is it a type o

games generated:  58%|█████▊    | 116/200 [1:29:18<59:32, 42.53s/it]

evaluator: False

Generated keyword: Sleep mask
USING MODEL: gpt-4o
KEYWORD: Sleep mask

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it primarily worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it typically worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it an accessory?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it commonly worn on the head or face?
answerer: Yes.
guesser: Sunglasses
evaluator: False

questioner: Is it typically worn to protect from the sun?
answerer: No.
gues

games generated:  58%|█████▊    | 117/200 [1:30:13<1:04:01, 46.29s/it]

evaluator: False

Generated keyword: blue jay
USING MODEL: gpt-4o
KEYWORD: blue jay

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a bird?
answerer: Yes.
guesser: Parrot
evaluator: False

questioner: Is it known for its ability to fly?
answerer: Yes.
guesser: Falcon
evaluator: False

questioner: Is it a bird of prey?
answerer: No.
guesser:

games generated:  59%|█████▉    | 118/200 [1:30:44<56:56, 41.67s/it]  

evaluator: True
Generated keyword: Basil
USING MODEL: gpt-4o
KEYWORD: Basil

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Kitchen utensil
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Farm
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Tractor
evaluator: False

questioner: Is it a type of machine or tool?
answerer: No.
guesser: Irrigation system
evaluator: False

questioner: Is it a crop or plant?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a type of fruit?
answerer: No.
guesser: Tomato
evaluator: False

questioner: Is it a type of vegetable?
answerer: No.
guesser: Wheat
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: Soybean
evaluator: False

questioner: Is it a type of herb or spice?
answerer: Yes.
guesser: Basil


games generated:  60%|█████▉    | 119/200 [1:31:05<47:57, 35.52s/it]

evaluator: True
Generated keyword: lime juice
USING MODEL: gpt-4o
KEYWORD: lime juice

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Water bottle
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Tractor
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Farm
evaluator: False

questioner: Is it related to tools or machinery used in agriculture?
answerer: No.
guesser: Fertilizer
evaluator: False

questioner: Is it a type of plant or crop?
answerer: No.
guesser: Irrigation system
evaluator: False

questioner: Is it a process or method used in agriculture?
answerer: No.
guesser: Barn
evaluator: False

questioner: Is it a product of agriculture?
answerer: Yes.
guesser: Honey
evaluator: False

questioner: Is it a natural product, not processed or manufactured?
answerer: No.
guesser: Cheese
evaluator: Fals

games generated:  60%|██████    | 120/200 [1:32:00<55:02, 41.28s/it]

evaluator: False

Generated keyword: Straw stack
USING MODEL: gpt-4o
KEYWORD: Straw stack

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Beach
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Lake
evaluator: False

que

games generated:  60%|██████    | 121/200 [1:32:46<56:19, 42.78s/it]

evaluator: False

Generated keyword: Chain link fence
USING MODEL: gpt-4o
KEYWORD: Chain link fence

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Safety harness
evaluator: False

questioner: Is it a type of personal protective equipment (PPE)?
answerer: No.
guesser: Scaffolding
evaluator: False

questioner: Is it a tool or device used in the construction process?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it a component or material used in constructing buildings?


games generated:  61%|██████    | 122/200 [1:33:40<59:59, 46.14s/it]

evaluator: False

Generated keyword: Switch
USING MODEL: gpt-4o
KEYWORD: Switch

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Soccer Ball
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing Cards
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for gaming?
answerer: Yes.
guesser: Game console
evaluator: False

questioner: Is it a type of handheld gaming console?
answerer: Yes.
guesser: Nintendo Switch
evaluator: False

questioner: Is it made by Nintendo?
answerer: Yes.
guesser: Nintendo Game Boy
evaluator: False

questioner: Is it 

games generated:  62%|██████▏   | 123/200 [1:34:31<1:01:01, 47.55s/it]

evaluator: False

Generated keyword: Backpacks
USING MODEL: gpt-4o
KEYWORD: Backpacks

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Gucci Handbag
evaluator: False

questioner: Is it primarily worn on the body?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it typically worn on the upper body?
answerer: Yes.
guesser: T-shirt
evaluator: False

questioner: Is it usually designed to be worn for formal or professional occasions?
answerer: No.
guesser: Sweater
evaluator: False

questioner: Is it generally considered casual or everyday wear?
answerer: Yes.
guesser: T-shirt
eva

games generated:  62%|██████▏   | 124/200 [1:35:21<1:00:56, 48.11s/it]

evaluator: False

Generated keyword: Laser sight
USING MODEL: gpt-4o
KEYWORD: Laser sight

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Laptop
evaluator: False

questioner: Is it primarily used for communication?
answerer: No.
guesser: Camera
evaluator: False

questioner: I

games generated:  62%|██████▎   | 125/200 [1:36:12<1:01:19, 49.06s/it]

evaluator: False

Generated keyword: Bumblebee
USING MODEL: gpt-4o
KEYWORD: Bumblebee

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: remote control
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: video game controller
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: camera
evaluator: False

questioner: Is it a type of audio or video equipment?
answerer: No.
guesser: smartphone
evaluator: False

questioner: Is it used for communication purposes?
answerer: No.
guesser: VR headset
evaluator: False

questioner: Is it commonly used to w

games generated:  63%|██████▎   | 126/200 [1:37:02<1:00:55, 49.40s/it]

evaluator: False

Generated keyword: Transformer
USING MODEL: gpt-4o
KEYWORD: Transformer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Steel Mill
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it a consumer product?
answerer: No.
guesser: Semiconductor fabrication plant
evaluator: False

questioner: Is it used primarily in the process of manufacturing electronic devices?
answerer: No.
guesser: Supercomputer
evaluator: False

questioner: Is it a type of industrial machinery or equipment?
answerer: Yes.
guesser: 3D printer
evaluator: False

questioner: Is it used for machining or material processing?
answerer: No.
guesser: Robotic Ar

games generated:  64%|██████▎   | 127/200 [1:37:29<51:52, 42.64s/it]  

evaluator: True
Generated keyword: Protractor
USING MODEL: gpt-4o
KEYWORD: Protractor

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Niagara Falls
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it common

games generated:  64%|██████▍   | 128/200 [1:38:12<51:10, 42.65s/it]

evaluator: True
Generated keyword: signature
USING MODEL: gpt-4o
KEYWORD: signature

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Moon
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it broadly related to the arts or media?


games generated:  64%|██████▍   | 129/200 [1:39:01<52:57, 44.76s/it]

evaluator: False

Generated keyword: electric range
USING MODEL: gpt-4o
KEYWORD: electric range

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Coffee maker
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Restaurant
evaluator: False

questioner: Is it typically found in a kitchen?
answerer: Yes.
guesser: Refrigerator
evaluator: False

questioner: Is it an appliance?
answerer: Yes.
guesser: Microwave
evaluator: False

questioner: Is it used for cooking or baking?
answerer: Yes.
guesser: Stove
evaluator: False

questioner: Is it primarily used on the countertop?
answerer: No.
guesser: Dishwasher
evaluator: False

questioner: Is it primarily used within the oven or stove area?
answerer: Yes.
guesser: Toaster oven
evaluator: False

questioner: I

games generated:  65%|██████▌   | 130/200 [1:39:27<45:25, 38.94s/it]

evaluator: True
Generated keyword: refrigerator
USING MODEL: gpt-4o
KEYWORD: refrigerator

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee shop
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Kitchen
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Refrigerator


games generated:  66%|██████▌   | 131/200 [1:39:34<33:58, 29.54s/it]

evaluator: True
Generated keyword: engagement ring
USING MODEL: gpt-4o
KEYWORD: engagement ring

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it a type of machinery or equipment used in manufacturing processes?
answerer: No.
guesser: Plastic
evaluator: False

questioner: Is it a product commonly used in construction or building?
answerer: No.
guesser: Textiles
evaluator: False

questioner: Is it a finished consumer product?
answerer: Yes.
guesser: Furniture
evaluator: False

q

games generated:  66%|██████▌   | 132/200 [1:40:22<39:47, 35.10s/it]

evaluator: False

Generated keyword: Foundation
USING MODEL: gpt-4o
KEYWORD: Foundation

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car.
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Perfume
evaluator: False

questioner: Is it something that is typically worn on the upper part of the body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it commonly worn on the lower part of the body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it an accessory rather than a piece of clothing?
answerer: No.
guesser: Makeup
evaluator: False

questioner: Is it related to beauty products, such as skincare or makeup?
answerer: Yes.
guesser: Lotion
evaluat

games generated:  66%|██████▋   | 133/200 [1:40:45<34:56, 31.30s/it]

evaluator: True
Generated keyword: Electric guitar
USING MODEL: gpt-4o
KEYWORD: Electric guitar

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it a device that is commonly used in households?
answerer: No.
guesser: Industrial robot
evaluator: False

questioner: Is it used primarily in a professional or industrial setting?
answerer: No.
guesser: Semiconductor
evaluator: False

questioner: Is it a component that might be used within larger technological systems?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it a device primarily used for personal or individual use?
an

games generated:  67%|██████▋   | 134/200 [1:41:35<40:47, 37.09s/it]

evaluator: True
Generated keyword: Flagging tape
USING MODEL: gpt-4o
KEYWORD: Flagging tape

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Safety glasses
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it a type of protective gear worn by workers?
answerer: No.
guesser: Safety harness
evaluator: False

questioner: Is it a type of safety mechanism or device installed in buildings or structures?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it a tool or equipment used in construction for safety

games generated:  68%|██████▊   | 135/200 [1:42:32<46:31, 42.95s/it]

evaluator: False

Generated keyword: electric range
USING MODEL: gpt-4o
KEYWORD: electric range

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Beverage
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Restaurant
evaluator: False

questioner: Is it a type of kitchen appliance?
answerer: Yes.
guesser: Blender
evaluator: False

questioner: Is it used for cooking or preparing food?
answerer: Yes.
guesser: Oven
evaluator: False

questioner: Is it used for heating food?
answerer: Yes.
guesser: Microwave
evaluator: False

questioner: Is it typically placed on a countertop?
answerer: No.
guesser: Stove
evaluator: False

questioner: Is it an oven?
answerer: No.
guesser: Stovetop
evaluator: False

questioner: Is it a stove or range?
answerer: 

games generated:  68%|██████▊   | 136/200 [1:42:55<39:22, 36.92s/it]

evaluator: True
Generated keyword: ATV
USING MODEL: gpt-4o
KEYWORD: ATV

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it primarily used outdoors?
answerer: Yes.
guesser: Soccer field
evaluator: False

questioner: Is it a type of equipment or gear used in sports?
answerer: Yes.
guesser: Basketball hoop
evaluator: False

questioner: Is it used in a competitive team sport?
answerer: No.
guesser: Golf club
evaluator: False

questioner: Is it used in an individual sport?
answerer: Yes.
guesser: Tennis court
evaluator: False

questioner: Is it commonly used in water sports?
answerer

games generated:  68%|██████▊   | 137/200 [1:43:41<41:44, 39.75s/it]

evaluator: True
Generated keyword: green anaconda
USING MODEL: gpt-4o
KEYWORD: green anaconda

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Forest
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a bird?
answerer: No.
guesser: Shark
evaluator: False

questioner: Is it a reptile?
answerer: Yes.
guesser: Snake
evaluator: False

questioner: Is it commonly found in the wild in deserts?
answerer: No.

games generated:  69%|██████▉   | 138/200 [1:44:36<45:45, 44.28s/it]

evaluator: False

Generated keyword: Garden sprayer
USING MODEL: gpt-4o
KEYWORD: Garden sprayer

questioner: Is it related to food, beverages or cooking?
answerer: No
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No
guesser: Mount Everest
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No
guesser: Great Wall of China
evaluator: False

questioner: Is it an animal?
answerer: No
guesser: Statue of Liberty
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No
guesser: Pen
evaluator: False

questioner: Is i

games generated:  70%|██████▉   | 139/200 [1:45:26<46:37, 45.85s/it]

evaluator: False

Generated keyword: Table Centerpieces
USING MODEL: gpt-4o
KEYWORD: Table Centerpieces

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tower of London
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: pen
evaluator: False

ques

games generated:  70%|███████   | 140/200 [1:46:42<54:55, 54.92s/it]

evaluator: False

Generated keyword: Scrunchie
USING MODEL: gpt-4o
KEYWORD: Scrunchie

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Metal fabrication
evaluator: False

questioner: Is it used in construction or building?
answerer: No.
guesser: Textile mill
evaluator: False

questioner: Is it related to textiles or fabrics?
answerer: Yes.
guesser: Loom
evaluator: False

questioner: Is it primarily used in the production of clothing?
answerer: No.
guesser: Upholstery
evaluator: False

questioner: Is it associated with furnitu

games generated:  70%|███████   | 141/200 [1:47:31<52:25, 53.31s/it]

evaluator: False

Generated keyword: Teddy bear
USING MODEL: gpt-4o
KEYWORD: Teddy bear

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a type of machine or machinery?
answerer: No.
guesser: Products
evaluator: False

questioner: Is it a tool or instrument used in manufacturing?
answerer: No.
guesser: Raw materials
evaluator: False

questioner: Is it a type of product created through manufacturing?
answerer: Yes.
guesser: Automobile
evaluator: False

questioner: Is it commonly used in 

games generated:  71%|███████   | 142/200 [1:48:25<51:41, 53.48s/it]

evaluator: False

Generated keyword: Barracuda
USING MODEL: gpt-4o
KEYWORD: Barracuda

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a mammal?
answerer: No.
guesser: Chicken
evaluator: False

questioner: Is it a bird?
answerer: No.
guesser: Fish
evaluator: False

questioner: Is it a type of reptile?
answerer: No.
guesser: Insect
evaluator: False

questioner: Is it an amphibian?
answerer: No.
guesser: Crab
evaluator: False

questioner: Is it an insect?
answerer: No.
guesser: Fish
evaluator: False

qu

games generated:  72%|███████▏  | 143/200 [1:49:04<46:42, 49.16s/it]

evaluator: True
Generated keyword: Hair Spray
USING MODEL: gpt-4o
KEYWORD: Hair Spray

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Handbag
evaluator: False

questioner: Is it something that people wear on their feet?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it primarily worn on the upper body?
answerer: No.
guesser: Bracelet
evaluator: False

questioner: Is it an accessory rather than a main article of clothing?
answerer: Yes.
guesser: Hat
evaluator: False

questioner: Is it commonly worn on the head?
answerer: No.
guesser: Belt
evaluator: False

questioner: Is it

games generated:  72%|███████▏  | 144/200 [1:50:04<48:59, 52.50s/it]

evaluator: False

Generated keyword: Envelope
USING MODEL: gpt-4o
KEYWORD: Envelope

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Key
evaluator: False

questioner: Is it a type of tool?
answerer: No.
guesser: Toy
evaluator: False


games generated:  72%|███████▎  | 145/200 [1:50:41<43:51, 47.85s/it]

evaluator: True
Generated keyword: Shoe Bench
USING MODEL: gpt-4o
KEYWORD: Shoe Bench

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it something that is typically worn?
answerer: No.
guesser: Perfume
evaluator: False

questioner: Is it used primarily for grooming or personal care?
answerer: No.
guesser: Sewing machine
evaluator: False

questioner: Is it a type of accessory?
answerer: No.
guesser: Mirror
evaluator: False

questioner: Is it used for storage or organization?
answerer: Yes.
guesser: Wardrobe
evaluator: False

questioner: Is it often found in a bedroom or 

games generated:  73%|███████▎  | 146/200 [1:51:20<40:32, 45.05s/it]

evaluator: True
Generated keyword: Sorbet maker
USING MODEL: gpt-4o
KEYWORD: Sorbet maker

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it a kitchen utensil or tool?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it primarily used for cutting or chopping?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it an electrical appliance?
answerer: Yes.
guesser: Blender
evaluator: False

questioner: Is it used for mixing or blending ingredients?
answerer: Yes.
guesser: Food processor
evaluator: False

questioner: Is it a handheld device?
answere

games generated:  74%|███████▎  | 147/200 [1:52:07<40:15, 45.58s/it]

evaluator: False

Generated keyword: CD player
USING MODEL: gpt-4o
KEYWORD: CD player

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Baseball bat
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for playing video games or watching media?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it a device used for communication?
answerer: No.
guesser: Drone
evaluator: False

questioner: Is it a piece of sports equipment?
answerer: No.
guesser: Virtual reality headset
evaluator: 

games generated:  74%|███████▍  | 148/200 [1:52:46<37:48, 43.62s/it]

evaluator: True
Generated keyword: bookmark
USING MODEL: gpt-4o
KEYWORD: bookmark

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Balloon
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it a tool or an instrument used for a specific purpose?

games generated:  74%|███████▍  | 149/200 [1:53:32<37:42, 44.37s/it]

evaluator: False

Generated keyword: Chai tea
USING MODEL: gpt-4o
KEYWORD: Chai tea

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Tractor
evaluator: False

questioner: Is it typically used in the cultivation process?
answerer: No.
guesser: Pesticide
evaluator: False

questioner: Is it a finished agricultural product?
answerer: Yes.
guesser: Milk
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Juice
evaluator: False

questioner: Is it alcoholic?
answerer: No.
guesser: Tea
evaluator: False

questioner: Is it commonly consumed hot?
answerer: Yes.
guesser: Hot chocolate
evaluator: False

questioner: Is it made from coffee beans?
answerer: No.
guesser: Green tea
evaluat

games generated:  75%|███████▌  | 150/200 [1:54:15<36:38, 43.97s/it]

evaluator: False

Generated keyword: Hair clip
USING MODEL: gpt-4o
KEYWORD: Hair clip

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Dress
evaluator: False

questioner: Is it something that is primarily worn or used by a specific gender?
answerer: No.
guesser: Watch
evaluator: False

questioner: Is it typically worn on the upper body?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it an item of clothing?
answerer: No.
guesser: Necklace
evaluator: False

questioner: Is it an accessory?
answerer: Yes.
guesser: Scarf
evaluator: False

questioner: Is it typically worn on the head?
answerer: Yes.
guesser: Ha

games generated:  76%|███████▌  | 151/200 [1:54:57<35:26, 43.40s/it]

evaluator: True
Generated keyword: Falcon
USING MODEL: gpt-4o
KEYWORD: Falcon

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Beach
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Hospital
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it a domesticated animal?
answerer: No.
guesser: Lion
evaluator: False

questioner: Is it primarily found in aquatic environments?
answerer: No.
guesser: Elephant
evaluator: False

questioner: Is it typically found in forests or wooded areas?
answerer: No.
guesser: Eagle
evaluator: False

questioner: Is it a type of bird?
answerer: Yes.
guesser: Penguin
evaluator: False

qu

games generated:  76%|███████▌  | 152/200 [1:55:19<29:33, 36.94s/it]

evaluator: True
Generated keyword: Gift card
USING MODEL: gpt-4o
KEYWORD: Gift card

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: House
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Phone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it used for writing or drawing?
answerer: No.
guesser: Key
ev

games generated:  76%|███████▋  | 153/200 [1:56:04<30:48, 39.34s/it]

evaluator: False

Generated keyword: Flashlight
USING MODEL: gpt-4o
KEYWORD: Flashlight

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: School
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it something a person commonly uses on a daily basis?
answerer: No.
guesser: Drone
evaluator: False

questioner: Is it a device used for communication?
a

games generated:  77%|███████▋  | 154/200 [1:56:49<31:27, 41.02s/it]

evaluator: False

Generated keyword: clasp
USING MODEL: gpt-4o
KEYWORD: clasp

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Necklace
evaluator: False

questioner: Is it primarily worn on the upper body?
answerer: No.
guesser: Shoes
evaluator: False

questioner: Is it primarily worn on the lower body?
answerer: No.
guesser: Hat
evaluator: False

questioner: Is it an accessory rather than an article of clothing?
answerer: Yes.
guesser: Watch
evaluator: False

questioner: Is it commonly worn on the head?
answerer: No.
guesser: Bracelet
evaluator: False

questioner: Is it commonly worn on the hands?
answerer: No.

games generated:  78%|███████▊  | 155/200 [1:57:34<31:44, 42.33s/it]

evaluator: False

Generated keyword: engagement ring
USING MODEL: gpt-4o
KEYWORD: engagement ring

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Tool
evaluator: False

questioner: Is it a type of machine or equipment used in manufacturing?
answerer: No.
guesser: Raw materials
evaluator: False

questioner: Is it a chemical or a type of chemical compound?
answerer: No.
guesser: Product
evaluator: False

questioner: Is it related to construction or building materials?
answerer: No.
guesser: Textile
evaluator: False

ques

games generated:  78%|███████▊  | 156/200 [1:58:20<31:48, 43.37s/it]

evaluator: False

Generated keyword: Fruit puree
USING MODEL: gpt-4o
KEYWORD: Fruit puree

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: apple
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: bread
evaluator: False

questioner: Is it a type of fruit?
answerer: No.
guesser: potato
evaluator: False

questioner: Is it a type of vegetable?
answerer: No.
guesser: rice
evaluator: False

questioner: Is it an animal product?
answerer: No.
guesser: wheat
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: herb
evaluator: False

questioner: Is it a type of herb or spice?
answerer: No.
guesser: nut
evaluator: False

questioner: Is it a type of nut or seed?
answerer: No.
guesser: mushroom
evaluator: False

questioner: Is it a type of legume?
answerer: No.
guesser: sugarcane
evaluator: False

questioner: Is it a processed o

games generated:  78%|███████▊  | 157/200 [1:59:07<31:53, 44.50s/it]

evaluator: False

Generated keyword: Dishwasher
USING MODEL: gpt-4o
KEYWORD: Dishwasher

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Beach
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Museum
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: City
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Bridge
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Space
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.
guesser: Ocean
evaluator: False

questione

games generated:  79%|███████▉  | 158/200 [1:59:42<29:10, 41.69s/it]

evaluator: True
Generated keyword: Canary
USING MODEL: gpt-4o
KEYWORD: Canary

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: Elephant
evaluator: False

questioner: Is it a domesticated animal?
answerer: Yes.
guesser: Dog
evaluator: False

questioner: Is it commonly kept as a pet?
answerer: Yes.
guesser: Cat
evaluator: False

questioner: Is it a type of mammal?
answerer: No.
guesser: Parrot
evaluator: False

questioner: Is it a type of bird?
answerer: Yes.
guesser: Canary


games generated:  80%|███████▉  | 159/200 [2:00:00<23:34, 34.50s/it]

evaluator: True
Generated keyword: Trowel
USING MODEL: gpt-4o
KEYWORD: Trowel

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Balloon
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Keychain
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Toy
evaluator: False

questioner: Is it typically found in a household setting?
answerer: Yes.

games generated:  80%|████████  | 160/200 [2:00:47<25:27, 38.18s/it]

evaluator: True
Generated keyword: Catcher
USING MODEL: gpt-4o
KEYWORD: Catcher

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Disneyland
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Soccer field
evaluator: False

questioner: Is it a type of location used for entertainment or sports?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it a form of media, such as a film, TV show, or book?
answerer: No.
guesser: Roller coaster
evaluator: False

questioner: Is it a type of equipment or gear used in sports or entertainment?
answerer: No.
guesser: Concert ticket
evaluator: False

questioner: Is it an activity or type of game?
answerer: No.
guesse

games generated:  80%|████████  | 161/200 [2:01:39<27:35, 42.45s/it]

evaluator: False

Generated keyword: glove
USING MODEL: gpt-4o
KEYWORD: glove

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Park
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Movie theater
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it a location where entertainment or sports events take place?
answerer: No.
guesser: Television
evaluator: False

questioner: Is it a form of media, such as a movie, book, or video game?
answerer: No.
guesser: Concert
evaluator: False

questioner: Is it equipment or gear used in sports or entertainment?
answerer: Yes.
guesser: Basketball
evaluator: False

questioner: Is it primarily used in outdoor sports?
answerer: Yes.
guesser: Soccer ball
evaluator:

games generated:  81%|████████  | 162/200 [2:02:27<27:53, 44.03s/it]

evaluator: False

Generated keyword: Smoke detector
USING MODEL: gpt-4o
KEYWORD: Smoke detector

questioner: Is it related to food, beverages or cooking?
answerer: No
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Train
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Flower
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it a device used for communication?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it typically powered by batteries?
answerer: Yes.

games generated:  82%|████████▏ | 163/200 [2:03:38<32:12, 52.22s/it]

evaluator: True
Generated keyword: urn
USING MODEL: gpt-4o
KEYWORD: urn

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Sahara Desert
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Pen
evaluator: False

questioner: Is it commonly found in h

games generated:  82%|████████▏ | 164/200 [2:04:25<30:24, 50.67s/it]

evaluator: False

Generated keyword: Smartphone
USING MODEL: gpt-4o
KEYWORD: Smartphone

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Smartphone


games generated:  82%|████████▎ | 165/200 [2:04:33<21:59, 37.70s/it]

evaluator: True
Generated keyword: Digital audio player
USING MODEL: gpt-4o
KEYWORD: Digital audio player

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Chair
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing card
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Remote control
evaluator: False

questioner: Is it primarily used for watching or playing media, such as movies, music, or photos?
answerer: Yes.
guesser: Smartphone
evaluator: False

questioner: Is it a portable device?
answerer: Yes.
guesser: Tablet


games generated:  83%|████████▎ | 166/200 [2:04:55<18:44, 33.07s/it]

evaluator: True
Generated keyword: Bottle opener
USING MODEL: gpt-4o
KEYWORD: Bottle opener

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Knife
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it used in the process of cooking or preparing food?
answerer: No.
guesser: Plate
evaluator: False

questioner: Is it typically found in a kitchen?
answerer: Yes.
guesser: Cup
evaluator: False

questioner: Is it a kitchen tool or utensil?
answerer: Yes.
guesser: Fork
evaluator: False

questioner: Is it primarily used for cutting or slicing?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it used for measuring ingr

games generated:  84%|████████▎ | 167/200 [2:05:35<19:21, 35.20s/it]

evaluator: True
Generated keyword: Rubber
USING MODEL: gpt-4o
KEYWORD: Rubber

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it used primarily by construction or heavy machinery operators?
answerer: No.
guesser: Airbag
evaluator: False

questioner: Is it used primarily in personal vehicles, such as cars or motorcycles?
answerer: Yes.
guesser: Crash helmet
evaluator: False

questioner: Is it a device used for monitoring or tracking?
answerer: No.
guesser: Child safety seat
evaluator: False

questioner: Is it a type of protective gear or clothing?
answerer: Yes.
guesser: Gloves
evaluator:

games generated:  84%|████████▍ | 168/200 [2:06:22<20:36, 38.63s/it]

evaluator: False

Generated keyword: Condenser
USING MODEL: gpt-4o
KEYWORD: Condenser

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Bicycle
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Machine
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Factory
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Computer
evaluator: False

questioner: Is it a consumer electronic device?
answerer: No.
guesser: Semiconductor
evaluator: False

questioner: Is it primarily used in industrial or commercial settings?
answerer: Yes.
guesser: Industrial robot
evaluator: False

questioner: Is it a type of machinery or equipment used in manufacturing processes?
answerer: Yes.
guesser: 3D printer
evaluator: False

questioner: Is it a machine or equipment used for assembly or packaging?
answerer: No.
guesser: CNC machi

games generated:  84%|████████▍ | 169/200 [2:07:15<22:15, 43.08s/it]

evaluator: False

Generated keyword: Popcorn
USING MODEL: gpt-4o
KEYWORD: Popcorn

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Carrot
evaluator: False

questioner: Is it a type of vegetable?
answerer: No.
guesser: Bread
evaluator: False

questioner: Is it a type of fruit?
answerer: No.
guesser: Wheat
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: Yes.
guesser: Rice
evaluator: False

questioner: Is it commonly used to make bread?
answerer: No.
guesser: Corn
evaluator: False

questioner: Is it commonly consumed as a side dish or staple food?
answerer: No.
guesser: Quinoa
evaluator: False

questioner: Is it typically used as an ingredient in baking?
answerer: No.
guesser: Oats
evaluator: False

questioner: Is it often used in breakfast cereals?
answerer: No.
guesser

games generated:  85%|████████▌ | 170/200 [2:07:43<19:14, 38.49s/it]

evaluator: True
Generated keyword: Grater
USING MODEL: gpt-4o
KEYWORD: Grater

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Cake
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Restaurant
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Fork
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spoon
evaluator: False

questioner: Is it a cooking utensil?
answerer: Yes.
guesser: Spatula
evaluator: False

questioner: Is it primarily used for cutting or chopping?
answerer: No.
guesser: Whisk
evaluator: False

questioner: Is it used for stirring or mixing?
answerer: No.
guesser: Pot
evaluator: False

questioner: Is it used for measuring ingredients?
answerer: No.
guesser: Pan
evaluator: False

questioner: Is it used for baking?
answerer: No.
guesser: Tongs
evaluator: False

que

games generated:  86%|████████▌ | 171/200 [2:08:15<17:41, 36.60s/it]

evaluator: True
Generated keyword: Pavers
USING MODEL: gpt-4o
KEYWORD: Pavers

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Automobile
evaluator: False

questioner: Is it a type of machinery or tool?
answerer: No.
guesser: Textile
evaluator: False

questioner: Is it a product used in construction or building?
answerer: Yes.
guesser: Concrete
evaluator: False

questioner: Is it primarily used for structural support?
answerer: No.
guesser: Paint
evaluator: False

questioner: Is it used for interior elements or finishes?
answe

games generated:  86%|████████▌ | 172/200 [2:08:55<17:30, 37.52s/it]

evaluator: True
Generated keyword: Dog crate
USING MODEL: gpt-4o
KEYWORD: Dog crate

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Airplane
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Building
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it furniture?
answerer: Yes.
guesser: Chair
evaluator: False

questioner: Is it primarily used for sitting?
answerer: No.
guesser: Table
evaluator: False

questioner: Is it primari

games generated:  86%|████████▋ | 173/200 [2:09:37<17:28, 38.84s/it]

evaluator: False

Generated keyword: Chicken breast
USING MODEL: gpt-4o
KEYWORD: Chicken breast

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it a type of plant-based food?
answerer: No.
guesser: Chicken
evaluator: False

questioner: Is it a type of meat?
answerer: Yes.
guesser: Beef
evaluator: False

questioner: Is it commonly consumed as poultry?
answerer: Yes.
guesser: Turkey
evaluator: False

questioner: Is it a specific cut of chicken?
answerer: Yes.
guesser: Chicken breast


games generated:  87%|████████▋ | 174/200 [2:09:56<14:14, 32.86s/it]

evaluator: True
Generated keyword: N95 respirator
USING MODEL: gpt-4o
KEYWORD: N95 respirator

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it a wearable item?
answerer: Yes.
guesser: Safety helmet
evaluator: False

questioner: Is it primarily used in medical settings?
answerer: Yes.
guesser: Scrubs
evaluator: False

questioner: Is it used for respiratory protection?
answerer: Yes.
guesser: Face mask
evaluator: False

questioner: Is it a spec

games generated:  88%|████████▊ | 175/200 [2:10:14<11:52, 28.50s/it]

evaluator: True
Generated keyword: Tea leaves
USING MODEL: gpt-4o
KEYWORD: Tea leaves

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee cup
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Farm
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Tractor
evaluator: False

questioner: Is it a tool used in agriculture?
answerer: No.
guesser: Irrigation system
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it commonly used as an ingredient in cooking?
answerer: No.
guesser: Ornamental plant
evaluator: False

questioner: Is it primarily grown for non-food purposes?
answerer: No.
guesser: Herbs
evaluator: False

questioner: Is it a type of grain or cereal?
answerer: No.
guesser: Fruit tree
evaluator: False

questioner: Is it a fruit?
a

games generated:  88%|████████▊ | 176/200 [2:11:04<14:01, 35.06s/it]

evaluator: False

Generated keyword: Golden hamster
USING MODEL: gpt-4o
KEYWORD: Golden hamster

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: smartphone
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: museum
evaluator: False

questioner: Is it an animal?
answerer: Yes.
guesser: elephant
evaluator: False

questioner: Is it a domesticated animal?
answerer: Yes.
guesser: dog
evaluator: False

questioner: Is it commonly kept as a pet?
answerer: Yes.
guesser: cat
evaluator: False

questioner: Is it a type of mammal?
answerer: Yes.
guesser: hamster
evaluator: False

questioner: Is it commonly known to have fur?
answerer: Yes.
guesser: rabbit
evaluator: False

ques

games generated:  88%|████████▊ | 177/200 [2:11:54<15:04, 39.32s/it]

evaluator: False

Generated keyword: Milkshake
USING MODEL: gpt-4o
KEYWORD: Milkshake

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Restaurant
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Kitchen
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Cookbook
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Mug
evaluator: False

questioner: Is it a beverage?
answerer: Yes.
guesser: Soda
evaluator: False

questioner: Is it non-alcoholic?
answerer: Yes.
guesser: Tea
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Juice
evaluator: False

questioner: Is it carbonated?
answerer: No.
guesser: Milk
evaluator: False

questioner: Is it commonly consumed at breakfast?
answerer: No.
guesser: Water
evaluator: False

questioner: Is it fruit-based?
answerer: No.
guesser: Lemonade
evaluator: False

questioner: Is it a type of milk

games generated:  89%|████████▉ | 178/200 [2:12:20<12:58, 35.38s/it]

evaluator: True
Generated keyword: Irrigation system
USING MODEL: gpt-4o
KEYWORD: Irrigation system

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Grand Canyon
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Taj Mahal
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Golden Gate Bridge
evaluator: False

questioner: Is it broadly related t

games generated:  90%|████████▉ | 179/200 [2:13:05<13:22, 38.21s/it]

evaluator: False

Generated keyword: Home fries
USING MODEL: gpt-4o
KEYWORD: Home fries

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Coffee
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Ice cream
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Sandwich
evaluator: False

questioner: Is it typically served hot?
answerer: Yes.
guesser: Soup
evaluator: False

questioner: Is it most commonly consumed as a main course?
answerer: No.
guesser: Fried rice
evaluator: False

questioner: Is it a type of dessert?
answerer: No.
guesser: French fries
evaluator: False

questioner: Is it a type of appetizer or snack?
answerer: No.
guesser: Pasta
evaluator: False

questioner: Is it commonly associated with breakfast?
answerer: Yes.
guesser: Pancakes
evaluator: False

questioner: Is it primarily made from eggs?
answerer: N

games generated:  90%|█████████ | 180/200 [2:13:48<13:12, 39.62s/it]

evaluator: False

Generated keyword: backpack
USING MODEL: gpt-4o
KEYWORD: backpack

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: Yes.
guesser: Shirt
evaluator: False

questioner: Is it primarily worn on the upper body?
answerer: Yes.
guesser: Jacket
evaluator: False

questioner: Is it an item of clothing or an accessory?
answerer: Yes.
guesser: Blouse
evaluator: False

questioner: Is it typically worn indoors rather than outdoors?
answerer: No.
guesser: Coat
evaluator: False

questioner: Is it typically used to protect against certain weather conditions?
answerer: No.
guesser: Sweater
evaluator: False

questioner: Is it a spe

games generated:  90%|█████████ | 181/200 [2:14:17<11:33, 36.50s/it]

evaluator: True
Generated keyword: Honeydew
USING MODEL: gpt-4o
KEYWORD: Honeydew

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Corn
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it typically eaten raw?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it grown on trees?
answerer: No.
guesser: Strawberry
evaluator: False

questioner: Is it a berry?
answerer: No.
guesser: Grape
evaluator: False

questioner: Is it a type of melon?
answerer: Yes.
guesser: Watermelon
evaluator: False

questioner: Is it watermelon?
answerer: No.
guesser: Cantaloupe
evaluator: False

questioner: Is it cantaloupe?
answerer: No.
guesser: Honeydew


games generated:  91%|█████████ | 182/200 [2:14:40<09:47, 32.62s/it]

evaluator: True
Generated keyword: Rubber gloves
USING MODEL: gpt-4o
KEYWORD: Rubber gloves

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Helmet
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: No.
guesser: First aid kit
evaluator: False

questioner: Is it protective gear or clothing?
answerer: Yes.
guesser: Gloves
evaluator: False

questioner: Is it typically worn on the head?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it primarily used in medical settings?
answerer: Yes.
guesser: Face mask
evaluator: False

questioner: Is it a type of

games generated:  92%|█████████▏| 183/200 [2:15:17<09:33, 33.74s/it]

evaluator: True
Generated keyword: Safety goggles
USING MODEL: gpt-4o
KEYWORD: Safety goggles

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: Yes.
guesser: Scaffolding
evaluator: False

questioner: Is it used for personal protective equipment (PPE)?
answerer: Yes.
guesser: Safety harness
evaluator: False

questioner: Is it primarily used to protect the head?
answerer: No.
guesser: Gloves
evaluator: False

questioner: Is it used to protect the body or torso?
answerer: No.
guesser: Safety goggles


games generated:  92%|█████████▏| 184/200 [2:15:32<07:30, 28.16s/it]

evaluator: True
Generated keyword: Grand piano
USING MODEL: gpt-4o
KEYWORD: Grand piano

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Laptop
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Tree
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it a type of venue or place where entertainment or sporting events occur?
answerer: No.
guesser: Movie
evaluator: False

questioner: Is it a type of equipment or gear used in sports or entertainment?
answerer: Yes.
guesser: Football
evaluator: False

questioner: Is it primarily used in outdoor sports or activities?
answerer: No.
guesser: Musical instrument
evaluator: False

questioner: Is it commonly used in team sports?
answerer: No.
guesser: Tre

games generated:  92%|█████████▎| 185/200 [2:16:19<08:26, 33.74s/it]

evaluator: False

Generated keyword: Velcro strap
USING MODEL: gpt-4o
KEYWORD: Velcro strap

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: No.
guesser: Fire extinguisher
evaluator: False

questioner: Is it related to architectural structures or construction?
answerer: No.
guesser: Safety goggles
evaluator: False

questioner: Is it a type of personal protective equipment (PPE)?
answerer: No.
guesser: Fire alarm
evaluator: False

questioner: Is it used for fire prevention or firefighting?
answerer: No.
guesser: Safety harness
evaluator: False

questioner: Is it related to environmental protection or pollution control?
answerer: No

games generated:  93%|█████████▎| 186/200 [2:17:05<08:45, 37.56s/it]

evaluator: False

Generated keyword: smartwatch
USING MODEL: gpt-4o
KEYWORD: smartwatch

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Book
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Statue
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Movie
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Game controller
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Remote control
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Camera
evaluator: False

questioner: Is it used for watching or listening to media?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it a kind of tool or device used for recreational activities?
answerer: Yes.
guesser: Virtual Reality Headset
evaluator: False

qu

games generated:  94%|█████████▎| 187/200 [2:17:51<08:42, 40.16s/it]

evaluator: True
Generated keyword: Chamomile flower
USING MODEL: gpt-4o
KEYWORD: Chamomile flower

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Coffee
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Farm
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Tractor
evaluator: False

questioner: Is it a tool used in food or beverage preparation?
answerer: No.
guesser: Vineyard
evaluator: False

questioner: Is it a type of plant?
answerer: Yes.
guesser: Wheat
evaluator: False

questioner: Is it primarily grown for its edible parts?
answerer: No.
guesser: Cotton
evaluator: False

questioner: Is it commonly used for flavoring or seasoning?
answerer: No.
guesser: Hemp
evaluator: False

questioner: Is it a type of ornamental plant?
answerer: No.
guesser: Tobacco
evaluator: False

questioner: Is it primaril

games generated:  94%|█████████▍| 188/200 [2:18:40<08:34, 42.87s/it]

evaluator: False

Generated keyword: Aluminum foil
USING MODEL: gpt-4o
KEYWORD: Aluminum foil

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: No.
guesser: Refrigerator
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Oven
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Mug
evaluator: False

questioner: Is it a beverage?
answerer: No.
guesser: Spatula
evaluator: False

questioner: Is it a cooking utensil?
answerer: No.
guesser: Fork
evaluator: False

questioner: Is it an appliance?
answerer: No.
guesser: Bottle opener
evaluator: False

questioner: Is it a type of container?
answerer: No.
guesser: Cookbook
evaluator: False

questioner: Is it an ingredient used in cooking or baking?
answerer: No.
guesser: Napkin
evaluator: False

questioner: Is it used for serving food or beverages?
answerer: No.
guesser: Men

games generated:  94%|█████████▍| 189/200 [2:19:26<08:00, 43.69s/it]

evaluator: False

Generated keyword: Screwdriver
USING MODEL: gpt-4o
KEYWORD: Screwdriver

questioner: Is it related to food, beverages or cooking?
answerer: No
guesser: Computer
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No
guesser: Machinery
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No
guesser: Assembly line
evaluator: False

questioner: Is it a natural material or resource?
answerer: No
guesser: Machine tool
evaluator: False

questioner: Is it a machine or a part of a machine?
answerer: No
guesser: Raw material
evaluator: False

questioner: Is it related to construction or building materials?
answerer: Yes
guesser: Concrete
evaluator: False

questioner: Is it commonly used in residential buildings?
answerer: Yes
guesser: Brick
evaluator: False

questioner: Is it primarily used for str

games generated:  95%|█████████▌| 190/200 [2:20:13<07:25, 44.58s/it]

evaluator: True
Generated keyword: Inflatable pool
USING MODEL: gpt-4o
KEYWORD: Inflatable pool

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Hollywood
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Stadium
evaluator: False

questioner: Is it something that is typically used outdoors?
answerer: Yes.
guesser: Carnival
evaluator: False

questioner: Is it associated with a specific sport?
answerer: No.
guesser: Theme park
evaluator: False

questioner: Is it a type of equipment or gear?
answerer: No.
guesser: Festival
evaluator: False

questioner: Is it something that involves physical activity or exercise?
answerer: Yes.
guesser: Playground
evaluator: False

quest

games generated:  96%|█████████▌| 191/200 [2:20:58<06:42, 44.78s/it]

evaluator: False

Generated keyword: Cheerios
USING MODEL: gpt-4o
KEYWORD: Cheerios

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Bread
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Cheese
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Pizza
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Sandwich
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it a type of dessert?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it consumed as a main course?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it typically served as an appetizer or snack?
answerer: No.
guesser: Ice cream
evaluator: False

questioner: Is it related to breakfast?
answerer: Yes.
guesser: Cereal
evaluator: False

questioner: Is it sweet?
answerer: Yes.
guesser: Pancakes
evaluator: False

questioner: 

games generated:  96%|█████████▌| 192/200 [2:21:33<05:34, 41.80s/it]

evaluator: True
Generated keyword: Marmalade
USING MODEL: gpt-4o
KEYWORD: Marmalade

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: No.
guesser: Sandwich
evaluator: False

questioner: Is it a prepared food or dish?
answerer: Yes.
guesser: Spaghetti
evaluator: False

questioner: Is it typically served hot?
answerer: No.
guesser: Salad
evaluator: False

questioner: Is it a type of dessert?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it typically found in sandwiches or wraps?
answerer: No.
guesser: Sashimi
evaluator: False

questioner: Is it a type of salad?
answerer: No.
guesser: Hamburger
evaluator: False

questioner: Is it a snack food?
answerer: No.
guesser: Sushi
evaluator: False

questioner: Is it typically consumed as a main course?
answerer: No.
guesser: Ice Cream
evaluator: False



games generated:  96%|█████████▋| 193/200 [2:22:19<05:00, 42.98s/it]

evaluator: False

Generated keyword: Fitbit
USING MODEL: gpt-4o
KEYWORD: Fitbit

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Television
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Playing card
evaluator: False

questioner: Is it related to electronics or technology?
answerer: Yes.
guesser: Video game controller
evaluator: False

questioner: Is it primarily used for gaming?
answerer: No.
guesser: Remote control
evaluator: False

questioner: Is it a type of media device, such as for viewing or listening to content?
answerer: No.
guesser: Smartphone
evaluator: False

questioner: Is it used for recording or capturing content?
answerer: No.
guesser: Camera


games generated:  97%|█████████▋| 194/200 [2:23:04<04:21, 43.63s/it]

evaluator: False

Generated keyword: Fruit
USING MODEL: gpt-4o
KEYWORD: Fruit

questioner: Is it related to food, beverages or cooking?
answerer: Yes.
guesser: Pizza
evaluator: False

questioner: Is it a food?
answerer: Yes.
guesser: Apple
evaluator: False

questioner: Is it related to agriculture?
answerer: Yes.
guesser: Carrot
evaluator: False

questioner: Is it a fruit?
answerer: Yes.
guesser: Banana
evaluator: False

questioner: Is it typically eaten raw?
answerer: Yes.
guesser: Orange


games generated:  98%|█████████▊| 195/200 [2:23:15<02:49, 33.83s/it]

evaluator: True
Generated keyword: Magnetic stirrer
USING MODEL: gpt-4o
KEYWORD: Magnetic stirrer

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machine tool
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Conveyor belt
evaluator: False

questioner: Is it related to construction or building materials?
answerer: No.
guesser: Vehicle
evaluator: False

questioner: Is it a tool or a piece of machinery?
answerer: Yes.
guesser: Lathe
evaluator: False

questioner: Is it used primarily in metalworking or woodworking?
answerer: No.
guesser: Drill press
evaluator: False

questioner: Is

games generated:  98%|█████████▊| 196/200 [2:24:02<02:31, 37.80s/it]

evaluator: False

Generated keyword: event ticket
USING MODEL: gpt-4o
KEYWORD: event ticket

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Car
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mountain
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: Yes.
guesser: Stadium
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: Yes.
guesser: Basketball
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Playing cards
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: Yes.
guesser: Book
evaluator: False

questioner: Is it something commonly used in the performing arts, such as theater or music?
answerer: Yes.
guesser: Microphone
evaluator: False

questioner: Is it a musical instrument?
answerer: No.
guesser: Paintbrush
evaluator: False

questione

games generated:  98%|█████████▊| 197/200 [2:24:42<01:55, 38.61s/it]

evaluator: False

Generated keyword: Area rug
USING MODEL: gpt-4o
KEYWORD: Area rug

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Eiffel Tower
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: No.
guesser: Mount Everest
evaluator: False

questioner: Is it broadly related to entertainment or sports?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to clothing, accessories or beauty products?
answerer: No.
guesser: Pyramids of Giza
evaluator: False

questioner: Is it an animal?
answerer: No.
guesser: Telescope
evaluator: False

questioner: Is it something a person can hold in their hand?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it a living thing?
answerer: No.
guesser: Great Wall of China
evaluator: False

questioner: Is it furniture?
answerer: No.
guesser: Ocean
evaluator: False

questioner: Is it broadly related to the arts or media?
answerer: No.

games generated:  99%|█████████▉| 198/200 [2:25:24<01:18, 39.40s/it]

evaluator: False

Generated keyword: Switch track
USING MODEL: gpt-4o
KEYWORD: Switch track

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Library
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: No.
guesser: Warehouse
evaluator: False

questioner: Is it related to electronics or technology?
answerer: No.
guesser: Machine
evaluator: False

questioner: Is it a natural material or resource?
answerer: No.
guesser: Assembly line
evaluator: False

questioner: Is it a type of machinery or equipment?
answerer: Yes.
guesser: Conveyor belt
evaluator: False

questioner: Is it used in the construction industry?
answerer: No.
guesser: Lathe
evaluator: False

questioner: Is it used in the automotive industry?
answerer: No.
guesser: Drill press
evaluator: False

questioner: Is it used in the textile or clothing in

games generated: 100%|█████████▉| 199/200 [2:26:11<00:41, 41.95s/it]

evaluator: False

Generated keyword: Traffic cones
USING MODEL: gpt-4o
KEYWORD: Traffic cones

questioner: Is it related to food, beverages or cooking?
answerer: No.
guesser: Statue of Liberty
evaluator: False

questioner: Is it related to industrial production or manufacturing?
answerer: Yes.
guesser: Factory
evaluator: False

questioner: Is it related to safety or safety equipment?
answerer: Yes.
guesser: Hard hat
evaluator: False

questioner: Is it related to transportation or vehicles?
answerer: Yes.
guesser: Airbag
evaluator: False

questioner: Is it primarily used on land vehicles, such as cars or trucks?
answerer: Yes.
guesser: Seatbelt
evaluator: False

questioner: Is it a component or accessory specifically designed for vehicle operation?
answerer: No.
guesser: Traffic cone


games generated: 100%|██████████| 200/200 [2:26:23<00:00, 43.92s/it]

evaluator: True





In [21]:
len(game_results)

200

In [22]:
dict_keywords

set()

## Save as JSONL

In [23]:
with open("20qs-data/final_games/gpt-4o-200games.jsonl", "w") as f:
    for i in game_results:
        json.dump(i, f)
        f.write('\n')