In [1]:
import os
import re
import sys
import time

import litellm

# Install a placeholder key only when the variable is missing or empty.
# `.get` is used because indexing `os.environ[...]` raises KeyError when the
# variable is not set at all, which made the fallback below unreachable.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = '<REDACTED>'

In [17]:
# Model name read by rephrase_as_fact; the commented-out assignments are
# alternative model names kept for quick switching.
#MODEL = "ft:gpt-4o-mini-2024-07-18:personal:20qs-ft-1:9tKOfrv9"
MODEL = "gpt-4o-mini"
#EVALUATOR_MODEL="gpt-4o-mini"
# MODEL = "gpt-4o"
#MODEL = "claude-3-5-sonnet-20240620"
# NOTE(review): presumably lets litellm adjust request parameters for
# provider compatibility -- confirm against the litellm documentation.
litellm.modify_params = True

In [3]:
def completion(model, messages):
    """Run a chat completion through litellm and return its result unchanged.

    After a call to any model whose name contains "claude", the function
    pauses for 60/50 seconds before returning.
    """
    response = litellm.completion(model, messages)
    is_claude_model = "claude" in model
    if is_claude_model:
        # Adjust based on rate limit (https://console.anthropic.com/settings/limits)
        pause_seconds = 60 / 50
        time.sleep(pause_seconds)
    return response

In [4]:
class DecisionNode:
    """A node in a binary yes/no decision tree of 20-Questions queries.

    A node with a truthy ``query`` is a question whose ``yes_branch`` and
    ``no_branch`` hold the follow-up nodes (either may be ``None``). A node
    without a query is rendered as the placeholder ``*``.

    Text format shared by ``print_tree``, ``write_tree_to_file`` and
    ``read_tree_from_file``: one node per line, in pre-order (node, yes
    subtree, no subtree), written as ``<prefix>-- <query>``. A yes-child
    extends its parent's prefix with ``"  |"``; a no-child with ``"   "``.
    """

    _MARKER = "-- "
    _PLACEHOLDER = "*"
    _YES_INDENT = "  |"
    _NO_INDENT = "   "

    def __init__(self, query = None, yes_branch=None, no_branch=None):
        self.query = query
        self.yes_branch = yes_branch
        self.no_branch = no_branch

    def _iter_lines(self, prefix=""):
        """Yield the rendered lines (without newlines) of this subtree in pre-order."""
        if not self.query:
            # Placeholder node: children, if any, are intentionally not rendered.
            yield f"{prefix}{self._MARKER}{self._PLACEHOLDER}"
            return
        yield f"{prefix}{self._MARKER}{self.query}"
        if self.yes_branch:
            yield from self.yes_branch._iter_lines(prefix + self._YES_INDENT)
        if self.no_branch:
            yield from self.no_branch._iter_lines(prefix + self._NO_INDENT)

    def print_tree(self, level=0, prefix=""):
        """Print this subtree to stdout.

        ``level`` is accepted for backward compatibility and is not used;
        ``prefix`` is the indentation prefix of this node.
        """
        for line in self._iter_lines(prefix):
            print(line)

    def write_tree_to_file(self, filename):
        """Write this subtree to ``filename`` in the text format described on the class."""
        with open(filename, 'w') as f:
            self._write_tree(f)

    def _write_tree(self, file, level=0, prefix=""):
        """Write this subtree to an open text ``file``, one node per line.

        ``level`` is accepted for backward compatibility and is not used.
        """
        for line in self._iter_lines(prefix):
            file.write(line + "\n")

    @staticmethod
    def read_tree_from_file(filename):
        """Load a tree previously saved with ``write_tree_to_file``.

        Returns the root ``DecisionNode``, or ``None`` if the file does not
        start with a root-level node line.
        """
        with open(filename, 'r') as f:
            lines = f.readlines()
        return DecisionNode._read_tree(lines, 0, "")

    @staticmethod
    def _read_tree(lines, indent_level, current_prefix):
        """Parse one subtree from the front of ``lines`` (consumed in place).

        ``current_prefix`` is the prefix the node for this position must have.
        ``indent_level`` is only passed along to recursive calls.

        Returns the parsed node, or ``None`` when the next line does not
        belong to this position (i.e. the branch is absent). In that case the
        line is left in ``lines`` so an ancestor can still parse it; consuming
        it here would silently drop nodes from the loaded tree.
        """
        # Ignore blank lines so stray empty lines cannot block parsing.
        while lines and not lines[0].strip():
            lines.pop(0)
        if not lines:
            return None

        expected_prefix = current_prefix + DecisionNode._MARKER
        line = lines[0].rstrip()

        # Peek first: only consume the line once we know it is ours.
        if not line.startswith(expected_prefix):
            return None
        lines.pop(0)

        content = line[len(expected_prefix):]
        if content == DecisionNode._PLACEHOLDER:
            return DecisionNode()

        yes_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + DecisionNode._YES_INDENT)
        no_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + DecisionNode._NO_INDENT)

        return DecisionNode(content, yes_branch, no_branch)

1. Start with a list of 2000 or so example keywords
2. Have the questioner agent generate 5 yes / no questions that will attempt to bisect the remaining keywords in the list
3. For each question, have another agent go through each remaining keyword in the list and eliminate possible keywords based on if the answer to the question was "yes"
4. Determine which question eliminated closest to 50% of the remaining keywords (i.e. the question is neither too specific nor too broad).
5. For that chosen question, generate 5 new questions each for the "no" path and the "yes" path (i.e. recursively repeat this for each branch)

In [95]:
import json

# Load the labeled keywords (one JSON object per line).
with open("20qs-data/keywords_data/labeled_keywords_v2.jsonl", "r") as f:
    lines = f.read().split('\n')

objects = []
skipped_lines = 0
for line in lines:
    if not line.strip():
        continue  # blank lines (e.g. after the final newline) are not records
    try:
        objects.append(json.loads(line))
    except json.JSONDecodeError:
        # Best-effort load: skip malformed records, but only for JSON syntax
        # errors -- a bare `except` would also swallow KeyboardInterrupt.
        skipped_lines += 1
if skipped_lines:
    print(f"Skipped {skipped_lines} malformed line(s) in the keyword file")

# Keep only the keywords whose record is labeled as not being a place.
all_keywords = [i["keyword"] for i in objects if i["place"] == "no"]

In [476]:
# Leading list numbering such as "1. ", "12) " at the start of a reply line.
_QUESTION_NUMBERING = re.compile(r"^\s*\d+\s*[.)]\s*")


def _parse_numbered_questions(response):
    """Split a model reply into question strings, dropping numbering and blank lines."""
    parsed = []
    for raw_line in response.splitlines():
        text = _QUESTION_NUMBERING.sub("", raw_line).strip()
        if text:
            parsed.append(text)
    return parsed


def generate_questions(questions, answers, remaining_keywords, num_questions=5):
    """Ask the model for candidate follow-up yes/no questions.

    Args:
        questions: Questions asked so far, in order.
        answers: Answers to ``questions`` (same order).
        remaining_keywords: Keywords still consistent with the answers so far.
        num_questions: How many candidate questions to request.

    Returns:
        A list of question strings parsed from the reply, one per non-blank
        line with any leading "N." numbering removed. The model may return
        more or fewer than ``num_questions``.
    """
    prompt = {
        "role": "system",
        # Joined with spaces so consecutive sentences do not run together.
        "content": " ".join([
            "You are an data scientist who has dedicated his life's research to a scientific approach to the game 20 Questions.",
            f"Given previous questions and answers, as well the remaining possible keywords, generate the {num_questions} best follow-up yes-or-no questions that attempt to reduce the remaining number of possible keywords by half.",
            "The keyword is a specific thing, NOT a place and NOT a person.",
            "Each question should be independent of the others.",
            "Questions should be as vague and general as possible to attempt to eliminate 50% of possible keywords given the previous questions and answers. This is extremely important.",
            "Output questions only, no other text. Each question should be numbered in markdown with no other formatting.",
        ]),
    }

    previous_q_and_a = "".join(
        f"{number}. Question: {question}  Answer: {answer}\n"
        for number, (question, answer) in enumerate(zip(questions, answers), start=1)
    )

    # Built exactly once: the previous version repeated this text once per
    # earlier question and sent an empty message when there were none.
    q_and_a = {
        "role": "user",
        "content": f"Previous questions and answers: {previous_q_and_a}\n\nRemaining keywords: {remaining_keywords}",
    }

    print(previous_q_and_a)

    messages = [prompt, q_and_a]

    response = completion("gpt-4o", messages).choices[0].message.content

    return _parse_numbered_questions(response)

generate_questions(["Is it related to cooking, food, or beverages?", "Is it commonly found in a kitchen?"], ["Yes", "Yes"], ["soda", "hamburger", "blender", "stove"], 5)

1. Question: Is it related to cooking, food, or beverages?  Answer: Yes2. Question: Is it commonly found in a kitchen?  Answer: Yes


['Is it an appliance?',
 'Is it a type of food?',
 'Is it typically found inside a refrigerator?',
 'Does it require preparation before consumption?',
 'Does it use electricity to function?']

In [419]:
def rephrase_as_fact(question, answer):
    """Ask the model to restate a question/answer pair as a single fact.

    Args:
        question: The yes/no question text.
        answer: The answer given to it (e.g. "Yes" or "No").

    Returns:
        The model's reply text, unmodified.
    """
    prompt = {
        "role": "system",
        # Each part ends with a newline so the instruction, the example input
        # and the example output stay on separate lines. Without them the
        # adjacent literals concatenated into "Answer: NoExample output: ...".
        "content": (
            "Given a question and answer pair, rephrase it as a single fact.\n"
            "Example input:\nQuestion: Is it smaller than a breadbox?\nAnswer: No\n"
            "Example output: It is larger than a breadbox."
        ),
    }

    q_and_a = {
        "role": "user",
        "content": f"Question: {question}\nAnswer: {answer}",
    }

    messages = [prompt, q_and_a]

    response = completion(MODEL, messages).choices[0].message.content

    return response

rephrase_as_fact("Is it related to food, beverages or cooking?", "No")

'It is not related to food, beverages, or cooking.'

In [433]:
# First standalone "true"/"false" word in a lower-cased model reply.
_VERDICT_WORD = re.compile(r"\b(true|false)\b")


def evaluate_keyword(fact, keyword):
    """Ask the model whether ``fact`` holds for ``keyword``.

    Args:
        fact: A statement such as "It is larger than a microwave."
        keyword: The keyword the statement is checked against.

    Returns:
        ``True`` when the first "true"/"false" word in the reply is "true";
        ``False`` otherwise, including when the reply contains neither word.
    """
    prompt = {
        "role": "system",
        "content": (
            # Trailing space keeps the two sentences from running together.
            "Given a fact, state whether or not the fact is true about a given keyword. "
            "Reply only with the text 'True' or 'False'."
            # "If the keyword does not really fit with most people's definition of the question, reply with 'False'."
            # "If the keyword does fit with most people's definition of the question, reply with 'True'."
            # "Example input 1:\nFact: It is larger than a breadbox\nKeyword: spoon"
            # "Example output 1: False"
            # "Example input 2:\nFact: It is a common household item\nKeyword: bed"
            # "Example output 2: True"
            # "Example input 3:\nFact: It is related to food, beverages or cooking.\nKeyword: recipe"
            # "Example output 3: True"
            # "Example input 4 (keyword doesn't really fit):\nFact: It is related to food, beverages or cooking.\nKeyword: carbon"
            # "Example output 4: False"
        ),
    }

    q_and_a = {
        "role": "user",
        "content": f"Keyword: {keyword}\nFact: {fact}",
    }

    messages = [prompt, q_and_a]

    response = completion("gpt-4o-mini", messages).choices[0].message.content

    # Look for the verdict word itself. The previous check, `"t" in reply`,
    # returned True for any reply containing the letter t (e.g. "That is false").
    verdict = _VERDICT_WORD.search((response or "").lower())
    return verdict is not None and verdict.group(1) == "true"

evaluate_keyword("It is larger than a microwave.", "DVD.")

False

In [240]:
import math
import random

def most_common(lst):
    """Return the element of ``lst`` with the highest occurrence count."""
    distinct_values = set(lst)
    return max(distinct_values, key=lambda value: lst.count(value))

def test_keyword_percentage_eliminated(question, keywords, keyword_test_percentage=0.25):
    fact = rephrase_as_fact(question, "Yes")
    count = 0
    num_test_keywords = max(min(50, len(keywords)), math.floor(len(keywords) * keyword_test_percentage))
    keyword_test_set = random.sample(keywords, k=len(keywords))[:num_test_keywords]
    print(f"Num keywords in test set: {len(keyword_test_set)}")
    remaining_keywords_yes = []
    remaining_keywords_no = []
    for x, keyword in enumerate(keyword_test_set):
        #evaluations = [evaluate_keyword(fact, keyword) for _ in range(3)]
        #result = most_common(evaluations)
        #print(f"{x+1}) Keyword: {keyword}. Results: {evaluations} -> {result}")
        result = evaluate_keyword(fact, keyword)
        print(f"{x+1}) Keyword: {keyword}. Result: {result}")
        if result: 
            count += 1
            remaining_keywords_yes.append(keyword)
        else:
            remaining_keywords_no.append(keyword)
    return count / len(keyword_test_set), remaining_keywords_yes, remaining_keywords_no

In [364]:
test_keyword_percentage_eliminated("Is it be found indoors?", all_keywords, 0.01)

Num keywords in test set: 50
1) Keyword: nose. Result: True
2) Keyword: Public Address System. Result: True
3) Keyword: flashlight. Result: True
4) Keyword: sock. Result: True
5) Keyword: parts cleaner. Result: True
6) Keyword: Power strip. Result: True
7) Keyword: Chew toy. Result: True
8) Keyword: aspirin. Result: True
9) Keyword: Coffee beans. Result: True
10) Keyword: Cereal. Result: True
11) Keyword: butter knife. Result: True
12) Keyword: mango. Result: False
13) Keyword: velvet couch. Result: True
14) Keyword: electric motorcycle. Result: False
15) Keyword: seagull. Result: False
16) Keyword: sewing needle. Result: True
17) Keyword: steering wheel. Result: False
18) Keyword: wireless microphone. Result: True
19) Keyword: curling. Result: False
20) Keyword: unripe fruit. Result: False
21) Keyword: Yoga mat. Result: True
22) Keyword: Wheelchair ramp. Result: False
23) Keyword: whiteboard. Result: True
24) Keyword: cotton candy. Result: True
25) Keyword: rainbow cake. Result: True


(0.74,
 ['nose',
  'Public Address System',
  'flashlight',
  'sock',
  'parts cleaner',
  'Power strip',
  'Chew toy',
  'aspirin',
  'Coffee beans',
  'Cereal',
  'butter knife',
  'velvet couch',
  'sewing needle',
  'wireless microphone',
  'Yoga mat',
  'whiteboard',
  'cotton candy',
  'rainbow cake',
  'metal file',
  'French toast',
  'humidity meter',
  'Wall Art',
  'wonton',
  'tape measure',
  'Ceiling fan',
  'Backrest',
  'cereal bar',
  'keyring',
  'gown',
  'autograph',
  'safety glasses',
  'camp stove',
  'yeast',
  'mri machine',
  'concentrator',
  'Salad Dressing',
  'Onyx'],
 ['mango',
  'electric motorcycle',
  'seagull',
  'steering wheel',
  'curling',
  'unripe fruit',
  'Wheelchair ramp',
  'igloo',
  'excavator',
  'Soybeans',
  'RV',
  'Sunscreen',
  'roadrunner'])

In [477]:
def build_node(current_depth, remaining_keywords, questions, answers, max_depth=5, num_test_questions=5, keyword_test_percentage=0.25, early_stop_score=0.15):
    """Recursively build the decision subtree for ``remaining_keywords``.

    Candidate questions are generated from the question/answer history, each
    is scored by how close its yes-fraction is to 0.5, and the best one
    becomes this node's query. The yes/no keyword lists of that question are
    then used to build the two child subtrees.

    Args:
        current_depth: Depth of the node being built.
        remaining_keywords: Keywords still possible at this node.
        questions: Questions asked on the path to this node.
        answers: Answers given on the path to this node.
        max_depth: Depth at which nodes are returned without children.
        num_test_questions: Number of candidate questions to request.
        keyword_test_percentage: Passed to ``test_keyword_percentage_eliminated``.
        early_stop_score: Stop trying further candidates once a question
            scores below this value (score = ``abs(0.5 - yes_fraction)``).

    Returns:
        A ``DecisionNode``. It has no query when there are no keywords left or
        no usable candidate question; it has a query but no children at
        ``max_depth`` or when the best question does not split the keywords.
    """
    if not remaining_keywords:
        # Nothing left to split; avoids model calls on an empty list.
        return DecisionNode()

    possible_questions = generate_questions(questions, answers, remaining_keywords=remaining_keywords, num_questions=num_test_questions)

    best_question = None
    best_remaining_keywords_yes = []
    best_remaining_keywords_no = []
    best_question_score = 1 #lower is better; real scores lie in [0, 0.5]
    for q in possible_questions:
        if not q:
            continue  # blank lines from the model reply are not questions
        print(f"Question: {q}")
        percentage, new_remaining_keywords_yes, new_remaining_keywords_no = test_keyword_percentage_eliminated(q, remaining_keywords, keyword_test_percentage)
        score = abs(0.5 - percentage)
        # `percentage` is a 0-1 fraction, so format it as a percentage.
        print(f"Percentage: {percentage:.1%}\tScore: {score}\n")
        if score < best_question_score:
            best_question_score = score
            best_question = q
            best_remaining_keywords_yes = new_remaining_keywords_yes
            best_remaining_keywords_no = new_remaining_keywords_no
            if score < early_stop_score:
                break

    if best_question is None:
        # No candidate could be evaluated; recursing would never terminate.
        return DecisionNode()

    # A score of exactly 0.5 means every tested keyword got the same answer,
    # so the question cannot split the set any further.
    if current_depth >= max_depth or best_question_score == 0.5:
        return DecisionNode(query=best_question)

    print(f"_______\nDepth {current_depth} -- Starting Yes branch for question: {best_question}")
    print(f"Remaining keywords yes: {best_remaining_keywords_yes}")
    yes_node = build_node(current_depth + 1, best_remaining_keywords_yes, questions + [best_question], answers + ["Yes"], max_depth, num_test_questions, keyword_test_percentage, early_stop_score)

    print(f"_______\nDepth {current_depth} -- Starting No branch for question: {best_question}")
    print(f"Remaining keywords no: {best_remaining_keywords_no}")
    no_node = build_node(current_depth + 1, best_remaining_keywords_no, questions + [best_question], answers + ["No"], max_depth, num_test_questions, keyword_test_percentage, early_stop_score)

    return DecisionNode(query=best_question, no_branch=no_node, yes_branch=yes_node)
    

In [478]:
def build_tree(starting_question, keywords, initial_starting_keywords_yes=None, initial_starting_keywords_no=None, max_depth=5, num_test_questions=5, keyword_test_percentage=0.25):
    """Build the full decision tree rooted at ``starting_question``.

    When both initial keyword lists are supplied (and non-empty) they are used
    as the root split; otherwise the split is computed by testing every
    keyword against the starting question. Each side is then expanded with
    ``build_node`` starting at depth 1.

    Returns:
        The root ``DecisionNode``.
    """
    precomputed_split_given = bool(initial_starting_keywords_yes and initial_starting_keywords_no)
    if not precomputed_split_given:
        print(f"Determining keyword split for starting question: {starting_question}")
        starting_percentage_split, starting_keywords_yes, starting_keywords_no = test_keyword_percentage_eliminated(starting_question, keywords, keyword_test_percentage=1)
        print(f"Starting percentage split: {starting_percentage_split}")
    else:
        print("Using provided initial starting keywords")
        starting_keywords_yes = initial_starting_keywords_yes
        starting_keywords_no = initial_starting_keywords_no

    def grow_branch(branch_keywords, branch_answer):
        # Both root branches share the same history: the starting question plus its answer.
        return build_node(1, branch_keywords, [starting_question], [branch_answer], max_depth, num_test_questions, keyword_test_percentage)

    print(f"Starting Yes branch for question {starting_question}")
    print(f"Remaining keywords yes: {starting_keywords_yes}")
    yes_subtree = grow_branch(starting_keywords_yes, "Yes")

    print(f"Starting No branch for question {starting_question}")
    print(f"Remaining keywords no: {starting_keywords_no}")
    no_subtree = grow_branch(starting_keywords_no, "No")

    return DecisionNode(query=starting_question, no_branch=no_subtree, yes_branch=yes_subtree)

### Save these keywords for the starting question to save time since these won't change

In [439]:
culinary_starting_keywords_yes = ['pot holder', 'saltwater', 'thermometer probe', 'aperol spritz', 'banana', 'chutney', 'squid', 'pumpkin', 'unripe banana', 'nitrogen', 'measuring spoon', 'tea cup', 'hazelnut tree', 'cutter', 'Edible flowers', 'chiller', 'Garbage Disposal', 'Wine Opener', 'aerator', 'apple', 'raisin', 'fish scale', 'tongue', 'corkscrew', 'empty can', 'skewer', 'dining chair', 'pomegranate', 'beer coaster', 'colander', 'ice tray', 'bean', 'beer stein', 'electric oven', 'granola', 'Honeybee', 'agave plant', 'Udon Noodles', 'cocktail glass', 'yogurt container', 'unripe fruit', 'Green beans', 'crayfish', 'bread maker', 'beer glass', 'Snacks', 'bacon', 'lid', 'Bread knife', 'compost', 'butter knife', 'Popcorn', 'hot tea', 'Agave', 'Wire rack', 'Heating Element', 'Measuring cup', 'grape', 'Rice pudding', 'water jug', 'strainer', 'Cinnamon roll', 'grape juice', 'Scones', 'cake', 'protein shake', 'dishwasher', 'serving platter', 'cherry tree', 'bamboo', 'Nachos', 'tin foil', 'green alga', 'rooster', 'seltzer', 'Water Bottle', 'vegan chocolate', 'rhubarb', 'Coffee Grinder', 'Chocolate cake', 'ginger ale', 'Spatula', 'peeler', 'lemon juice', 'wonton', 'acacia', 'Potato', 'ramen', 'sushi', 'Grapefruit', 'gin', 'Plastic Gloves', 'spray', 'Red wine', 'water dish', 'rice krispie', 'wing', 'Bird seed', 'peanut butter', 'Croissant', 'almond milk', 'candy wrapper', 'seagrass', 'mango', 'cupcake', 'swedish fish', 'lemon balm', 'Tofu', 'vegetarian chili', 'tea leaf', 'bean plant', 'Protein bar', 'kitchen knife', 'pudding', 'flour', 'Coffee Makers', 'Bacteria', 'grill', 'ice crusher', 'nitrous', 'juniper tree', 'Brownies', 'Radish', 'Silicone', 'Fried rice', 'ice pop', 'barcode', 'vietnamese coffee', 'Roots', 'Sippy cup', 'Dandelion', 'goose', 'metal spoon', 'oatmeal cookie', 'soup', 'Pesticide', 'Water Buffalo', 'Hot dog', 'whisk', 'hamburger helper', 'ice tea', 'Olive Oil', 'lime', 'rosemary', 'brussel sprout', 'canning jar', 'Fortune cookie', 'metal foil', 
'Soybeans', 'orange juice', 'metal lid', 'Orange tree', 'Donuts', 'broth', 'pear tree', 'ricotta', 'fudge brownie', 'Oven rack', 'rainbow cake', 'Latte', 'steak knife', 'syrup', 'doughnut', 'Oven door', 'tomato plant', 'Soda', 'juniper bush', 'applesauce', 'fishing net', 'kelp', 'cereal bar', 'coffee ground', 'Wine Tasting Kit', 'Conveyor belt', 'bar towel', 'lemon zest', 'Plastic wrap', 'Orange peel', 'martini', 'shaker', 'fanta', 'Cutlery', 'espresso machine', 'milling', 'sealing machine', 'hummus', 'carbon', 'fruit cup', 'pea soup', 'Cereal', 'maple tree', 'Iced coffee', 'coconut', 'Bonding Agent', 'pellet', 'salt shaker', 'juice box', 'pea plant', 'Water', 'anchovy', 'smoothie', 'Irrigation system', 'pizza box', 'squash', 'Iron', 'Ice Water', 'zinc', 'tiramisu', 'Earl Grey tea', 'cruise ship', 'Orange zest', 'Cauliflower', 'gelatin', 'Energy drink', 'coffee machine', 'Cutting Board', 'Finger food', 'cooler', 'ginger', 'High Chair', 'migraine', 'Baguette', 'Muffin', 'frozen meat', 'yeast', 'saucer', 'dill pickle', 'grinder', 'noodle', 'lemon wedge', 'plastic fork', 'dye', 'Toaster Oven', 'dishtowel', 'Kale smoothie', 'gel', 'coffee bag', 'frappe', 'Microwave', 'Food warmers', 'Bottled Water', 'kiwifruit', 'ziploc', 'Champagne flute', 'Hazelnut', 'Vanilla Extract', 'potato chip', 'plastic tube', 'Wine Aerator', 'Coffee grounds', 'yarrow', 'tupperware', 'jaffa cake', 'Mixing Bowl', 'ham', 'melon', 'sweet tea', 'Protein Shakes', 'Aprons', 'Vegetable', 'coffee mug', 'wine glass', 'Bread pudding', 'rose', 'mango tree', 'peach tree', 'Paper towels', 'Sieve', 'plastic tub', 'Grape Vine', 'raspberry', 'honeydew', 'Habanero pepper', 'yule log', 'Apple pie', 'peppermint', 'sauerkraut', 'Hash browns', 'apricot tree', 'pineapple', 'aluminum foil', 'beer tap', 'wine decanter', 'bamboo leaf', 'picnic basket', 'tea bag', 'Brewery merchandise', 'acorn', 'Popcorn machine', 'oak tree', 'water bowl', 'Edamame', 'cotton candy', 'kheer', 'mesquite tree', 'Unleavened Bread', 
'meatloaf', 'French toast', 'Relish', 'Muffin Tin', 'Frappuccino', 'gas stove', 'Piping tips', 'oyster', 'Red velvet cake', 'juicer', 'ketchup', 'wonton soup', 'Salad Dressing', 'Cream cheese', 'honeycrisp', 'plastic pallet', 'Fungi', 'plastic baggie', 'evaporator', 'Quesadilla', 'Tomato', 'serving bowl', 'Stove', 'dishcloth', 'bowl', 'Veggie Burger', 'foil paper', 'wooden spoon', 'leaf', 'Mini fridge', 'Kitchen cabinet', 'Utility Cart', 'frozen waffle', 'Cantaloupe', 'Silicone mat', 'Marshmallow', 'Coffee beans', 'honey', 'Mushroom', 'cilantro', 'water chestnut', 'Cheesecake', 'fennel', 'Planter Box', 'Sugar packet', 'camp stove', 'coke', 'Yogurt', 'orange slice', 'Food bowl', 'chocolate bar', 'steak', 'toothpick', 'Pumpkin pie', 'Rolling pin', 'weighing scale', 'butter', 'jam', 'duckweed', 'Ice cream maker', 'cannoli', 'nightshade', 'jasmine', 'wild garlic', 'Plate', 'honeydew melon', 'alfalfa', 'Ice cube tray', 'parsley', 'Vending Machine', 'jello', 'refrigerator', 'Placemat', 'Hibiscus']
# Everything not in the "yes" list goes to the "no" list. The lookup set is
# built once so each membership test is constant-time instead of a scan of the
# whole yes-list per keyword. Assumes keywords are hashable (strings).
_culinary_yes_lookup = set(culinary_starting_keywords_yes)
culinary_starting_keywords_no = [i for i in all_keywords if i not in _culinary_yes_lookup]

In [438]:
len(culinary_starting_keywords_yes) / len(all_keywords)

0.2746844840386043

### Generate the entire tree

In [481]:
test_tree  = build_tree(
    "Is it related to food, beverages or cooking?", 
    #"Is it larger than a microwave?",
    all_keywords, 
    initial_starting_keywords_yes=culinary_starting_keywords_yes, 
    initial_starting_keywords_no=culinary_starting_keywords_no, 
    max_depth=6, 
    num_test_questions=5,
    keyword_test_percentage=1
)

Using provided initial starting keywords
Starting Yes branch for question Is it related to food, beverages or cooking?
Remaining keywords yes: ['pot holder', 'saltwater', 'thermometer probe', 'aperol spritz', 'banana', 'chutney', 'squid', 'pumpkin', 'unripe banana', 'nitrogen', 'measuring spoon', 'tea cup', 'hazelnut tree', 'cutter', 'Edible flowers', 'chiller', 'Garbage Disposal', 'Wine Opener', 'aerator', 'apple', 'raisin', 'fish scale', 'tongue', 'corkscrew', 'empty can', 'skewer', 'dining chair', 'pomegranate', 'beer coaster', 'colander', 'ice tray', 'bean', 'beer stein', 'electric oven', 'granola', 'Honeybee', 'agave plant', 'Udon Noodles', 'cocktail glass', 'yogurt container', 'unripe fruit', 'Green beans', 'crayfish', 'bread maker', 'beer glass', 'Snacks', 'bacon', 'lid', 'Bread knife', 'compost', 'butter knife', 'Popcorn', 'hot tea', 'Agave', 'Wire rack', 'Heating Element', 'Measuring cup', 'grape', 'Rice pudding', 'water jug', 'strainer', 'Cinnamon roll', 'grape juice', 'Scone

In [463]:
def add_empty_leaves(node):
    """Give every node in the tree both a yes- and a no-branch, in place.

    Any missing branch is filled with an empty ``DecisionNode()``. Branches
    that already existed are processed recursively; the placeholders added
    here are not expanded further. ``None`` is accepted and ignored.
    """
    if node is None:
        return

    # Remember which children existed before filling the gaps, so that only
    # pre-existing subtrees are descended into (no-branch first, then yes).
    existing_children = [child for child in (node.no_branch, node.yes_branch) if child is not None]

    # Fill each side independently: a node with exactly one child previously
    # fell through to a recursive call on None and raised AttributeError.
    if node.no_branch is None:
        node.no_branch = DecisionNode()
    if node.yes_branch is None:
        node.yes_branch = DecisionNode()

    for child in existing_children:
        add_empty_leaves(child)

In [486]:
add_empty_leaves(test_tree)

In [487]:
test_tree.print_tree()

-- Is it related to food, beverages or cooking?
  |-- Is it an edible item?
  |  |-- Is it primarily sweet?
  |  |  |-- Is it typically consumed as a dessert?
  |  |  |  |-- Can it be found in a liquid or semi-liquid form?
  |  |  |  |  |-- Is it fruit-based?
  |  |  |  |  |  |-- Does it come from a tropical region?
  |  |  |  |  |  |  |-- *
  |  |  |  |  |  |   -- *
  |  |  |  |  |   -- Is it dairy-based?
  |  |  |  |  |     |-- *
  |  |  |  |  |      -- *
  |  |  |  |   -- Is it typically baked?
  |  |  |  |     |-- Is it commonly decorated with frosting or icing?
  |  |  |  |     |  |-- *
  |  |  |  |     |   -- *
  |  |  |  |      -- Is it predominantly eaten cold or frozen?
  |  |  |  |        |-- *
  |  |  |  |         -- *
  |  |  |   -- Is it a beverage?
  |  |  |     |-- Does it contain caffeine?
  |  |  |     |  |-- Does it commonly come in a can?
  |  |  |     |  |  |-- *
  |  |  |     |  |   -- *
  |  |  |     |   -- Does it have fruit juice as a primary ingredient?
  |  | 

### Save to text file

In [488]:
test_tree.write_tree_to_file('20qs-data/decision_trees/gpt_decision_tree_v4.txt')

### Load tree from txt file (doesn't load the whole tree)

In [489]:
tree2 = DecisionNode.read_tree_from_file('20qs-data/decision_trees/gpt_decision_tree_v4.txt')

In [490]:
tree2.print_tree()

-- Is it related to food, beverages or cooking?
  |-- Is it an edible item?
  |  |-- Is it primarily sweet?
  |  |  |-- Is it typically consumed as a dessert?
  |  |  |  |-- Can it be found in a liquid or semi-liquid form?
  |  |  |  |  |-- Is it fruit-based?
  |  |  |  |  |  |-- Does it come from a tropical region?
  |  |  |  |  |  |  |-- *
  |  |  |  |  |  |   -- *
  |  |  |  |  |   -- Is it dairy-based?
  |  |  |  |  |     |-- *
  |  |  |  |  |      -- *
  |  |  |  |   -- Is it typically baked?
  |  |  |  |     |-- Is it commonly decorated with frosting or icing?
  |  |  |  |     |  |-- *
  |  |  |  |     |   -- *
  |  |  |  |      -- Is it predominantly eaten cold or frozen?
  |  |  |  |        |-- *
  |  |  |  |         -- *
  |  |  |   -- Is it a beverage?
  |  |  |     |-- Does it contain caffeine?
  |  |  |     |  |-- Does it commonly come in a can?
  |  |  |     |  |  |-- *
  |  |  |     |  |   -- *
  |  |  |     |   -- Does it have fruit juice as a primary ingredient?
  |  | 

### Save with pickle

In [461]:
import pickle
# Serialize the in-memory tree object (test_tree) to a pickle file.
with open('20qs-data/decision_trees/gpt_decision_tree_v3.p', 'wb') as f:
    pickle.dump(test_tree, f)

### Example of loading decision tree with Pickle

In [295]:
# SECURITY: pickle.load can execute arbitrary code from the file; only load
# pickle files that you created yourself.
# NOTE(review): this reads gpt_decision_tree_v2.p, while the save cell in this
# notebook writes gpt_decision_tree_v3.p -- confirm which file is intended.
with open('20qs-data/decision_trees/gpt_decision_tree_v2.p', 'rb') as f:
    tree3 = pickle.load(f)

### Testing some alternative starting questions

In [435]:
question = "Is it related to food, beverages or cooking?"
result, _, _ = test_keyword_percentage_eliminated(question, all_keywords, 0.1)
print(f"Question: {question}  {result}")

Num keywords in test set: 134
1) Keyword: Incense. Result: False
2) Keyword: bookbag. Result: False
3) Keyword: Joystick. Result: False
4) Keyword: baggage conveyor. Result: False
5) Keyword: Emergency Exit Sign. Result: False
6) Keyword: zinc. Result: True
7) Keyword: vintage clock. Result: False
8) Keyword: dinosaur skeleton. Result: False
9) Keyword: tongue. Result: True
10) Keyword: Lawnmower. Result: False
11) Keyword: flea. Result: False
12) Keyword: dogwood. Result: False
13) Keyword: corkscrew. Result: True
14) Keyword: paint thinner. Result: False
15) Keyword: Nap mat. Result: False
16) Keyword: ice tea. Result: True
17) Keyword: pea plant. Result: True
18) Keyword: Sound system. Result: False
19) Keyword: cutter. Result: True
20) Keyword: ginger ale. Result: True
21) Keyword: mixing console. Result: False
22) Keyword: chimney. Result: False
23) Keyword: frisbee. Result: False
24) Keyword: wall mirror. Result: False
25) Keyword: rollercoaster. Result: False
26) Keyword: Paperw

In [247]:
# Candidate starting questions to compare; add or remove entries here instead
# of copy-pasting the evaluation for each one.
candidate_starting_questions = [
    "Is it something you can hold in your hand?",
    "Is it used primarily for entertainment purposes?",
    "Is it found in nature (not man-made)?",
    "Is it something you would typically find in a kitchen?",
    "Does it require electricity to function?",
]

for question in candidate_starting_questions:
    # 0.2 is passed as keyword_test_percentage (share of all_keywords to sample).
    result, _, _ = test_keyword_percentage_eliminated(question, all_keywords, 0.2)
    print(f"Question: {question}  {result}")

Num keywords in test set: 269
1) Keyword: unripe banana. Result: True
2) Keyword: colander. Result: True
3) Keyword: pail. Result: True
4) Keyword: flashlight. Result: True
5) Keyword: water snake. Result: True
6) Keyword: weight plate. Result: True
7) Keyword: power adapter. Result: True
8) Keyword: valve. Result: True
9) Keyword: Lawnmower. Result: False
10) Keyword: ice tea. Result: True
11) Keyword: plastic fork. Result: True
12) Keyword: martini. Result: True
13) Keyword: Plastic wrap. Result: True
14) Keyword: hole puncher. Result: True
15) Keyword: centerpiece. Result: True
16) Keyword: travel pillow. Result: True
17) Keyword: waterslide. Result: False
18) Keyword: teddy bear. Result: True
19) Keyword: parking pass. Result: True
20) Keyword: Mushroom. Result: True
21) Keyword: Bird seed. Result: True
22) Keyword: bedspread. Result: False
23) Keyword: shower mat. Result: True
24) Keyword: groundhog. Result: True
25) Keyword: plant saucer. Result: True
26) Keyword: Hot water bottl

In [318]:
def generate_first_question(previous_questions):
    """Ask the model for a new candidate starting question.

    Args:
        previous_questions: Questions already tried; they are included in the
            request so the model can avoid repeating them.

    Returns:
        The model's reply text, unmodified.
    """
    prompt = {
        "role": "system",
        # Joined with spaces so consecutive sentences do not run together
        # (adjacent string literals concatenate with no separator).
        "content": " ".join([
            "You are an data scientist who has dedicated his life's research to a scientific approach to the game 20 Questions.",
            "When requested, generate the best yes-or-no question that will reduce the remaining number of possible keywords by half.",
            "The keyword is a specific thing, NOT a place and NOT a person.",
            "Questions should be as vague and general as possible to attempt to eliminate 50% of possible keywords given the previous questions and answers. This is extremely important.",
            "Additionally, it should not be one of the questions that the user has already tried.",
            "Output the question only, no other text.",
        ]),
    }

    q_and_a = {
        "role": "user",
        "content": f"Here are the previous questions I've tried: {previous_questions}. Generate a new starting question please.",
    }

    messages = [prompt, q_and_a]

    response = completion("gpt-4o", messages).choices[0].message.content

    return response

In [415]:
# Collect the questions generated so far; the growing list is passed back to
# generate_first_question on every round.
previous_questions = []
for i in range(50):
    question = generate_first_question(previous_questions)
    print(f"Question: {question}")
    # Score the candidate on a sample of all_keywords (0.1 is passed as keyword_test_percentage).
    result, _, _ = test_keyword_percentage_eliminated(question, all_keywords, 0.1)
    print(f"Result: {result}\n___________\n")
    previous_questions.append(question)

Question: Is it a living thing?
Num keywords in test set: 134
1) Keyword: Baguette. Result: False
2) Keyword: Guitar String. Result: False
3) Keyword: Cantaloupe. Result: True
4) Keyword: junction box. Result: False
5) Keyword: water snake. Result: True
6) Keyword: Nail polish. Result: False
7) Keyword: ham. Result: False
8) Keyword: grape juice. Result: False
9) Keyword: ice crusher. Result: False
10) Keyword: Tongue Depressor. Result: False
11) Keyword: internet router. Result: False
12) Keyword: Luggage. Result: False
13) Keyword: Oxygen tank. Result: False
14) Keyword: Sunscreen. Result: False
15) Keyword: Karaoke machine. Result: False
16) Keyword: lifting belt. Result: False
17) Keyword: trellis. Result: False
18) Keyword: Bulb. Result: False
19) Keyword: HDMI cable. Result: False
20) Keyword: Floor jacks. Result: False
21) Keyword: Utility Box. Result: False
22) Keyword: Fertilizer. Result: False
23) Keyword: maxi dress. Result: False
24) Keyword: wire hanger. Result: False
25) 

KeyboardInterrupt: 