In [1]:
import litellm
import time
import sys, os

# Install the placeholder key only when no key is configured.
# .get() is required here: os.environ["OPENAI_API_KEY"] raises KeyError when the
# variable is missing, which is exactly the case this fallback is meant to handle.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = '<REDACTED>'

In [17]:
# Model used by rephrase_as_fact() and evaluate_keyword() via the module-level MODEL name.
# generate_questions() does not read MODEL; it passes "gpt-4o" directly.
# The commented-out assignments are alternative models; keep exactly one MODEL line active.
#MODEL = "ft:gpt-4o-mini-2024-07-18:personal:20qs-ft-1:9tKOfrv9"
MODEL = "gpt-4o-mini"
#EVALUATOR_MODEL="gpt-4o-mini"
# MODEL = "gpt-4o"
#MODEL = "claude-3-5-sonnet-20240620"
# NOTE(review): presumably lets litellm adjust request params/messages to satisfy
# provider-specific requirements — confirm against the litellm documentation.
litellm.modify_params = True

In [3]:
def completion(model, messages):
    """Send a chat request through litellm and return its response object.

    Args:
        model: Model identifier handed to ``litellm.completion``.
        messages: Chat messages handed to ``litellm.completion``.

    Returns:
        Whatever ``litellm.completion(model, messages)`` returns.

    When the model name contains "claude", the function pauses for 60/50
    seconds after the request so that successive calls stay under the
    provider rate limit (https://console.anthropic.com/settings/limits).
    """
    response = litellm.completion(model, messages)
    if "claude" not in model:
        return response

    # Throttle only after the request has completed.
    seconds_between_calls = 60/50
    time.sleep(seconds_between_calls)
    return response

In [4]:
class DecisionNode:
    """A node of a binary yes/no question tree.

    Attributes are plain instance fields:
      * ``query``      - the question text, or ``None`` for a placeholder node.
      * ``yes_branch`` - child node followed on a "yes" answer, or ``None``.
      * ``no_branch``  - child node followed on a "no" answer, or ``None``.

    Text format (shared by ``print_tree``, ``write_tree_to_file`` and
    ``read_tree_from_file``): one node per line in pre-order, written as
    ``<prefix>-- <query>``. A node without a query is written as ``<prefix>-- *``.
    A yes-child extends its parent's prefix with ``"  |"`` and a no-child
    extends it with ``"   "``.
    """

    def __init__(self, query = None, yes_branch=None, no_branch=None):
        self.query = query
        self.yes_branch = yes_branch
        self.no_branch = no_branch

    def _iter_lines(self, prefix=""):
        """Yield the text lines (without newline) for this subtree in pre-order."""
        if self.query:
            yield f"{prefix}-- {self.query}"
            if self.yes_branch:
                yield from self.yes_branch._iter_lines(prefix + "  |")
            if self.no_branch:
                yield from self.no_branch._iter_lines(prefix + "   ")
        else:
            # Nodes without a query are rendered as "*"; their children are not rendered.
            yield f"{prefix}-- *"

    def print_tree(self, level=0, prefix=""):
        """Print this subtree to stdout.

        Args:
            level: Unused; kept so existing callers keep working.
            prefix: Text placed before this node's ``-- `` marker.
        """
        for line in self._iter_lines(prefix):
            print(line)

    def write_tree_to_file(self, filename):
        """Write this subtree to ``filename`` in the text format, overwriting it."""
        with open(filename, 'w') as f:
            self._write_tree(f)

    def _write_tree(self, file, level=0, prefix=""):
        """Write this subtree to an open, writable text file object.

        Args:
            file: Object with a ``write(str)`` method.
            level: Unused; kept so existing callers keep working.
            prefix: Text placed before this node's ``-- `` marker.
        """
        for line in self._iter_lines(prefix):
            file.write(f"{line}\n")

    @staticmethod
    def read_tree_from_file(filename):
        """Load a tree previously saved with ``write_tree_to_file``.

        Returns:
            The root ``DecisionNode``, or ``None`` when the file is empty or its
            first line is not a root-level node line.
        """
        with open(filename, 'r') as f:
            lines = f.readlines()
        return DecisionNode._read_tree(lines, 0, "")

    @staticmethod
    def _read_tree(lines, indent_level, current_prefix):
        """Parse one subtree from the front of ``lines``.

        Args:
            lines: Remaining file lines. Lines belonging to the parsed subtree
                are removed from the front of this list; all others are left in place.
            indent_level: Unused; kept so existing callers keep working.
            current_prefix: Prefix the subtree's first line must carry.

        Returns:
            The parsed ``DecisionNode``, or ``None`` when the next line does not
            belong to a node at ``current_prefix``.
        """
        if not lines:
            return None

        expected_prefix = current_prefix + "-- "

        # Peek before consuming. A node saved without children is followed directly
        # by a line that belongs to one of its ancestors; popping that line here
        # (as the previous implementation did) dropped it and truncated the tree.
        line = lines[0].rstrip()
        if not line.startswith(expected_prefix):
            return None
        lines.pop(0)

        content = line[len(expected_prefix):]
        if content == "*":
            return DecisionNode()

        # Children are optional: each call returns None without consuming anything
        # when the next line is not that child.
        yes_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + "  |")
        no_branch = DecisionNode._read_tree(lines, indent_level + 1, current_prefix + "   ")

        return DecisionNode(content, yes_branch, no_branch)

1. Start with a list of 2000 or so example keywords
2. Have the questioner agent generate 5 yes / no questions that will attempt to bisect the remaining keywords in the list
3. For each question, have another agent go through each remaining keyword in the list and eliminate possible keywords based on if the answer to the question was "yes"
4. Determine which question eliminated closest to 50% of the remaining keywords (i.e. not too specific and not too broad of a question).
5. For that chosen question, generate 5 new questions each for the "no" path and the "yes" path (i.e. recursively repeat this for each branch)

In [95]:
import json

# Load the labeled keyword records (one JSON object per line) and keep the
# keywords whose "place" label is "no".
with open("20qs-data/keywords_data/labeled_keywords_v2.jsonl", "r") as f:
    lines = f.read().split('\n')
objects = []
skipped_lines = 0
for line in lines:
    if not line.strip():
        # Blank lines (such as the one after a trailing newline) are not records.
        continue
    try:
        objects.append(json.loads(line))
    except json.JSONDecodeError:
        # Best-effort load: skip malformed rows, but only JSON syntax errors.
        # Anything else (e.g. KeyboardInterrupt) must not be swallowed.
        skipped_lines += 1
if skipped_lines:
    print(f"Skipped {skipped_lines} malformed line(s) while loading keywords")
all_keywords = [i["keyword"] for i in objects if i["place"] == "no"]

In [276]:
def generate_questions(questions, answers, num_questions=5):
    """Ask the model for candidate follow-up yes/no questions.

    Args:
        questions: Questions asked so far.
        answers: Answers given so far; ``answers[i]`` belongs to ``questions[i]``.
        num_questions: How many questions the prompt requests.

    Returns:
        A list of question strings with the list numbering removed. Blank
        response lines are dropped. The model is not guaranteed to return
        exactly ``num_questions`` entries.
    """
    import re  # local import keeps this cell runnable on its own

    prompt = {
        "role": "system",
        "content": (
            "You are an data scientist who has dedicated his life's research to a scientific approach to the game 20 Questions."
            f"Given previous questions and answers, generate the {num_questions} best follow-up yes-or-no questions that attempt to reduce the remaining number of possible keywords by half."
            "The keyword is a specific thing, NOT a place and NOT a person."
            "Each question should be independent of the others."
            "Questions should be as vague and general as possible to attempt to eliminate 50% of possible keywords given the previous questions and answers. This is extremely important."
            "Output questions only, no other text. Each question should be numbered in markdown with no other formatting."
        ),
    }

    q_and_a = {
        "role": "user",
        "content": '\n'.join(f"Question: {q}\nAnswer: {a}" for q, a in zip(questions, answers)),
    }

    messages = [prompt, q_and_a]

    response = completion("gpt-4o", messages).choices[0].message.content

    # Slicing off a fixed three characters turned blank lines into empty
    # "questions" and assumed every number prefix is exactly three characters.
    # Match the "<number>." / "<number>)" prefix explicitly instead.
    numbered_line = re.compile(r"^\s*\d+[.)]\s*(.+?)\s*$")
    non_blank = [line.strip() for line in response.split('\n') if line.strip()]
    numbered = [m.group(1) for m in map(numbered_line.match, non_blank) if m]

    # If the model ignored the numbering instruction, fall back to every non-blank line.
    return numbered or non_blank

generate_questions(["Is it related to cooking, food, or beverages?"], ["Yes"], 2)

['Is it something that you eat or drink directly?',
 'Is it primarily used as an ingredient in recipes?']

In [73]:
def rephrase_as_fact(question, answer):
    """Have the model restate a question/answer pair as one declarative fact.

    Args:
        question: The yes/no question text.
        answer: The answer to that question (e.g. "Yes" or "No").

    Returns:
        The model's reply text, unmodified.
    """
    system_instructions = (
        "Given a question and answer pair, rephrase it as a single fact."
        "Example input:\nQuestion: Is it smaller than a breadbox?\nAnswer: No"
        "Example output: It is larger than a breadbox."
    )
    conversation = [
        {"role": "system", "content": system_instructions},
        {"role": "user", "content": f"Question: {question}\nAnswer: {answer}"},
    ]

    # MODEL is the notebook-level model selection.
    reply = completion(MODEL, conversation)
    return reply.choices[0].message.content

rephrase_as_fact("Is it related to food, beverages or cooking?", "No")

'It is not related to food, beverages, or cooking.'

In [209]:
def evaluate_keyword(fact, keyword):
    """Ask the model whether ``fact`` holds for ``keyword``.

    Args:
        fact: A declarative statement, e.g. "It is larger than a breadbox".
        keyword: The candidate keyword to check against the fact.

    Returns:
        ``True`` when the model's reply begins with "true" (case-insensitive,
        ignoring surrounding whitespace, quotes and markdown punctuation);
        ``False`` for every other reply, including an empty one.
    """
    prompt = {
        "role": "system",
        "content": (
            "Given a fact, state whether or not it fits a given keyword."
            "Reply only with the text 'True' or 'False'."
            "Example input:\nFact: It is larger than a breadbox\nKeyword: spoon"
            "Example output: False"
        ),
    }

    q_and_a = {
        "role": "user",
        "content": f"Fact: {fact}\nKeyword: {keyword}",
    }

    messages = [prompt, q_and_a]

    response = completion(MODEL, messages).choices[0].message.content

    # Checking for the letter "t" anywhere in the reply classified answers such
    # as "False, it does not fit." as True. Look for the actual verdict instead.
    verdict = (response or "").strip().strip("'\"`*.").lower()
    return verdict.startswith("true")

evaluate_keyword("It is related to food, beverages or cooking.", "coke")

True

In [240]:
import math
import random

def most_common(lst):
    """Return the element that occurs most often in ``lst``.

    Raises ``ValueError`` when ``lst`` is empty.
    """
    # Count each distinct element once, then pick the key with the largest tally.
    tallies = {candidate: lst.count(candidate) for candidate in set(lst)}
    return max(tallies, key=tallies.get)

def test_keyword_percentage_eliminated(question, keywords, keyword_test_percentage=0.25):
    fact = rephrase_as_fact(question, "Yes")
    count = 0
    num_test_keywords = max(min(50, len(keywords)), math.floor(len(keywords) * keyword_test_percentage))
    keyword_test_set = random.sample(keywords, k=len(keywords))[:num_test_keywords]
    print(f"Num keywords in test set: {len(keyword_test_set)}")
    remaining_keywords_yes = []
    remaining_keywords_no = []
    for x, keyword in enumerate(keyword_test_set):
        #evaluations = [evaluate_keyword(fact, keyword) for _ in range(3)]
        #result = most_common(evaluations)
        #print(f"{x+1}) Keyword: {keyword}. Results: {evaluations} -> {result}")
        result = evaluate_keyword(fact, keyword)
        print(f"{x+1}) Keyword: {keyword}. Result: {result}")
        if result: 
            count += 1
            remaining_keywords_yes.append(keyword)
        else:
            remaining_keywords_no.append(keyword)
    return count / len(keyword_test_set), remaining_keywords_yes, remaining_keywords_no

In [217]:
test_keyword_percentage_eliminated("Is it be found indoors?", all_keywords, 0.01)

Num keywords in test set: 13
1) Keyword: car antenna. Results: [False, False, False] -> False
2) Keyword: plastic sign. Results: [True, True, True] -> True
3) Keyword: gas stove. Results: [True, True, True] -> True
4) Keyword: snowmobile. Results: [False, False, False] -> False
5) Keyword: Fertilizer. Results: [True, True, True] -> True
6) Keyword: clay. Results: [True, True, True] -> True
7) Keyword: soccer ball. Results: [True, True, True] -> True
8) Keyword: petunia. Results: [False, False, False] -> False
9) Keyword: beer coaster. Results: [True, True, True] -> True
10) Keyword: Red wine. Results: [True, True, True] -> True
11) Keyword: Cauliflower. Results: [True, True, True] -> True
12) Keyword: mousepad. Results: [True, True, True] -> True
13) Keyword: Brochure. Results: [True, True, True] -> True


(0.7692307692307693,
 ['plastic sign',
  'gas stove',
  'Fertilizer',
  'clay',
  'soccer ball',
  'beer coaster',
  'Red wine',
  'Cauliflower',
  'mousepad',
  'Brochure'],
 ['car antenna', 'snowmobile', 'petunia'])

In [263]:
def build_node(current_depth, remaining_keywords, questions, answers, max_depth=5, num_test_questions=5, keyword_test_percentage=0.25):
    """Recursively build the subtree for the keywords still in play.

    Candidate questions are generated from the question/answer history, each is
    scored by how close its yes-fraction is to 50%, and the best one becomes
    this node's query. The yes/no keyword lists it produced seed the children.

    Args:
        current_depth: Depth of the node being built.
        remaining_keywords: Keywords consistent with the answers so far.
        questions: Questions asked on the path to this node.
        answers: Answers on the path to this node, parallel to ``questions``.
        max_depth: Depth at which nodes are created without children.
        num_test_questions: Number of candidate questions requested per node.
        keyword_test_percentage: Passed to ``test_keyword_percentage_eliminated``.

    Returns:
        A ``DecisionNode``. It has no query when there are no keywords left or
        no candidate question was produced. It has no children at ``max_depth``
        or when the best question puts every tested keyword on one side.
    """
    # Score below which a question is accepted immediately without trying the rest.
    good_enough_score = 0.15

    if not remaining_keywords:
        # Nothing left to split; do not spend model calls on an empty branch.
        return DecisionNode()

    possible_questions = generate_questions(questions, answers, num_questions=num_test_questions)

    best_question = None
    best_remaining_keywords_yes = []
    best_remaining_keywords_no = []
    best_question_score = 1 #lower is better
    for q in possible_questions:
        print(f"Question: {q}")
        percentage, new_remaining_keywords_yes, new_remaining_keywords_no = test_keyword_percentage_eliminated(q, remaining_keywords, keyword_test_percentage)
        score = abs(0.5 - percentage)
        # `percentage` is a 0-1 fraction, so format it as a real percentage.
        print(f"Percentage: {percentage:.1%}\tScore: {score}\n")
        if score < best_question_score:
            best_question_score = score
            best_question = q
            best_remaining_keywords_yes = new_remaining_keywords_yes
            best_remaining_keywords_no = new_remaining_keywords_no
            if score < good_enough_score:
                break

    if best_question is None:
        # No candidate question came back; recursing would only repeat that.
        return DecisionNode()

    # A score of 0.5 means one side is empty, so the question does not split
    # anything. `>=` on depth also stops a call that starts below max_depth.
    if current_depth >= max_depth or best_question_score >= 0.5:
        return DecisionNode(query=best_question)

    print(f"_______\nDepth {current_depth} -- Starting Yes branch for question: {best_question}")
    print(f"Remaining keywords yes: {best_remaining_keywords_yes}")
    yes_node = build_node(current_depth + 1, best_remaining_keywords_yes, questions + [best_question], answers + ["Yes"], max_depth, num_test_questions, keyword_test_percentage)

    print(f"_______\nDepth {current_depth} -- Starting No branch for question: {best_question}")
    print(f"Remaining keywords no: {best_remaining_keywords_no}")
    no_node = build_node(current_depth + 1, best_remaining_keywords_no, questions + [best_question], answers + ["No"], max_depth, num_test_questions, keyword_test_percentage)

    return DecisionNode(query=best_question, no_branch=no_node, yes_branch=yes_node)
    

In [224]:
def build_tree(starting_question, keywords, initial_starting_keywords_yes=None, initial_starting_keywords_no=None, max_depth=5, num_test_questions=5, keyword_test_percentage=0.25):
    """Build the full decision tree rooted at ``starting_question``.

    Args:
        starting_question: Query placed at the root.
        keywords: All candidate keywords. Used to compute the root split when no
            precomputed split is supplied.
        initial_starting_keywords_yes: Optional precomputed "yes" keywords for the root.
        initial_starting_keywords_no: Optional precomputed "no" keywords for the root.
        max_depth: Passed to ``build_node``.
        num_test_questions: Passed to ``build_node``.
        keyword_test_percentage: Passed to ``build_node``. The root split itself
            is always computed with a percentage of 1.

    Returns:
        The root ``DecisionNode``. A side of the root split with no keywords
        becomes a node without a query.
    """
    # Compare against None rather than truthiness: a supplied split with one
    # legitimately empty side used to be discarded and recomputed. A pair of two
    # empty lists carries no information and is still treated as "not supplied".
    split_supplied = (
        initial_starting_keywords_yes is not None
        and initial_starting_keywords_no is not None
        and (len(initial_starting_keywords_yes) > 0 or len(initial_starting_keywords_no) > 0)
    )
    if split_supplied:
        print("Using provided initial starting keywords")
        starting_keywords_yes = initial_starting_keywords_yes
        starting_keywords_no = initial_starting_keywords_no
    else:
        print(f"Determining keyword split for starting question: {starting_question}")
        starting_percentage_split, starting_keywords_yes, starting_keywords_no = test_keyword_percentage_eliminated(starting_question, keywords, keyword_test_percentage=1)
        print(f"Starting percentage split: {starting_percentage_split}")

    print(f"Starting Yes branch for question {starting_question}")
    print(f"Remaining keywords yes: {starting_keywords_yes}")
    if starting_keywords_yes:
        yes_node = build_node(1, starting_keywords_yes, [starting_question], ["Yes"], max_depth, num_test_questions, keyword_test_percentage)
    else:
        # No keywords on this side: nothing for build_node to split.
        yes_node = DecisionNode()

    print(f"Starting No branch for question {starting_question}")
    print(f"Remaining keywords no: {starting_keywords_no}")
    if starting_keywords_no:
        no_node = build_node(1, starting_keywords_no, [starting_question], ["No"], max_depth, num_test_questions, keyword_test_percentage)
    else:
        no_node = DecisionNode()

    return DecisionNode(query=starting_question, no_branch=no_node, yes_branch=yes_node)

### Save these keywords for the starting question to save time since these won't change

In [174]:
culinary_starting_keywords_yes = ['seltzer', 'plastic fork', 'Udon Noodles', 'plastic tube', 'fish scale', 'pea plant', 'Cream cheese', 'Cauliflower', 'steak knife', 'Baguette', 'pumpkin', 'seagull', 'lid', 'Energy drink', 'Grape Vine', 'Bird seed', 'wine decanter', 'metal lid', 'Pad', 'juniper bush', 'Fried rice', 'cocktail glass', 'swedish fish', 'tin foil', 'frozen meat', 'plastic baggie', 'protein shake', 'lemon juice', 'tea leaf', 'beer stein', 'wonton', 'aperol spritz', 'rusty nail', 'Hibiscus', 'corkscrew', 'Soybeans', 'tea bag', 'rainbow cake', 'Wine Tasting Kit', 'foil paper', 'raspberry', 'nightshade', 'Green beans', 'Edamame', 'finger', 'honey', 'Wine Aerator', 'Pesticide', 'parsley', 'vietnamese coffee', 'kelp', 'Red wine', 'Cutlery', 'ramen', 'Croissant', 'peanut butter', 'tea cup', 'grinder', 'rosemary', 'mortar', 'picnic basket', 'honeydew', 'coffee ground', 'potato chip', 'Cheesecake', 'wine glass', 'spray', 'ham', 'Chocolate cake', 'agave plant', 'Spatula', 'garden fork', 'Fridge magnet', 'peeler', 'Vanilla Extract', 'strainer', 'metal spoon', 'dishcloth', 'Silicone', 'unripe banana', 'Paper towels', 'hot tea', 'cannoli', 'yeast', 'almond milk', 'cooler', 'brussel sprout', 'pizza box', 'unripe fruit', 'coffee mug', 'jasmine', 'Coffee grounds', 'gelatin', 'frozen waffle', 'pomegranate', 'cotton candy', 'cupcake', 'Champagne flute', 'cilantro', 'pear tree', 'broth', 'vegetarian chili', 'salt shaker', 'Wine Opener', 'wild garlic', 'Dandelion', 'Habanero pepper', 'beer glass', 'honeycrisp', 'rhubarb', 'French toast', 'Potato', 'Yogurt', 'shaker', 'Ice Water', 'grill', 'Salad Dressing', 'lemon wedge', 'kiwifruit', 'gin', 'Measuring cup', 'Plastic wrap', 'noodle', 'lemon balm', 'coffee machine', 'cruise ship', 'Plate', 'acorn', 'coffee bag', 'chutney', 'frappe', 'pineapple', 'Protein Shakes', 'parrotfish', 'cold pack', 'steak', 'doughnut', 'juniper tree', 'Red velvet cake', 'Rolling pin', 'Wire rack', 'aerator', 'Water Heater', 'wonton soup', 
'carnation', 'compost', 'Honeybee', 'jam', 'butter', 'bean plant', 'Seed packet', 'paper roll', 'Sippy cup', 'Hot dog', 'pot holder', 'rice krispie', 'Tofu', 'mango', 'Popcorn machine', 'Unleavened Bread', 'melon', 'beer tap', 'Popcorn', 'kheer', 'chocolate bar', 'canning jar', 'hummus', 'fruit cup', 'Oven rack', 'sealing machine', 'wooden spoon', 'Microwave', 'soup', 'kitchen knife', 'goose', 'acacia', 'Mushroom', 'mango tree', 'Bread knife', 'mesquite tree', 'maple tree', 'towel basket', 'ice tray', 'Muffin Tin', 'tupperware', 'Quesadilla', 'Marshmallow', 'pellet', 'Gadget', 'Fungi', 'Utility Cart', 'bar towel', 'Finger food', 'ice crusher', 'Orange tree', 'Conveyor belt', 'fanta', 'Iron', 'wing', 'gel', 'squid', 'banana', 'honeydew melon', 'cane', 'raisin', 'granola', 'fudge brownie', 'Vegetable', 'Silicone mat', 'Eucalyptus', 'ginger', 'Frappuccino', 'oyster', 'chiller', 'Food bowl', 'plastic tub', 'gift box', 'water chestnut', 'ginger ale', 'Snacks', 'lime', 'refrigerator', 'skewer', 'pitcher', 'Rice pudding', 'camp stove', 'Rat', 'milling', 'Piping tips', 'flour', 'enamel', 'pudding', 'Mixing Bowl', 'centerpiece', 'syrup', 'vegan chocolate', 'pipette', 'Oven door', 'apricot tree', 'Cinnamon roll', 'apple', 'peppermint', 'serving bowl', 'Water Buffalo', 'laxative', 'Iced coffee', 'Coffee Grinder', 'butter knife', 'ricotta', 'nitrogen', 'hamburger helper', 'tomato plant', 'sushi', 'bowl', 'Cutting Board', 'jaffa cake', 'fennel', 'Stove', 'caribou', 'sauerkraut', 'Water', 'Vending Machine', 'grape', 'Brewery merchandise', 'ice tea', 'coconut', 'Radish', 'Mini fridge', 'Donuts', 'pea soup', 'Bread pudding', 'colander', 'Apple pie', 'dishwasher', 'dill pickle', 'leaf', 'Hash browns', 'Cantaloupe', 'Heating Element', 'measuring spoon', 'jello', 'Veggie Burger', 'Nachos', 'Roots', 'dishtowel', 'cereal bar', 'Soda', 'Plastic Gloves', 'water jug', 'tiramisu', 'peach tree', 'bread maker', 'Latte', 'hazelnut tree', 'coke', 'anchovy', 'saltwater', 'lemon zest', 'Muffin', 
'orange slice', 'Scones', 'Placemat', 'Protein bar', 'Orange peel', 'Cereal', 'Water Bottle', 'crayfish', 'yarrow', 'Bottled Water', 'bean', 'Olive Oil', 'martini', 'Sieve', 'grape juice', 'cake', 'applesauce', 'Bacteria', 'saucer', 'bamboo leaf', 'tongue', 'ziploc', 'aluminum foil', 'sumantri', 'smoothie', 'Kitchen cabinet', 'ketchup', 'Tomato', 'juicer', 'duckweed', 'Pumpkin pie', 'Edible flowers', 'Earl Grey tea', 'Brownies', 'electric oven', 'gas stove', 'Garbage Disposal', 'beer coaster', 'Coffee beans', 'seafoam', 'Sugar packet', 'water bowl', 'Orange zest', 'Toaster Oven', 'weighing scale', 'bacon', 'Agave', 'Food warmers', 'cherry tree', 'High Chair', 'dye', 'candy wrapper', 'oatmeal cookie', 'Grapefruit', 'toothpick', 'aquarium salt', 'humus', 'culture medium', 'Kale smoothie', 'ice pop', 'yule log', 'Coffee Makers', 'thermometer probe', 'Hazelnut', 'espresso machine', 'water dish', 'whisk', 'orange juice', 'meatloaf', 'yogurt container', 'juice box', 'Dish soap', 'Ice cube tray', 'sweet tea', 'squash', 'Aprons', 'alfalfa', 'Relish', 'Bonding Agent', 'metal foil', 'empty can', 'serving platter', 'cutter', 'Fortune cookie', 'Ice cream maker']
# Every keyword that is not in the saved "yes" list goes on the "no" side of the
# starting question.
culinary_starting_keywords_no = [
    keyword
    for keyword in all_keywords
    if keyword not in culinary_starting_keywords_yes
]

### Generate the entire tree

In [289]:
test_tree  = build_tree(
    "Is it related to food, beverages or cooking?", 
    all_keywords, 
    initial_starting_keywords_yes=culinary_starting_keywords_yes, 
    initial_starting_keywords_no=culinary_starting_keywords_no, 
    max_depth=5, 
    num_test_questions=5,
    keyword_test_percentage=1
)

Using provided initial starting keywords
Starting Yes branch for question Is it related to food, beverages or cooking?
Remaining keywords yes: ['seltzer', 'plastic fork', 'Udon Noodles', 'plastic tube', 'fish scale', 'pea plant', 'Cream cheese', 'Cauliflower', 'steak knife', 'Baguette', 'pumpkin', 'seagull', 'lid', 'Energy drink', 'Grape Vine', 'Bird seed', 'wine decanter', 'metal lid', 'Pad', 'juniper bush', 'Fried rice', 'cocktail glass', 'swedish fish', 'tin foil', 'frozen meat', 'plastic baggie', 'protein shake', 'lemon juice', 'tea leaf', 'beer stein', 'wonton', 'aperol spritz', 'rusty nail', 'Hibiscus', 'corkscrew', 'Soybeans', 'tea bag', 'rainbow cake', 'Wine Tasting Kit', 'foil paper', 'raspberry', 'nightshade', 'Green beans', 'Edamame', 'finger', 'honey', 'Wine Aerator', 'Pesticide', 'parsley', 'vietnamese coffee', 'kelp', 'Red wine', 'Cutlery', 'ramen', 'Croissant', 'peanut butter', 'tea cup', 'grinder', 'rosemary', 'mortar', 'picnic basket', 'honeydew', 'coffee ground', 'pot

In [290]:
test_tree.print_tree()

-- Is it related to food, beverages or cooking?
  |-- Is it edible?
  |  |-- Is it a processed food?
  |  |  |-- Does it require refrigeration?
  |  |  |  |-- Does it come in a solid form?
  |  |  |  |  |-- Is it typically consumed by itself rather than as an ingredient in other dishes?
  |  |  |  |   -- Is it typically used as a condiment or topping?
  |  |  |   -- Is it a type of baked good?
  |  |  |     |-- Is it typically found in the bread aisle?
  |  |  |      -- Is it typically consumed as a snack?
  |  |   -- Is it a type of fruit or vegetable?
  |  |     |-- Is it a fruit?
  |  |     |  |-- Does it have a single large seed or pit?
  |  |     |   -- Is it typically cooked before eating?
  |  |      -- Is it of animal origin?
  |  |        |-- Is it commonly consumed raw?
  |  |         -- Is it typically consumed raw?
  |   -- Can it be classified as a utensil or tool?
  |     |-- Is it typically made of metal?
  |     |  |-- Is it primarily used for handling hot items?
  |   

### Save to text file

In [291]:
test_tree.write_tree_to_file('20qs-data/gpt_decision_tree_v2.txt')

### Load tree from txt file (doesn't load the whole tree)

In [292]:
tree2 = DecisionNode.read_tree_from_file('20qs-data/gpt_decision_tree_v2.txt')

### Save with pickle

In [294]:
import pickle
# Serialize the in-memory tree object (test_tree) to disk; the file is opened in
# binary write mode, so an existing file at this path is overwritten.
with open('20qs-data/decision_trees/gpt_decision_tree_v2.p', 'wb') as f:
    pickle.dump(test_tree, f)

### Example of loading decision tree with Pickle

In [295]:
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code when given an untrusted file.
# NOTE(review): presumably the DecisionNode class must already be defined in the
# kernel for this load to succeed — confirm.
with open('20qs-data/decision_trees/gpt_decision_tree_v2.p', 'rb') as f:
    tree3 = pickle.load(f)

### Testing some alternative starting questions

In [247]:
# Try several candidate root questions on a 20% keyword sample and report the
# yes-fraction each one produces.
for question in (
    "Is it something you can hold in your hand?",
    "Is it used primarily for entertainment purposes?",
    "Is it found in nature (not man-made)?",
    "Is it something you would typically find in a kitchen?",
    "Does it require electricity to function?",
):
    result, _, _ = test_keyword_percentage_eliminated(question, all_keywords, 0.2)
    print(f"Question: {question}  {result}")

Num keywords in test set: 269
1) Keyword: unripe banana. Result: True
2) Keyword: colander. Result: True
3) Keyword: pail. Result: True
4) Keyword: flashlight. Result: True
5) Keyword: water snake. Result: True
6) Keyword: weight plate. Result: True
7) Keyword: power adapter. Result: True
8) Keyword: valve. Result: True
9) Keyword: Lawnmower. Result: False
10) Keyword: ice tea. Result: True
11) Keyword: plastic fork. Result: True
12) Keyword: martini. Result: True
13) Keyword: Plastic wrap. Result: True
14) Keyword: hole puncher. Result: True
15) Keyword: centerpiece. Result: True
16) Keyword: travel pillow. Result: True
17) Keyword: waterslide. Result: False
18) Keyword: teddy bear. Result: True
19) Keyword: parking pass. Result: True
20) Keyword: Mushroom. Result: True
21) Keyword: Bird seed. Result: True
22) Keyword: bedspread. Result: False
23) Keyword: shower mat. Result: True
24) Keyword: groundhog. Result: True
25) Keyword: plant saucer. Result: True
26) Keyword: Hot water bottl