In [None]:
from openai import OpenAI
from google.colab import userdata
from typing import Union, Optional, List
from collections.abc import Callable
import re

In [None]:
# Paste a key here only for throwaway runs. When this is left empty, the key is
# read from Colab's secret store so it is never saved inside the notebook.
OPENAI_API_KEY = ""
if not OPENAI_API_KEY:
    # Expects a Colab secret named 'OPENAI_API_KEY' with notebook access enabled;
    # userdata.get raises if the secret is missing or access was not granted.
    OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
client = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
def get_thought_gen_prompt(input_seq: str, state: str) -> str:
    """Get thought generation prompt.

    Keyword arguments:
    input_seq -- the input sequence (comprising four numbers, e.g., '1 1 1 8')
    state -- concatenation of all the thoughts so far (newline-separated);
             an empty or whitespace-only state means the root node

    Returns:
    The "possible next steps" few-shot prompt for the numbers that are still
    left, or, once the last thought leaves exactly '24', the step-by-step
    few-shot prompt that asks for the final answer expression.
    """

    # Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/tasks/game24.py
    def get_remaining_numbers(thought: str) -> str:
        # Text between the last 'left: ' and the next ')'.
        return thought.split('left: ')[-1].split(')')[0]

    thoughts = state.strip()
    if not thoughts: # Root node; no thoughts have been generated yet.
        # A blank state (including stray whitespace/newlines) must not be parsed
        # as a thought, otherwise the prompt would end up with an empty input.
        remaining_numbers = input_seq
    else:
        remaining_numbers = get_remaining_numbers(thoughts.split('\n')[-1])

    if remaining_numbers != '24': # Intermediate step.
        # Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py
        prompt = f'''Input: 2 8 8 14
Possible next steps:
2 + 8 = 10 (left: 8 10 14)
8 / 2 = 4 (left: 4 8 14)
14 + 2 = 16 (left: 8 8 16)
2 * 8 = 16 (left: 8 14 16)
8 - 2 = 6 (left: 6 8 14)
14 - 8 = 6 (left: 2 6 8)
14 /  2 = 7 (left: 7 8 8)
14 - 2 = 12 (left: 8 8 12)
Input: {remaining_numbers}
Possible next steps:
'''
    else: # Last (output generation) step.
        # The state is inserted as given (not stripped) after 'Steps:'.
        # Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py
        prompt = f'''Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Each step, you are only allowed to choose two of the remaining numbers to obtain a new number.
Input: 4 4 6 8
Steps:
4 + 8 = 12 (left: 4 6 12)
6 - 4 = 2 (left: 2 12)
2 * 12 = 24 (left: 24)
Answer: (6 - 4) * (4 + 8) = 24
Input: 2 9 10 12
Steps:
12 * 2 = 24 (left: 9 10 24)
10 - 9 = 1 (left: 1 24)
24 * 1 = 24 (left: 24)
Answer: (12 * 2) * (10 - 9) = 24
Input: 4 9 10 13
Steps:
13 - 10 = 3 (left: 3 4 9)
9 - 3 = 6 (left: 4 6)
4 * 6 = 24 (left: 24)
Answer: 4 * (9 - (13 - 10)) = 24
Input: 1 4 8 8
Steps:
8 / 4 = 2 (left: 1 2 8)
1 + 2 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)
Answer: (1 + 8 / 4) * 8 = 24
Input: 5 5 5 9
Steps:
5 + 5 = 10 (left: 5 9 10)
10 + 5 = 15 (left: 9 15)
15 + 9 = 24 (left: 24)
Answer: ((5 + 5) + 5) + 9 = 24
Input: {input_seq}
Steps:
{state}
'''
    return prompt

In [None]:
def get_state_eval_prompt(input_seq: str, state: str) -> str:
    """Get state evaluation prompt.

    Keyword arguments:
    input_seq -- the input sequence (comprising four numbers, e.g., '1 1 1 8')
    state -- concatenation of all the thoughts so far (newline-separated)

    Returns:
    When the last line of the state contains 'left: ', the few-shot prompt that
    rates the remaining numbers (sure/likely/impossible). Otherwise the last
    line is treated as the final answer and the few-shot prompt that judges it
    against input_seq (sure/impossible) is returned.
    """
    final_line = state.strip().split('\n')[-1]

    if 'left: ' in final_line: # Intermediate step.
        # Text between the last 'left: ' and the next ')'.
        # Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/tasks/game24.py
        numbers_left = final_line.split('left: ')[-1].split(')')[0]
        # Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py
        return f'''Evaluate if given numbers can reach 24 (sure/likely/impossible)
10 14
10 + 14 = 24
sure
11 12
11 + 12 = 23
12 - 11 = 1
11 * 12 = 132
11 / 12 = 0.91
impossible
4 4 10
4 + 4 + 10 = 8 + 10 = 18
4 * 10 - 4 = 40 - 4 = 36
(10 - 4) * 4 = 6 * 4 = 24
sure
4 9 11
9 + 11 + 4 = 20 + 4 = 24
sure
5 7 8
5 + 7 + 8 = 12 + 8 = 20
(8 - 5) * 7 = 3 * 7 = 21
I cannot obtain 24 now, but numbers are within a reasonable range
likely
5 6 6
5 + 6 + 6 = 17
(6 - 5) * 6 = 1 * 6 = 6
I cannot obtain 24 now, but numbers are within a reasonable range
likely
10 10 11
10 + 10 + 11 = 31
(11 - 10) * 10 = 10
10 10 11 are all too big
impossible
1 3 3
1 * 3 * 3 = 9
(1 + 3) * 3 = 12
1 3 3 are all too small
impossible
{numbers_left}
'''

    # Last (output generation) step: the line is lower-cased and its
    # 'answer: ' label removed before it is placed into the prompt.
    answer_text = final_line.lower().replace('answer: ', '')
    # Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/prompts/game24.py
    return f'''Use numbers and basic arithmetic operations (+ - * /) to obtain 24. Given an input and an answer, give a judgement (sure/impossible) if the answer is correct, i.e. it uses each input exactly once and no other numbers, and reach 24.
Input: 4 4 6 8
Answer: (4 + 8) * (6 - 4) = 24
Judge:
sure
Input: 2 9 10 12
Answer: 2 * 12 * (10 - 9) = 24
Judge:
sure
Input: 4 9 10 13
Answer: (13 - 9) * (10 - 4) = 24
Judge:
sure
Input: 4 4 6 8
Answer: (4 + 8) * (6 - 4) + 1 = 25
Judge:
impossible
Input: 2 9 10 12
Answer: 2 * (12 - 10) = 24
Judge:
impossible
Input: 4 9 10 13
Answer: (13 - 4) * (10 - 9) = 24
Judge:
impossible
Input: {input_seq}
Answer: {answer_text}
Judge:'''

In [None]:
# Reference: https://github.com/princeton-nlp/tree-of-thought-llm/blob/master/src/tot/tasks/game24.py
def heuristic_calculator(state: str, state_evals: List[str]) -> float:
    """Combine the evaluator's verdicts for a state into one heuristic value.

    Keyword arguments:
    state -- concatenation of all the thoughts so far (newline-separated)
    state_evals -- raw evaluator outputs; the verdict ('impossible' / 'likely' /
                   'sure', case-insensitive) is read from the last non-blank
                   line of each output

    Returns:
    0 for a four-line state that contains no 'answer'; otherwise the sum of the
    verdict weights over all outputs. Outputs whose last line is not one of the
    three verdicts contribute nothing.
    """
    if len(state.strip().split('\n')) == 4 and 'answer' not in state.lower(): # Such a state is undesirable.
        return 0
    value_map = {'impossible': 0.001, 'likely': 1, 'sure': 20} # Ad hoc.
    # Strip first: model outputs often end with a newline or trailing spaces,
    # which would otherwise hide the verdict behind an empty last line.
    verdicts = [evaluation.strip().split('\n')[-1].strip().lower() for evaluation in state_evals]
    return sum(weight * verdicts.count(name) for name, weight in value_map.items())

In [None]:
# System message used as the first entry of `messages`. It tells the model to
# imitate the few-shot tree-search transcripts, to stick to their wording, and
# to always backtrack up and out of the tree even after finding a solution.
system_prompt = """Your task is to role-play a particular type of tree search algorithm.

To help you mimic this algorithm successfully, you will be provided few-shot examples. Study these examples carefully to learn how to mimic the algorithm successfully.
You must adhere to the reasoning style and choice of words in these few-shot examples. Do not use any new words that aren't present in the few-shot examples.

If at any point, you think that you have found the final solution (i.e., the search is over), you must still backtrack all the way up the tree, and then outside the tree (as shown in the few-shot examples). Do not fail to do this."""

In [None]:
# Chat history in role/content format; starts with only the system message.
# Later cells append to this list in place, so re-run this cell before
# re-running any of them, otherwise turns get duplicated.
messages = [{'role': "system", 'content': system_prompt}]

In [None]:
# Opening user turn of the first few-shot example: the four numbers plus the
# search parameters (heuristic threshold and level limit).
# Earlier bare-numbers form, kept for reference:
# input_seq1 = '1 1 9 9'
input_seq1 = '''Four numbers: 1 1 9 9

Search parameters:-
Heuristic threshold: 3.0
Level limit: 3'''

In [None]:
# User turn that opens the first few-shot example.
messages.append({'role': "user", 'content': input_seq1})

In [None]:
# Scripted assistant turn for the first example: restates the parameters,
# visits the root node and issues a thought_generator action. The text stops
# right after 'Observation:'; the next user message (user_msg1) supplies the
# tool output.
assistant_msg1 = """The four numbers are: 1 1 9 9

The minimum required heuristic threshold is: 3.0
The level limit is: 3

Starting the tree search...
---
~~~
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('', '0')

Observation:
"""

In [None]:
# Scripted observation answering the thought_generator action in
# assistant_msg1: one candidate next step per line for the numbers 1 1 9 9.
user_msg1 = """1 + 1 = 2 (left: 2 9 9)
1 * 1 = 1 (left: 1 9 9)
9 - 1 = 8 (left: 1 8 9)
9 / 1 = 9 (left: 1 9 9)
9 + 1 = 10 (left: 1 9 10)
9 * 1 = 9 (left: 1 9 9)"""

In [None]:
# Record the scripted assistant turn, then the observation that answers it.
# The order matters: the observation is delivered as the following user message.
messages.append({'role': "assistant", 'content': assistant_msg1})
messages.append({'role': "user", 'content': user_msg1})

In [None]:
assistant_msg2 = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_0: 1 + 1 = 2 (left: 2 9 9)
Node 0_1: 1 * 1 = 1 (left: 1 9 9)
Node 0_2: 9 - 1 = 8 (left: 1 8 9)
Node 0_3: 9 / 1 = 9 (left: 1 9 9)
Node 0_4: 9 + 1 = 10 (left: 1 9 10)
Node 0_5: 9 * 1 = 9 (left: 1 9 9)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_0
State of current child:
1 + 1 = 2 (left: 2 9 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 9 9)''', '''0_0''')

Observation:
"""

In [None]:
user_msg2 = """2.001"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg2})
messages.append({'role': "user", 'content': user_msg2})

In [None]:
assistant_msg3 = """

Heuristic value of current child: 2.001

2.001 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_1
State of current child:
1 * 1 = 1 (left: 1 9 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 * 1 = 1 (left: 1 9 9)''', '''0_1''')

Observation:
"""

In [None]:
user_msg3 = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg3})
messages.append({'role': "user", 'content': user_msg3})

In [None]:
assistant_msg4 = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_2
State of current child:
9 - 1 = 8 (left: 1 8 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 - 1 = 8 (left: 1 8 9)''', '''0_2''')

Observation:
"""

In [None]:
user_msg4 = """1.002"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg4})
messages.append({'role': "user", 'content': user_msg4})

In [None]:
assistant_msg5 = """

Heuristic value of current child: 1.002

1.002 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_3
State of current child:
9 / 1 = 9 (left: 1 9 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 / 1 = 9 (left: 1 9 9)''', '''0_3''')

Observation:
"""

In [None]:
user_msg5 = """1.002"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg5})
messages.append({'role': "user", 'content': user_msg5})

In [None]:
assistant_msg6 = """

Heuristic value of current child: 1.002

1.002 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_4
State of current child:
9 + 1 = 10 (left: 1 9 10)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 + 1 = 10 (left: 1 9 10)''', '''0_4''')

Observation:
"""

In [None]:
user_msg6 = """3.0"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg6})
messages.append({'role': "user", 'content': user_msg6})

In [None]:
assistant_msg7 = """

Heuristic value of current child: 3.0

3.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_4
State of current node:
9 + 1 = 10 (left: 1 9 10)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''9 + 1 = 10 (left: 1 9 10)''', '''0_4''')

Observation:
"""

In [None]:
user_msg7 = """1 + 9 = 10 (left: 10 10)
9 - 1 = 8 (left: 8 10)
10 - 9 = 1 (left: 1 1)
10 / 1 = 10 (left: 9 10)
10 - 1 = 9 (left: 9 9)"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg7})
messages.append({'role': "user", 'content': user_msg7})

In [None]:
assistant_msg8 = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_4_0: 1 + 9 = 10 (left: 10 10)
Node 0_4_1: 9 - 1 = 8 (left: 8 10)
Node 0_4_2: 10 - 9 = 1 (left: 1 1)
Node 0_4_3: 10 / 1 = 10 (left: 9 10)
Node 0_4_4: 10 - 1 = 9 (left: 9 9)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_4
State of current node:
9 + 1 = 10 (left: 1 9 10)

Index of current child: 0_4_0
State of current child:
9 + 1 = 10 (left: 1 9 10)
1 + 9 = 10 (left: 10 10)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 + 1 = 10 (left: 1 9 10)\n1 + 9 = 10 (left: 10 10)''', '''0_4_0''')

Observation:
"""

In [None]:
user_msg8 = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg8})
messages.append({'role': "user", 'content': user_msg8})

In [None]:
assistant_msg9 = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_4
State of current node:
9 + 1 = 10 (left: 1 9 10)

Index of current child: 0_4_1
State of current child:
9 + 1 = 10 (left: 1 9 10)
9 - 1 = 8 (left: 8 10)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 + 1 = 10 (left: 1 9 10)\n9 - 1 = 8 (left: 8 10)''', '''0_4_1''')

Observation:
"""

In [None]:
user_msg9 = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg9})
messages.append({'role': "user", 'content': user_msg9})

In [None]:
assistant_msg10 = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_4
State of current node:
9 + 1 = 10 (left: 1 9 10)

Index of current child: 0_4_2
State of current child:
9 + 1 = 10 (left: 1 9 10)
10 - 9 = 1 (left: 1 1)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 + 1 = 10 (left: 1 9 10)\n10 - 9 = 1 (left: 1 1)''', '''0_4_2''')

Observation:
"""

In [None]:
user_msg10 = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg10})
messages.append({'role': "user", 'content': user_msg10})

In [None]:
assistant_msg11 = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_4
State of current node:
9 + 1 = 10 (left: 1 9 10)

Index of current child: 0_4_3
State of current child:
9 + 1 = 10 (left: 1 9 10)
10 / 1 = 10 (left: 9 10)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 + 1 = 10 (left: 1 9 10)\n10 / 1 = 10 (left: 9 10)''', '''0_4_3''')

Observation:
"""

In [None]:
user_msg11 = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg11})
messages.append({'role': "user", 'content': user_msg11})

In [None]:
assistant_msg12 = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_4
State of current node:
9 + 1 = 10 (left: 1 9 10)

Index of current child: 0_4_4
State of current child:
9 + 1 = 10 (left: 1 9 10)
10 - 1 = 9 (left: 9 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 + 1 = 10 (left: 1 9 10)\n10 - 1 = 9 (left: 9 9)''', '''0_4_4''')

Observation:
"""

In [None]:
user_msg12 = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg12})
messages.append({'role': "user", 'content': user_msg12})

In [None]:
assistant_msg13 = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 1
Index of current node: 0_4

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0_4''', '''0''')

Observation:
"""

In [None]:
# Acknowledgement sent back as the observation for the backtracking action.
user_msg13 = """Ok."""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg13})
messages.append({'role': "user", 'content': user_msg13})

In [None]:
assistant_msg14 = """
---
~~~
We're back at node: 0

Flag received: 'FALSE'

That is, searching the subtree starting at child node 0_4 was unsuccessful!

Trying the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_5
State of current child:
9 * 1 = 9 (left: 1 9 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''9 * 1 = 9 (left: 1 9 9)''', '''0_5''')

Observation:
"""

In [None]:
user_msg14 = """1.002"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg14})
messages.append({'role': "user", 'content': user_msg14})

In [None]:
assistant_msg15 = """

Heuristic value of current child: 1.002

1.002 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 0
Index of current node: 0

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0''', '''None''')

Observation:
"""

In [None]:
user_msg15 = """Ok."""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg15})
messages.append({'role': "user", 'content': user_msg15})

In [None]:
assistant_msg16 = """
---
~~~
We're now outside the tree.

Flag received: False"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg16})

In [None]:
# input_seq2 = '1 1 4 9'
input_seq2 = '''Four numbers: 1 1 4 9

Search parameters:-
Heuristic threshold: 3.0
Level limit: 3'''

In [None]:
messages.append({'role': "user", 'content': input_seq2})

In [None]:
assistant_msg1_ = """The four numbers are: 1 1 4 9

The minimum required heuristic threshold is: 3.0
The level limit is: 3

Starting the tree search...
---
~~~
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('', '0')

Observation:
"""

In [None]:
user_msg1_ = """1 + 1 = 2 (left: 2 4 9)
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 1 4 8)
1 * 1 = 1 (left: 1 4 9)
9 / 1 = 9 (left: 1 4 9)
4 / 1 = 4 (left: 1 4 9)
9 - 4 = 5 (left: 1 1 5)
4 - 1 = 3 (left: 1 3 9)"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg1_})
messages.append({'role': "user", 'content': user_msg1_})

In [None]:
assistant_msg2_ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_0: 1 + 1 = 2 (left: 2 4 9)
Node 0_1: 4 - 1 = 3 (left: 1 3 9)
Node 0_2: 9 - 1 = 8 (left: 1 4 8)
Node 0_3: 1 * 1 = 1 (left: 1 4 9)
Node 0_4: 9 / 1 = 9 (left: 1 4 9)
Node 0_5: 4 / 1 = 4 (left: 1 4 9)
Node 0_6: 9 - 4 = 5 (left: 1 1 5)
Node 0_7: 4 - 1 = 3 (left: 1 3 9)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_0
State of current child:
1 + 1 = 2 (left: 2 4 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)''', '''0_0''')

Observation:
"""

In [None]:
user_msg2_ = """3.0"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg2_})
messages.append({'role': "user", 'content': user_msg2_})

In [None]:
assistant_msg3_ = """

Heuristic value of current child: 3.0

3.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''1 + 1 = 2 (left: 2 4 9)''', '''0_0''')

Observation:
"""

In [None]:
user_msg3_ = """2 + 4 = 6 (left: 6 9)
4 - 2 = 2 (left: 2 9)
9 - 2 = 7 (left: 4 7)
9 - 4 = 5 (left: 2 5)
2 * 4 = 8 (left: 8 9)
9 / 2 = 4.5 (left: 4 4.5)
9 / 4 = 2.25 (left: 2 2.25)
4 * 2 = 8 (left: 8 9)"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg3_})
messages.append({'role': "user", 'content': user_msg3_})

In [None]:
assistant_msg4_ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_0_0: 2 + 4 = 6 (left: 6 9)
Node 0_0_1: 4 - 2 = 2 (left: 2 9)
Node 0_0_2: 9 - 2 = 7 (left: 4 7)
Node 0_0_3: 9 - 4 = 5 (left: 2 5)
Node 0_0_4: 2 * 4 = 8 (left: 8 9)
Node 0_0_5: 9 / 2 = 4.5 (left: 4 4.5)
Node 0_0_6: 9 / 4 = 2.25 (left: 2 2.25)
Node 0_0_7: 4 * 2 = 8 (left: 8 9)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_0
State of current child:
1 + 1 = 2 (left: 2 4 9)
2 + 4 = 6 (left: 6 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n2 + 4 = 6 (left: 6 9)''', '''0_0_0''')

Observation:
"""

In [None]:
user_msg4_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg4_})
messages.append({'role': "user", 'content': user_msg4_})

In [None]:
assistant_msg5_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_1
State of current child:
1 + 1 = 2 (left: 2 4 9)
4 - 2 = 2 (left: 2 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n4 - 2 = 2 (left: 2 9)''', '''0_0_1''')

Observation:
"""

In [None]:
user_msg5_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg5_})
messages.append({'role': "user", 'content': user_msg5_})

In [None]:
assistant_msg6_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_2
State of current child:
1 + 1 = 2 (left: 2 4 9)
9 - 2 = 7 (left: 4 7)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n9 - 2 = 7 (left: 4 7)''', '''0_0_2''')

Observation:
"""

In [None]:
user_msg6_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg6_})
messages.append({'role': "user", 'content': user_msg6_})

In [None]:
assistant_msg7_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_3
State of current child:
1 + 1 = 2 (left: 2 4 9)
9 - 4 = 5 (left: 2 5)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n9 - 4 = 5 (left: 2 5)''', '''0_0_3''')

Observation:
"""

In [None]:
user_msg7_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg7_})
messages.append({'role': "user", 'content': user_msg7_})

In [None]:
assistant_msg8_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_4
State of current child:
1 + 1 = 2 (left: 2 4 9)
2 * 4 = 8 (left: 8 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n2 * 4 = 8 (left: 8 9)''', '''0_0_4''')

Observation:
"""

In [None]:
user_msg8_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg8_})
messages.append({'role': "user", 'content': user_msg8_})

In [None]:
assistant_msg9_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_5
State of current child:
1 + 1 = 2 (left: 2 4 9)
9 / 2 = 4.5 (left: 4 4.5)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n9 / 2 = 4.5 (left: 4 4.5)''', '''0_0_5''')

Observation:
"""

In [None]:
user_msg9_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg9_})
messages.append({'role': "user", 'content': user_msg9_})

In [None]:
assistant_msg10_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_6
State of current child:
1 + 1 = 2 (left: 2 4 9)
9 / 4 = 2.25 (left: 2 2.25)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n9 / 4 = 2.25 (left: 2 2.25)''', '''0_0_6''')

Observation:
"""

In [None]:
user_msg10_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg10_})
messages.append({'role': "user", 'content': user_msg10_})

In [None]:
assistant_msg11_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_0
State of current node:
1 + 1 = 2 (left: 2 4 9)

Index of current child: 0_0_7
State of current child:
1 + 1 = 2 (left: 2 4 9)
4 * 2 = 8 (left: 8 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''1 + 1 = 2 (left: 2 4 9)\n4 * 2 = 8 (left: 8 9)''', '''0_0_7''')

Observation:
"""

In [None]:
user_msg11_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg11_})
messages.append({'role': "user", 'content': user_msg11_})

In [None]:
assistant_msg12_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 1
Index of current node: 0_0

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0_0''', '''0''')

Observation:
"""

In [None]:
user_msg12_ = """Ok."""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg12_})
messages.append({'role': "user", 'content': user_msg12_})

In [None]:
assistant_msg13_ = """
---
~~~
We're back at node: 0

Flag received: 'FALSE'

That is, searching the subtree starting at child node 0_0 was unsuccessful!

Trying the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_1
State of current child:
4 - 1 = 3 (left: 1 3 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)''', '''0_1''')

Observation:
"""

In [None]:
user_msg13_ = """41.0"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg13_})
messages.append({'role': "user", 'content': user_msg13_})

In [None]:
assistant_msg14_ = """

Heuristic value of current child: 41.0

41.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_1
State of current node:
4 - 1 = 3 (left: 1 3 9)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''4 - 1 = 3 (left: 1 3 9)''', '''0_1''')

Observation:
"""

In [None]:
user_msg14_ = """1 + 3 = 4 (left: 4 9)
3 - 1 = 2 (left: 2 9)
9 - 1 = 8 (left: 3 8)
9 - 3 = 6 (left: 1 6)
9 / 3 = 3 (left: 1 3)
3 * 1 = 3 (left: 3 9)
9 / 1 = 9 (left: 3 9)
1 * 3 = 3 (left: 3 9)"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg14_})
messages.append({'role': "user", 'content': user_msg14_})

In [None]:
assistant_msg15_ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_1_0: 1 + 3 = 4 (left: 4 9)
Node 0_1_1: 3 - 1 = 2 (left: 2 9)
Node 0_1_2: 9 - 1 = 8 (left: 3 8)
Node 0_1_3: 9 - 3 = 6 (left: 1 6)
Node 0_1_4: 9 / 3 = 3 (left: 1 3)
Node 0_1_5: 3 * 1 = 3 (left: 3 9)
Node 0_1_6: 9 / 1 = 9 (left: 3 9)
Node 0_1_7: 1 * 3 = 3 (left: 3 9)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_1
State of current node:
4 - 1 = 3 (left: 1 3 9)

Index of current child: 0_1_0
State of current child:
4 - 1 = 3 (left: 1 3 9)
1 + 3 = 4 (left: 4 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n1 + 3 = 4 (left: 4 9)''', '''0_1_0''')

Observation:
"""

In [None]:
user_msg15_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg15_})
messages.append({'role': "user", 'content': user_msg15_})

In [None]:
assistant_msg16_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_1
State of current node:
4 - 1 = 3 (left: 1 3 9)

Index of current child: 0_1_1
State of current child:
4 - 1 = 3 (left: 1 3 9)
3 - 1 = 2 (left: 2 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n3 - 1 = 2 (left: 2 9)''', '''0_1_1''')

Observation:
"""

In [None]:
user_msg16_ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg16_})
messages.append({'role': "user", 'content': user_msg16_})

In [None]:
# Few-shot assistant turn: rejects the child that was just scored
# (0.003 < 3.0) and asks state_evaluator to score the next sibling, 0_1_2.
assistant_msg17_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_1
State of current node:
4 - 1 = 3 (left: 1 3 9)

Index of current child: 0_1_2
State of current child:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)''', '''0_1_2''')

Observation:
"""

In [None]:
# Hard-coded state_evaluator observation for child 0_1_2 (score 60.0).
user_msg17_ = "60.0"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg17_},
    {'role': "user", 'content': user_msg17_},
])

In [None]:
# Few-shot assistant turn: accepts the child that was just scored
# (60.0 >= 3.0); level 1 is still below the level limit of 3, so the agent
# descends into node 0_1_2 (level 2) and calls thought_generator on its state.
assistant_msg18_ = """

Heuristic value of current child: 60.0

60.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 1

Level limit: 3

1 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 2
Index of current node: 0_1_2
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)''', '''0_1_2''')

Observation:
"""

In [None]:
# Hard-coded thought_generator observation for node 0_1_2: one candidate next
# step per line, no trailing newline.
user_msg18_ = "\n".join([
    "3 + 8 = 11 (left: 11)",
    "8 - 3 = 5 (left: 5)",
    "8 / 3 = 2.67 (left: 2.67)",
    "3 * 8 = 24 (left: 24)",
])

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg18_},
    {'role': "user", 'content': user_msg18_},
])

In [None]:
# Few-shot assistant turn: registers the four generated candidates as children
# 0_1_2_0 .. 0_1_2_3 of node 0_1_2, then asks state_evaluator to score the
# first child, 0_1_2_0.
assistant_msg19_ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_1_2_0: 3 + 8 = 11 (left: 11)
Node 0_1_2_1: 8 - 3 = 5 (left: 5)
Node 0_1_2_2: 8 / 3 = 2.67 (left: 2.67)
Node 0_1_2_3: 3 * 8 = 24 (left: 24)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 2
Index of current node: 0_1_2
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)

Index of current child: 0_1_2_0
State of current child:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
3 + 8 = 11 (left: 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n3 + 8 = 11 (left: 11)''', '''0_1_2_0''')

Observation:
"""

In [None]:
# Hard-coded state_evaluator observation for child 0_1_2_0 (score 0.003).
user_msg19_ = "0.003"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg19_},
    {'role': "user", 'content': user_msg19_},
])

In [None]:
# Few-shot assistant turn: rejects the child that was just scored
# (0.003 < 3.0) and asks state_evaluator to score the next sibling, 0_1_2_1.
assistant_msg20_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 2
Index of current node: 0_1_2
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)

Index of current child: 0_1_2_1
State of current child:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
8 - 3 = 5 (left: 5)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n8 - 3 = 5 (left: 5)''', '''0_1_2_1''')

Observation:
"""

In [None]:
# Hard-coded state_evaluator observation for child 0_1_2_1 (score 0.003).
user_msg20_ = "0.003"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg20_},
    {'role': "user", 'content': user_msg20_},
])

In [None]:
# Few-shot assistant turn: rejects the child that was just scored
# (0.003 < 3.0) and asks state_evaluator to score the next sibling, 0_1_2_2.
assistant_msg21_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 2
Index of current node: 0_1_2
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)

Index of current child: 0_1_2_2
State of current child:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
8 / 3 = 2.67 (left: 2.67)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n8 / 3 = 2.67 (left: 2.67)''', '''0_1_2_2''')

Observation:
"""

In [None]:
# Hard-coded state_evaluator observation for child 0_1_2_2 (score 0.003).
user_msg21_ = "0.003"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg21_},
    {'role': "user", 'content': user_msg21_},
])

In [None]:
# Few-shot assistant turn: rejects the child that was just scored
# (0.003 < 3.0) and asks state_evaluator to score the last sibling, 0_1_2_3,
# whose state ends with "(left: 24)".
assistant_msg22_ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 2
Index of current node: 0_1_2
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)

Index of current child: 0_1_2_3
State of current child:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
3 * 8 = 24 (left: 24)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)''', '''0_1_2_3''')

Observation:
"""

In [None]:
# Hard-coded state_evaluator observation for child 0_1_2_3 (score 60.0).
user_msg22_ = "60.0"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg22_},
    {'role': "user", 'content': user_msg22_},
])

In [None]:
# Few-shot assistant turn: accepts the child that was just scored
# (60.0 >= 3.0); level 2 is still below the level limit of 3, so the agent
# descends into node 0_1_2_3 (level 3) and calls thought_generator on a state
# whose last step leaves only 24.
assistant_msg23_ = """

Heuristic value of current child: 60.0

60.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 2

Level limit: 3

2 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 3
Index of current node: 0_1_2_3
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
3 * 8 = 24 (left: 24)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)''', '''0_1_2_3''')

Observation:
"""

In [None]:
# Hard-coded thought_generator observation for node 0_1_2_3: a single
# candidate, the final answer expression.
user_msg23_ = "Answer: (4 - 1) * (9 - 1) = 24"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg23_},
    {'role': "user", 'content': user_msg23_},
])

In [None]:
# Few-shot assistant turn: registers the single generated candidate (the
# "Answer: ..." line) as child 0_1_2_3_0 and asks state_evaluator to score the
# full four-line state.
assistant_msg24_ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_1_2_3_0: Answer: (4 - 1) * (9 - 1) = 24

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 3
Index of current node: 0_1_2_3
State of current node:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
3 * 8 = 24 (left: 24)

Index of current child: 0_1_2_3_0
State of current child:
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
3 * 8 = 24 (left: 24)
Answer: (4 - 1) * (9 - 1) = 24

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 - 1 = 3 (left: 1 3 9)\n9 - 1 = 8 (left: 3 8)\n3 * 8 = 24 (left: 24)\nAnswer: (4 - 1) * (9 - 1) = 24''', '''0_1_2_3_0''')

Observation:
"""

In [None]:
# Hard-coded state_evaluator observation for child 0_1_2_3_0 (score 60.0).
user_msg24_ = "60.0"

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg24_},
    {'role': "user", 'content': user_msg24_},
])

In [None]:
# Few-shot assistant turn: accepts the child that was just scored
# (60.0 >= 3.0); level 3 has reached the level limit of 3, so the agent prints
# the child's state as the final solution and starts unwinding with a
# backtracking call from node 0_1_2_3 to its parent 0_1_2.
# NOTE(review): the prose spells the flag 'TRUE' while the Action passes
# 'True' -- confirm that the backtracking tool and parser expect that casing.
assistant_msg25_ = """

Heuristic value of current child: 60.0

60.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 3

Level limit: 3

3 >= 3

Yes it is. The search is over. Printing the state of the current child as the final solution...

Final solution:-
4 - 1 = 3 (left: 1 3 9)
9 - 1 = 8 (left: 3 8)
3 * 8 = 24 (left: 24)
Answer: (4 - 1) * (9 - 1) = 24

Reminder:-
Current level: 3
Index of current node: 0_1_2_3

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0_1_2_3''', '''0_1_2''')

Observation:
"""

In [None]:
# Hard-coded acknowledgement returned for the backtracking call 0_1_2_3 -> 0_1_2.
user_msg25_ = "Ok."

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg25_},
    {'role': "user", 'content': user_msg25_},
])

In [None]:
# Few-shot assistant turn: back at node 0_1_2 (level 2) with flag 'TRUE' from
# child 0_1_2_3; the agent passes the success upward by backtracking to 0_1.
assistant_msg26_ = """
---
~~~
We're back at node: 0_1_2

Flag received: 'TRUE'

That is, searching the subtree starting at child node 0_1_2_3 was successful!

Reminder:-
Current level: 2
Index of current node: 0_1_2

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0_1_2''', '''0_1''')

Observation:
"""

In [None]:
# Hard-coded acknowledgement returned for the backtracking call 0_1_2 -> 0_1.
user_msg26_ = "Ok."

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg26_},
    {'role': "user", 'content': user_msg26_},
])

In [None]:
# Few-shot assistant turn: back at node 0_1 (level 1) with flag 'TRUE' from
# child 0_1_2; the agent passes the success upward by backtracking to the
# root node 0.
assistant_msg27_ = """
---
~~~
We're back at node: 0_1

Flag received: 'TRUE'

That is, searching the subtree starting at child node 0_1_2 was successful!

Reminder:-
Current level: 1
Index of current node: 0_1

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0_1''', '''0''')

Observation:
"""

In [None]:
# Hard-coded acknowledgement returned for the backtracking call 0_1 -> 0.
user_msg27_ = "Ok."

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg27_},
    {'role': "user", 'content': user_msg27_},
])

In [None]:
# Few-shot assistant turn: back at the root node 0 (level 0) with flag 'TRUE'
# from child 0_1; the root has no parent, so the final backtracking call
# passes 'None' as the destination.
assistant_msg28_ = """
---
~~~
We're back at node: 0

Flag received: 'TRUE'

That is, searching the subtree starting at child node 0_1 was successful!

Reminder:-
Current level: 0
Index of current node: 0

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0''', '''None''')

Observation:
"""

In [None]:
# Hard-coded acknowledgement returned for the final backtracking call out of
# the root node.
user_msg28_ = "Ok."

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg28_},
    {'role': "user", 'content': user_msg28_},
])

In [None]:
# Closing few-shot assistant turn of this worked example: the agent reports
# that it has left the tree with flag 'TRUE'. Unlike the earlier turns, this
# one contains no Action/Observation and no trailing newline.
assistant_msg29_ = """
---
~~~
We're now outside the tree.

Flag received: 'TRUE'"""

In [None]:
# Close the worked example: the last agent turn makes no tool call, so no
# observation/user message is paired with it.
messages.append(dict(role="assistant", content=assistant_msg29_))

In [None]:
# Task statement for the next worked example: the four puzzle numbers plus the
# search parameters (heuristic threshold and level limit). An earlier version
# apparently passed only the bare sequence '4 6 8 11'.
input_seq3 = (
    "Four numbers: 4 6 8 11\n"
    "\n"
    "Search parameters:-\n"
    "Heuristic threshold: 3.0\n"
    "Level limit: 3"
)

In [None]:
# Pose the next puzzle to the agent as a user turn.
messages.append(dict(role="user", content=input_seq3))

In [None]:
# Opening few-shot assistant turn for the puzzle "4 6 8 11": echoes the
# numbers and search parameters, starts at the root node (level 0, index 0,
# empty state) and calls thought_generator for the first candidates.
# NOTE(review): this Action quotes its arguments with plain single quotes
# ('' and '0'), whereas the later Actions use triple quotes -- confirm that the
# action parser accepts both forms.
assistant_msg1__ = """The four numbers are: 4 6 8 11

The minimum required heuristic threshold is: 3.0
The level limit is: 3

Starting the tree search...
---
~~~
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('', '0')

Observation:
"""

In [None]:
# Hard-coded thought_generator observation for the root node of the
# "4 6 8 11" puzzle: one candidate first step per line, no trailing newline.
user_msg1__ = "\n".join([
    "4 + 6 = 10 (left: 8 10 11)",
    "6 - 4 = 2 (left: 2 8 11)",
    "11 - 4 = 7 (left: 6 7 8)",
    "4 * 6 = 24 (left: 8 11 24)",
    "8 / 4 = 2 (left: 2 6 11)",
    "11 + 4 = 15 (left: 6 8 15)",
    "6 * 4 = 24 (left: 8 11 24)",
    "8 - 4 = 4 (left: 4 6 11)",
])

In [None]:
# Add this exchange to the few-shot history: the agent's step first, then the
# tool observation, which is replayed to the model as a user message.
messages.extend([
    {'role': "assistant", 'content': assistant_msg1__},
    {'role': "user", 'content': user_msg1__},
])

In [None]:
assistant_msg2__ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_0: 4 + 6 = 10 (left: 8 10 11)
Node 0_1: 6 - 4 = 2 (left: 2 8 11)
Node 0_2: 11 - 4 = 7 (left: 6 7 8)
Node 0_3: 4 * 6 = 24 (left: 8 11 24)
Node 0_4: 8 / 4 = 2 (left: 2 6 11)
Node 0_5: 11 + 4 = 15 (left: 6 8 15)
Node 0_6: 6 * 4 = 24 (left: 8 11 24)
Node 0_7: 8 - 4 = 4 (left: 4 6 11)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_0
State of current child:
4 + 6 = 10 (left: 8 10 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 + 6 = 10 (left: 8 10 11)''', '''0_0''')

Observation:
"""

In [None]:
user_msg2__ = """2.001"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg2__})
messages.append({'role': "user", 'content': user_msg2__})

In [None]:
assistant_msg3__ = """

Heuristic value of current child: 2.001

2.001 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_1
State of current child:
6 - 4 = 2 (left: 2 8 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''6 - 4 = 2 (left: 2 8 11)''', '''0_1''')

Observation:
"""

In [None]:
user_msg3__ = """1.002"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg3__})
messages.append({'role': "user", 'content': user_msg3__})

In [None]:
assistant_msg4__ = """

Heuristic value of current child: 1.002

1.002 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_2
State of current child:
11 - 4 = 7 (left: 6 7 8)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)''', '''0_2''')

Observation:
"""

In [None]:
user_msg4__ = """3.0"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg4__})
messages.append({'role': "user", 'content': user_msg4__})

In [None]:
assistant_msg5__ = """

Heuristic value of current child: 3.0

3.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''11 - 4 = 7 (left: 6 7 8)''', '''0_2''')

Observation:
"""

In [None]:
user_msg5__ = """6 + 7 = 13 (left: 8 13)
7 - 6 = 1 (left: 1 8)
8 - 6 = 2 (left: 2 7)
8 - 7 = 1 (left: 1 6)
6 * 7 = 42 (left: 8 42)
7 / 6 = 1.17 (left: 1.17 8)
8 / 6 = 1.33 (left: 1.33 7)
8 / 7 = 1.14 (left: 1.14 6)"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg5__})
messages.append({'role': "user", 'content': user_msg5__})

In [None]:
assistant_msg6__ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_2_0: 6 + 7 = 13 (left: 8 13)
Node 0_2_1: 7 - 6 = 1 (left: 1 8)
Node 0_2_2: 8 - 6 = 2 (left: 2 7)
Node 0_2_3: 8 - 7 = 1 (left: 1 6)
Node 0_2_4: 6 * 7 = 42 (left: 8 42)
Node 0_2_5: 7 / 6 = 1.17 (left: 1.17 8)
Node 0_2_6: 8 / 6 = 1.33 (left: 1.33 7)
Node 0_2_7: 8 / 7 = 1.14 (left: 1.14 6)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_0
State of current child:
11 - 4 = 7 (left: 6 7 8)
6 + 7 = 13 (left: 8 13)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n6 + 7 = 13 (left: 8 13)''', '''0_2_0''')

Observation:
"""

In [None]:
user_msg6__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg6__})
messages.append({'role': "user", 'content': user_msg6__})

In [None]:
assistant_msg7__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_1
State of current child:
11 - 4 = 7 (left: 6 7 8)
7 - 6 = 1 (left: 1 8)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n7 - 6 = 1 (left: 1 8)''', '''0_2_1''')

Observation:
"""

In [None]:
user_msg7__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg7__})
messages.append({'role': "user", 'content': user_msg7__})

In [None]:
assistant_msg8__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_2
State of current child:
11 - 4 = 7 (left: 6 7 8)
8 - 6 = 2 (left: 2 7)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n8 - 6 = 2 (left: 2 7)''', '''0_2_2''')

Observation:
"""

In [None]:
user_msg8__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg8__})
messages.append({'role': "user", 'content': user_msg8__})

In [None]:
assistant_msg9__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_3
State of current child:
11 - 4 = 7 (left: 6 7 8)
8 - 7 = 1 (left: 1 6)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n8 - 7 = 1 (left: 1 6)''', '''0_2_3''')

Observation:
"""

In [None]:
user_msg9__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg9__})
messages.append({'role': "user", 'content': user_msg9__})

In [None]:
assistant_msg10__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_4
State of current child:
11 - 4 = 7 (left: 6 7 8)
6 * 7 = 42 (left: 8 42)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n6 * 7 = 42 (left: 8 42)''', '''0_2_4''')

Observation:
"""

In [None]:
user_msg10__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg10__})
messages.append({'role': "user", 'content': user_msg10__})

In [None]:
assistant_msg11__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_5
State of current child:
11 - 4 = 7 (left: 6 7 8)
7 / 6 = 1.17 (left: 1.17 8)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n7 / 6 = 1.17 (left: 1.17 8)''', '''0_2_5''')

Observation:
"""

In [None]:
user_msg11__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg11__})
messages.append({'role': "user", 'content': user_msg11__})

In [None]:
assistant_msg12__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_6
State of current child:
11 - 4 = 7 (left: 6 7 8)
8 / 6 = 1.33 (left: 1.33 7)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n8 / 6 = 1.33 (left: 1.33 7)''', '''0_2_6''')

Observation:
"""

In [None]:
user_msg12__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg12__})
messages.append({'role': "user", 'content': user_msg12__})

In [None]:
assistant_msg13__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
11 - 4 = 7 (left: 6 7 8)

Index of current child: 0_2_7
State of current child:
11 - 4 = 7 (left: 6 7 8)
8 / 7 = 1.14 (left: 1.14 6)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 - 4 = 7 (left: 6 7 8)\n8 / 7 = 1.14 (left: 1.14 6)''', '''0_2_7''')

Observation:
"""

In [None]:
user_msg13__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg13__})
messages.append({'role': "user", 'content': user_msg13__})

In [None]:
assistant_msg14__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 1
Index of current node: 0_2

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0_2''', '''0''')

Observation:
"""

In [None]:
user_msg14__ = """Ok."""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg14__})
messages.append({'role': "user", 'content': user_msg14__})

In [None]:
assistant_msg15__ = """
---
~~~
We're back at node: 0

Flag received: 'FALSE'

That is, searching the subtree starting at child node 0_2 was unsuccessful!

Trying the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_3
State of current child:
4 * 6 = 24 (left: 8 11 24)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''4 * 6 = 24 (left: 8 11 24)''', '''0_3''')

Observation:
"""

In [None]:
user_msg15__ = """1.002"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg15__})
messages.append({'role': "user", 'content': user_msg15__})

In [None]:
assistant_msg16__ = """

Heuristic value of current child: 1.002

1.002 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_4
State of current child:
8 / 4 = 2 (left: 2 6 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 / 4 = 2 (left: 2 6 11)''', '''0_4''')

Observation:
"""

In [None]:
user_msg16__ = """1.002"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg16__})
messages.append({'role': "user", 'content': user_msg16__})

In [None]:
assistant_msg17__ = """

Heuristic value of current child: 1.002

1.002 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_5
State of current child:
11 + 4 = 15 (left: 6 8 15)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)''', '''0_5''')

Observation:
"""

In [None]:
user_msg17__ = """3.0"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg17__})
messages.append({'role': "user", 'content': user_msg17__})

In [None]:
assistant_msg18__ = """

Heuristic value of current child: 3.0

3.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''11 + 4 = 15 (left: 6 8 15)''', '''0_5''')

Observation:
"""

In [None]:
user_msg18__ = """6 + 8 = 14 (left: 14 15)
8 - 6 = 2 (left: 2 15)
15 - 6 = 9 (left: 8 9)
15 / 3 = 5 (left: 5 8)
15 - 8 = 7 (left: 6 7)
8 * 6 = 48 (left: 15 48)
15 + 6 = 21 (left: 8 21)
8 + 6 = 14 (left: 14 15)"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg18__})
messages.append({'role': "user", 'content': user_msg18__})

In [None]:
assistant_msg19__ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_5_0: 6 + 8 = 14 (left: 14 15)
Node 0_5_1: 8 - 6 = 2 (left: 2 15)
Node 0_5_2: 15 - 6 = 9 (left: 8 9)
Node 0_5_3: 15 / 3 = 5 (left: 5 8)
Node 0_5_4: 15 - 8 = 7 (left: 6 7)
Node 0_5_5: 8 * 6 = 48 (left: 15 48)
Node 0_5_6: 15 + 6 = 21 (left: 8 21)
Node 0_5_7: 8 + 6 = 14 (left: 14 15)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_0
State of current child:
11 + 4 = 15 (left: 6 8 15)
6 + 8 = 14 (left: 14 15)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n6 + 8 = 14 (left: 14 15)''', '''0_5_0''')

Observation:
"""

In [None]:
user_msg19__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg19__})
messages.append({'role': "user", 'content': user_msg19__})

In [None]:
assistant_msg20__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_1
State of current child:
11 + 4 = 15 (left: 6 8 15)
8 - 6 = 2 (left: 2 15)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n8 - 6 = 2 (left: 2 15)''', '''0_5_1''')

Observation:
"""

In [None]:
user_msg20__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg20__})
messages.append({'role': "user", 'content': user_msg20__})

In [None]:
assistant_msg21__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_2
State of current child:
11 + 4 = 15 (left: 6 8 15)
15 - 6 = 9 (left: 8 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n15 - 6 = 9 (left: 8 9)''', '''0_5_2''')

Observation:
"""

In [None]:
user_msg21__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg21__})
messages.append({'role': "user", 'content': user_msg21__})

In [None]:
assistant_msg22__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_3
State of current child:
11 + 4 = 15 (left: 6 8 15)
15 / 3 = 5 (left: 5 8)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n15 / 3 = 5 (left: 5 8)''', '''0_5_3''')

Observation:
"""

In [None]:
user_msg22__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg22__})
messages.append({'role': "user", 'content': user_msg22__})

In [None]:
assistant_msg23__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_4
State of current child:
11 + 4 = 15 (left: 6 8 15)
15 - 8 = 7 (left: 6 7)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n15 - 8 = 7 (left: 6 7)''', '''0_5_4''')

Observation:
"""

In [None]:
user_msg23__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg23__})
messages.append({'role': "user", 'content': user_msg23__})

In [None]:
assistant_msg24__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_5
State of current child:
11 + 4 = 15 (left: 6 8 15)
8 * 6 = 48 (left: 15 48)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n8 * 6 = 48 (left: 15 48)''', '''0_5_5''')

Observation:
"""

In [None]:
user_msg24__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg24__})
messages.append({'role': "user", 'content': user_msg24__})

In [None]:
assistant_msg25__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_6
State of current child:
11 + 4 = 15 (left: 6 8 15)
15 + 6 = 21 (left: 8 21)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n15 + 6 = 21 (left: 8 21)''', '''0_5_6''')

Observation:
"""

In [None]:
user_msg25__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg25__})
messages.append({'role': "user", 'content': user_msg25__})

In [None]:
assistant_msg26__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_5
State of current node:
11 + 4 = 15 (left: 6 8 15)

Index of current child: 0_5_7
State of current child:
11 + 4 = 15 (left: 6 8 15)
8 + 6 = 14 (left: 14 15)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''11 + 4 = 15 (left: 6 8 15)\n8 + 6 = 14 (left: 14 15)''', '''0_5_7''')

Observation:
"""

In [None]:
user_msg26__ = """0.003"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg26__})
messages.append({'role': "user", 'content': user_msg26__})

In [None]:
assistant_msg27__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 1
Index of current node: 0_5

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0_5''', '''0''')

Observation:
"""

In [None]:
user_msg27__ = """Ok."""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg27__})
messages.append({'role': "user", 'content': user_msg27__})

In [None]:
assistant_msg28__ = """
---
~~~
We're back at node: 0

Flag received: 'FALSE'

That is, searching the subtree starting at child node 0_5 was unsuccessful!

Trying the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_6
State of current child:
6 * 4 = 24 (left: 8 11 24)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''6 * 4 = 24 (left: 8 11 24)''', '''0_6''')

Observation:
"""

In [None]:
user_msg28__ = """2.001"""

In [None]:
messages.append({'role': "assistant", 'content': assistant_msg28__})
messages.append({'role': "user", 'content': user_msg28__})

In [None]:
# Few-shot assistant turn: child 0_6 scores 2.001 (< 3.0) and is rejected;
# requests a heuristic value for the next child, 0_7.
assistant_msg29__ = """

Heuristic value of current child: 2.001

2.001 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_7
State of current child:
8 - 4 = 4 (left: 4 6 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)''', '''0_7''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7.
user_msg29__ = """3.0"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg29__},
    {'role': "user", 'content': user_msg29__},
])

In [None]:
# Few-shot assistant turn: child 0_7 meets the 3.0 threshold and level 0 is below
# the limit of 3, so the trace descends into node 0_7 (level 1) and calls the
# thought generator on its state.
assistant_msg30__ = """

Heuristic value of current child: 3.0

3.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''8 - 4 = 4 (left: 4 6 11)''', '''0_7''')

Observation:
"""

In [None]:
# Scripted observation: thought candidates returned for node 0_7, one per line.
user_msg30__ = """4 + 6 = 10 (left: 10 11)
6 - 4 = 2 (left: 2 11)
11 - 4 = 7 (left: 6 7)
11 - 6 = 5 (left: 4 5)
4 * 6 = 24 (left: 11 24)
11 / 4 = 2.75 (left: 2.75 6)
6 / 4 = 1.5 (left: 1.5 11)"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg30__},
    {'role': "user", 'content': user_msg30__},
])

In [None]:
# Few-shot assistant turn: registers the seven candidates as children
# 0_7_0..0_7_6 and requests a heuristic value for the first one, 0_7_0.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg31__ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_7_0: 4 + 6 = 10 (left: 10 11)
Node 0_7_1: 6 - 4 = 2 (left: 2 11)
Node 0_7_2: 11 - 4 = 7 (left: 6 7)
Node 0_7_3: 11 - 6 = 5 (left: 4 5)
Node 0_7_4: 4 * 6 = 24 (left: 11 24)
Node 0_7_5: 11 / 4 = 2.75 (left: 2.75 6)
Node 0_7_6: 6 / 4 = 1.5 (left: 1.5 11)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_0
State of current child:
8 - 4 = 4 (left: 4 6 11)
4 + 6 = 10 (left: 10 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n4 + 6 = 10 (left: 10 11)''', '''0_7_0''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_0.
user_msg31__ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg31__},
    {'role': "user", 'content': user_msg31__},
])

In [None]:
# Few-shot assistant turn: child 0_7_0 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_7_1.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg32__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_1
State of current child:
8 - 4 = 4 (left: 4 6 11)
6 - 4 = 2 (left: 2 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n6 - 4 = 2 (left: 2 11)''', '''0_7_1''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_1.
user_msg32__ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg32__},
    {'role': "user", 'content': user_msg32__},
])

In [None]:
# Few-shot assistant turn: child 0_7_1 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_7_2.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg33__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_2
State of current child:
8 - 4 = 4 (left: 4 6 11)
11 - 4 = 7 (left: 6 7)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n11 - 4 = 7 (left: 6 7)''', '''0_7_2''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_2.
user_msg33__ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg33__},
    {'role': "user", 'content': user_msg33__},
])

In [None]:
# Few-shot assistant turn: child 0_7_2 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_7_3.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg34__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_3
State of current child:
8 - 4 = 4 (left: 4 6 11)
11 - 6 = 5 (left: 4 5)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n11 - 6 = 5 (left: 4 5)''', '''0_7_3''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_3.
user_msg34__ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg34__},
    {'role': "user", 'content': user_msg34__},
])

In [None]:
# Few-shot assistant turn: child 0_7_3 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_7_4.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg35__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_4
State of current child:
8 - 4 = 4 (left: 4 6 11)
4 * 6 = 24 (left: 11 24)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n4 * 6 = 24 (left: 11 24)''', '''0_7_4''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_4.
user_msg35__ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg35__},
    {'role': "user", 'content': user_msg35__},
])

In [None]:
# Few-shot assistant turn: child 0_7_4 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_7_5.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg36__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_5
State of current child:
8 - 4 = 4 (left: 4 6 11)
11 / 4 = 2.75 (left: 2.75 6)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n11 / 4 = 2.75 (left: 2.75 6)''', '''0_7_5''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_5.
user_msg36__ = """2.001"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg36__},
    {'role': "user", 'content': user_msg36__},
])

In [None]:
# Few-shot assistant turn: child 0_7_5 scores 2.001 (< 3.0) and is rejected;
# requests a heuristic value for the last child, 0_7_6.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg37__ = """

Heuristic value of current child: 2.001

2.001 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_7
State of current node:
8 - 4 = 4 (left: 4 6 11)

Index of current child: 0_7_6
State of current child:
8 - 4 = 4 (left: 4 6 11)
6 / 4 = 1.5 (left: 1.5 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''8 - 4 = 4 (left: 4 6 11)\n6 / 4 = 1.5 (left: 1.5 11)''', '''0_7_6''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_7_6.
user_msg37__ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg37__},
    {'role': "user", 'content': user_msg37__},
])

In [None]:
# Few-shot assistant turn: child 0_7_6 scores 0.003 (< 3.0); all children of
# node 0_7 are exhausted, so the trace backtracks from 0_7 to the root with
# flag False.
assistant_msg38__ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 1
Index of current node: 0_7

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0_7''', '''0''')

Observation:
"""

In [None]:
# Scripted user reply that acknowledges the backtracking action.
user_msg38__ = """Ok."""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg38__},
    {'role': "user", 'content': user_msg38__},
])

In [None]:
# Few-shot assistant turn: back at root node 0 after the failed 0_7 subtree;
# no root children remain, so the trace backtracks out of the tree
# (parent '''None''') with flag False.
assistant_msg39__ = """
---
~~~
We're back at node: 0

Flag received: 'FALSE'

That is, searching the subtree starting at child node 0_7 was unsuccessful!

Trying the next child.
---
No more children remaining. All child nodes have been traversed.

None of the child nodes led to success. Seems like a dead end.

Reminder:-
Current level: 0
Index of current node: 0

Backtracking with flag 'FALSE'.
Action: backtracking('''False''', '''0''', '''None''')

Observation:
"""

In [None]:
# Scripted user reply that acknowledges the backtracking action.
user_msg39__ = """Ok."""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg39__},
    {'role': "user", 'content': user_msg39__},
])

In [None]:
# Closing assistant turn of this few-shot trace: the search has left the tree
# with flag FALSE (no solution found). It contains no Action, and no user reply
# is appended after it.
assistant_msg40__ = """
---
~~~
We're now outside the tree.

Flag received: 'FALSE'"""

In [None]:
# Record the closing assistant turn of this trace in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg40__},
])

In [None]:
# User request that opens the next few-shot example: the four numbers plus the
# search parameters (heuristic threshold and level limit) that the scripted
# assistant turn below restates.
# Bare-numbers form of the same input, left commented out:
# input_seq4 = '2 9 10 12'
input_seq4 = '''Four numbers: 2 9 10 12

Search parameters:-
Heuristic threshold: 3.0
Level limit: 3'''

In [None]:
# Add the user request that starts this few-shot example to the conversation.
messages.extend([
    {'role': "user", 'content': input_seq4},
])

In [None]:
# Few-shot assistant turn: restates the input numbers and search parameters,
# starts at root node 0 (level 0) and calls the thought generator with an
# empty state.
assistant_msg1___ = """The four numbers are: 2 9 10 12

The minimum required heuristic threshold is: 3.0
The level limit is: 3

Starting the tree search...
---
~~~
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('', '0')

Observation:
"""

In [None]:
# Scripted observation: thought candidates returned for the root node, one per line.
user_msg1___ = """2 + 9 = 11 (left: 10 11 12)
12 - 2 = 10 (left: 9 10 10)
10 - 2 = 8 (left: 8 9 12)
12 / 2 = 6 (left: 6 9 10)
10 / 2 = 5 (left: 5 9 12)
9 - 2 = 7 (left: 7 10 12)
12 - 9 = 3 (left: 2 3 10)
10 - 9 = 1 (left: 1 2 12)"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg1___},
    {'role': "user", 'content': user_msg1___},
])

In [None]:
# Few-shot assistant turn: registers the eight candidates as children 0_0..0_7
# of the root and requests a heuristic value for the first one, 0_0.
assistant_msg2___ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_0: 2 + 9 = 11 (left: 10 11 12)
Node 0_1: 12 - 2 = 10 (left: 9 10 10)
Node 0_2: 10 - 2 = 8 (left: 8 9 12)
Node 0_3: 12 / 2 = 6 (left: 6 9 10)
Node 0_4: 10 / 2 = 5 (left: 5 9 12)
Node 0_5: 9 - 2 = 7 (left: 7 10 12)
Node 0_6: 12 - 9 = 3 (left: 2 3 10)
Node 0_7: 10 - 9 = 1 (left: 1 2 12)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_0
State of current child:
2 + 9 = 11 (left: 10 11 12)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''2 + 9 = 11 (left: 10 11 12)''', '''0_0''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_0.
user_msg2___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg2___},
    {'role': "user", 'content': user_msg2___},
])

In [None]:
# Few-shot assistant turn: child 0_0 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_1.
assistant_msg3___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_1
State of current child:
12 - 2 = 10 (left: 9 10 10)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''12 - 2 = 10 (left: 9 10 10)''', '''0_1''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_1.
user_msg3___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg3___},
    {'role': "user", 'content': user_msg3___},
])

In [None]:
# Few-shot assistant turn: child 0_1 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_2.
assistant_msg4___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Index of current child: 0_2
State of current child:
10 - 2 = 8 (left: 8 9 12)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)''', '''0_2''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2.
user_msg4___ = """3.0"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg4___},
    {'role': "user", 'content': user_msg4___},
])

In [None]:
# Few-shot assistant turn: child 0_2 meets the 3.0 threshold and level 0 is below
# the limit of 3, so the trace descends into node 0_2 (level 1) and calls the
# thought generator on its state.
assistant_msg5___ = """

Heuristic value of current child: 3.0

3.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 0

Level limit: 3

0 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 1
Index of current node: 0_2
State of current node:
10 - 2 = 8 (left: 8 9 12)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''10 - 2 = 8 (left: 8 9 12)''', '''0_2''')

Observation:
"""

In [None]:
# Scripted observation: thought candidates returned for node 0_2, one per line.
user_msg5___ = """8 + 9 = 17 (left: 12 17)
9 - 8 = 1 (left: 1 12)
12 - 8 = 4 (left: 4 9)
12 - 9 = 3 (left: 3 8)
12 / 8 = 1.5 (left: 1.5 9)
9 * 8 = 72 (left: 12 72)
8 * 12 = 96 (left: 9 96)"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg5___},
    {'role': "user", 'content': user_msg5___},
])

In [None]:
# Few-shot assistant turn: registers the seven candidates as children
# 0_2_0..0_2_6 and requests a heuristic value for the first one, 0_2_0.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg6___ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_2_0: 8 + 9 = 17 (left: 12 17)
Node 0_2_1: 9 - 8 = 1 (left: 1 12)
Node 0_2_2: 12 - 8 = 4 (left: 4 9)
Node 0_2_3: 12 - 9 = 3 (left: 3 8)
Node 0_2_4: 12 / 8 = 1.5 (left: 1.5 9)
Node 0_2_5: 9 * 8 = 72 (left: 12 72)
Node 0_2_6: 8 * 12 = 96 (left: 9 96)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
10 - 2 = 8 (left: 8 9 12)

Index of current child: 0_2_0
State of current child:
10 - 2 = 8 (left: 8 9 12)
8 + 9 = 17 (left: 12 17)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n8 + 9 = 17 (left: 12 17)''', '''0_2_0''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_0.
user_msg6___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg6___},
    {'role': "user", 'content': user_msg6___},
])

In [None]:
# Few-shot assistant turn: child 0_2_0 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_2_1.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg7___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
10 - 2 = 8 (left: 8 9 12)

Index of current child: 0_2_1
State of current child:
10 - 2 = 8 (left: 8 9 12)
9 - 8 = 1 (left: 1 12)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n9 - 8 = 1 (left: 1 12)''', '''0_2_1''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_1.
user_msg7___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg7___},
    {'role': "user", 'content': user_msg7___},
])

In [None]:
# Few-shot assistant turn: child 0_2_1 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_2_2.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg8___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
10 - 2 = 8 (left: 8 9 12)

Index of current child: 0_2_2
State of current child:
10 - 2 = 8 (left: 8 9 12)
12 - 8 = 4 (left: 4 9)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n12 - 8 = 4 (left: 4 9)''', '''0_2_2''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_2.
user_msg8___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg8___},
    {'role': "user", 'content': user_msg8___},
])

In [None]:
# Few-shot assistant turn: child 0_2_2 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_2_3.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg9___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 1
Index of current node: 0_2
State of current node:
10 - 2 = 8 (left: 8 9 12)

Index of current child: 0_2_3
State of current child:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)''', '''0_2_3''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_3.
user_msg9___ = """60.0"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg9___},
    {'role': "user", 'content': user_msg9___},
])

In [None]:
# Few-shot assistant turn: child 0_2_3 scores 60.0 (>= 3.0) and level 1 is below
# the limit of 3, so the trace descends into node 0_2_3 (level 2) and calls the
# thought generator on its two-line state.
# NOTE(review): this literal is not raw, so the \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg10___ = """

Heuristic value of current child: 60.0

60.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 1

Level limit: 3

1 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 2
Index of current node: 0_2_3
State of current node:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)''', '''0_2_3''')

Observation:
"""

In [None]:
# Scripted observation: thought candidates returned for node 0_2_3, one per line.
user_msg10___ = """3 + 8 = 11 (left: 11)
8 - 3 = 5 (left: 5)
3 * 8 = 24 (left: 24)
8 / 3 = 2.67 (left: 2.67)"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg10___},
    {'role': "user", 'content': user_msg10___},
])

In [None]:
# Few-shot assistant turn: registers the four candidates as children
# 0_2_3_0..0_2_3_3 and requests a heuristic value for the first one, 0_2_3_0.
# NOTE(review): this literal is not raw, so each \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg11___ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_2_3_0: 3 + 8 = 11 (left: 11)
Node 0_2_3_1: 8 - 3 = 5 (left: 5)
Node 0_2_3_2: 3 * 8 = 24 (left: 24)
Node 0_2_3_3: 8 / 3 = 2.67 (left: 2.67)

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 2
Index of current node: 0_2_3
State of current node:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)

Index of current child: 0_2_3_0
State of current child:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
3 + 8 = 11 (left: 11)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)\n3 + 8 = 11 (left: 11)''', '''0_2_3_0''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_3_0.
user_msg11___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg11___},
    {'role': "user", 'content': user_msg11___},
])

In [None]:
# Few-shot assistant turn: child 0_2_3_0 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_2_3_1.
# NOTE(review): this literal is not raw, so each \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg12___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 2
Index of current node: 0_2_3
State of current node:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)

Index of current child: 0_2_3_1
State of current child:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
8 - 3 = 5 (left: 5)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)\n8 - 3 = 5 (left: 5)''', '''0_2_3_1''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_3_1.
user_msg12___ = """0.003"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg12___},
    {'role': "user", 'content': user_msg12___},
])

In [None]:
# Few-shot assistant turn: child 0_2_3_1 scores 0.003 (< 3.0) and is rejected;
# requests a heuristic value for child 0_2_3_2.
# NOTE(review): this literal is not raw, so each \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg13___ = """

Heuristic value of current child: 0.003

0.003 < 3.0

Heuristic value fails to satisfy the minimum required threshold of 3.0. Moving on to the next child.
---
Reminder:-
Current level: 2
Index of current node: 0_2_3
State of current node:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)

Index of current child: 0_2_3_2
State of current child:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)\n3 * 8 = 24 (left: 24)''', '''0_2_3_2''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_3_2.
user_msg13___ = """60.0"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg13___},
    {'role': "user", 'content': user_msg13___},
])

In [None]:
# Few-shot assistant turn: child 0_2_3_2 scores 60.0 (>= 3.0) and level 2 is
# below the limit of 3, so the trace descends into node 0_2_3_2 (level 3) and
# calls the thought generator on its three-line state.
# NOTE(review): this literal is not raw, so each \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg14___ = """

Heuristic value of current child: 60.0

60.0 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 2

Level limit: 3

2 < 3

No it isn't. The search isn't over yet. Searching subtree.
---
~~~
Current level: 3
Index of current node: 0_2_3_2
State of current node:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)\n3 * 8 = 24 (left: 24)''', '''0_2_3_2''')

Observation:
"""

In [None]:
# Scripted observation: the single candidate returned for node 0_2_3_2, an
# "Answer:" line giving the full expression.
user_msg14___ = """Answer: (10 - 2) * (12 - 9) = 24"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg14___},
    {'role': "user", 'content': user_msg14___},
])

In [None]:
# Few-shot assistant turn: registers the answer candidate as child 0_2_3_2_0
# and requests a heuristic value for the complete four-line state.
# NOTE(review): this literal is not raw, so each \n inside the Action argument
# becomes a real line break in the prompt text; confirm the action parser
# expects a multi-line argument.
assistant_msg15___ = """

Each of the above thought candidates has been added as a child node of the current node:

Node 0_2_3_2_0: Answer: (10 - 2) * (12 - 9) = 24

Looping through the child nodes one at a time...
---
Reminder:-
Current level: 3
Index of current node: 0_2_3_2
State of current node:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)

Index of current child: 0_2_3_2_0
State of current child:
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)
Answer: (10 - 2) * (12 - 9) = 24

Using the state evaluator tool to obtain heuristic value...

Action: state_evaluator('''10 - 2 = 8 (left: 8 9 12)\n12 - 9 = 3 (left: 3 8)\n3 * 8 = 24 (left: 24)\nAnswer: (10 - 2) * (12 - 9) = 24''', '''0_2_3_2_0''')

Observation:
"""

In [None]:
# Scripted observation: heuristic value returned for child 0_2_3_2_0.
user_msg15___ = """40.001"""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg15___},
    {'role': "user", 'content': user_msg15___},
])

In [None]:
# Few-shot assistant turn: child 0_2_3_2_0 scores 40.001 (>= 3.0) and level 3
# has reached the limit of 3, so the trace prints the child's state as the final
# solution and starts backtracking from 0_2_3_2 to 0_2_3 with flag True.
assistant_msg16___ = """

Heuristic value of current child: 40.001

40.001 >= 3.0

Heuristic value satisfies the minimum required threshold of 3.0.

Checking whether the current level is greater than or equal to the level limit...

Current level: 3

Level limit: 3

3 >= 3

Yes it is. The search is over. Printing the state of the current child as the final solution...

Final solution:-
10 - 2 = 8 (left: 8 9 12)
12 - 9 = 3 (left: 3 8)
3 * 8 = 24 (left: 24)
Answer: (10 - 2) * (12 - 9) = 24

Reminder:-
Current level: 3
Index of current node: 0_2_3_2

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0_2_3_2''', '''0_2_3''')

Observation:
"""

In [None]:
# Scripted user reply that acknowledges the backtracking action.
user_msg16___ = """Ok."""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg16___},
    {'role': "user", 'content': user_msg16___},
])

In [None]:
# Few-shot assistant turn: back at node 0_2_3 (level 2) with flag TRUE; the
# success is propagated upward by backtracking from 0_2_3 to 0_2.
assistant_msg17___ = """
---
~~~
We're back at node: 0_2_3

Flag received: 'TRUE'

That is, searching the subtree starting at child node 0_2_3_2 was successful!

Reminder:-
Current level: 2
Index of current node: 0_2_3

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0_2_3''', '''0_2''')

Observation:
"""

In [None]:
# Scripted user reply that acknowledges the backtracking action.
user_msg17___ = """Ok."""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg17___},
    {'role': "user", 'content': user_msg17___},
])

In [None]:
# Few-shot assistant turn: back at node 0_2 (level 1) with flag TRUE; the
# success is propagated upward by backtracking from 0_2 to the root.
assistant_msg18___ = """
---
~~~
We're back at node: 0_2

Flag received: 'TRUE'

That is, searching the subtree starting at child node 0_2_3 was successful!

Reminder:-
Current level: 1
Index of current node: 0_2

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0_2''', '''0''')

Observation:
"""

In [None]:
# Scripted user reply that acknowledges the backtracking action.
user_msg18___ = """Ok."""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg18___},
    {'role': "user", 'content': user_msg18___},
])

In [None]:
# Few-shot assistant turn: back at root node 0 with flag TRUE; the trace
# backtracks out of the tree (parent '''None''') carrying the success flag.
assistant_msg19___ = """
---
~~~
We're back at node: 0

Flag received: 'TRUE'

That is, searching the subtree starting at child node 0_2 was successful!

Reminder:-
Current level: 0
Index of current node: 0

Backtracking with flag 'TRUE'.
Action: backtracking('''True''', '''0''', '''None''')

Observation:
"""

In [None]:
# Scripted user reply that acknowledges the backtracking action.
user_msg19___ = """Ok."""

In [None]:
# Record this scripted assistant/user exchange in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg19___},
    {'role': "user", 'content': user_msg19___},
])

In [None]:
# Closing assistant turn of this few-shot trace: the search has left the tree
# with flag TRUE (a solution was found). It contains no Action, and no user
# reply is appended after it.
assistant_msg20___ = """
---
~~~
We're now outside the tree.

Flag received: 'TRUE'"""

In [None]:
# Record the closing assistant turn of this trace in the few-shot conversation.
messages.extend([
    {'role': "assistant", 'content': assistant_msg20___},
])

### ReAct Class with Evaluation

In [None]:
from collections import OrderedDict
import re
from typing import Optional, Callable, List, Dict, Any
from openai import OpenAI
from typing import Dict, Any
from enum import Enum



class ValidationResult(Enum):
    """Two-valued pass/fail outcome.

    Each member's value is the same upper-case string as its name.
    NOTE(review): presumably recorded by the validation logic of the ReAct
    class defined after this enum; its usage is not visible here -- confirm.
    """
    PASS = "PASS"  # the check succeeded
    FAIL = "FAIL"  # the check failed

class FewShotTreeSearchStackReAct:
    """Tree-search tool set (thought generation, state evaluation, backtracking) with validation.

    Every tool call updates `trace_tree` (the explored nodes keyed by cumulative
    ids such as "0", "0_2", "0_2_3") and a shadow `stack` of the nodes currently
    being expanded. The `check_*` / `_validate_*` methods compare each call
    against that bookkeeping and record the outcome in `validation_log`.
    """
    # Depth bound: `state_evaluator` only pushes while len(stack) <= level_limit,
    # and `_validate_pop` compares the stack size against it.
    level_limit = 3
    # Minimum heuristic value for a node to be pushed/expanded. ("hurestic" is a
    # misspelling of "heuristic"; the name is referenced throughout the class.)
    hurestic_threshold = 3.0
    # Class-level default. `_validate_pop` assigns an instance-level False the first
    # time it handles a 'True' flag, so the level-limit check runs once per instance.
    # TODO(review): the original author questioned whether a one-shot flag is the
    # right way to detect that the level limit was triggered.
    level_limit_triggered = True

    def __init__(
            self,
            client: OpenAI,
            model: str,
            messages: List[dict],
            input_seq: str,
            get_thought_gen_prompt: Callable,
            get_state_eval_prompt: Callable,
            heuristic_calculator: Callable,
            n_evals: int = 3,
            input_seq_id: str = None,
            run_name: str = None
    ):
        self.client = client
        self.model = model
        self.messages = messages
        self.input_seq = input_seq
        self.messages.append({'role': "user", 'content': input_seq})
        self.get_thought_gen_prompt = get_thought_gen_prompt
        self.get_state_eval_prompt = get_state_eval_prompt
        self.heuristic_calculator = heuristic_calculator
        self.n_evals = n_evals
        self.trace_tree: Dict[str, Any] = {}

        # Stack and validation log for evaluation
        self.stack: List[str] = []
        self.validation_log: List[Dict[str, Any]] = []
        self.validation_report = []
        self.last_popped_node: Optional[str] = None # Track last popped node for sequential validation
        self.true_backtrack_mode = False # Track if we're in cascading True backtrack mode
        self.input_seq_id = input_seq_id
        self.run_name = run_name

    def chat_completion(
            self,
            messages: List[dict],
            model: str,
            temperature: float = 0.2,
            max_tokens: int = 4096,
            n: int = 1,
            stop: str = "\nObservation:",
            **kwargs
    ) -> str:
        response = self.client.chat.completions.create(
            messages=messages,
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            n=n,
            stop=stop,
            **kwargs
        )
        return response.choices[0].message.content

    def thought_generator(self, state: str, node_id: str) -> str:
        prompt = self.get_thought_gen_prompt(self.input_seq, state)
        responses = self.chat_completion(
            [{'role': "user", 'content': prompt}],
            model="gpt-4",
            temperature=0.7,
            max_tokens=1000,
            n=1,
            stop=None
        )
        thoughts = responses  # newline-separated

        # record in trace_tree
        parts = node_id.split("_")
        tree_ptr = self.trace_tree
        current_path = []
        for part in parts:
            current_path.append(part)
            key = "_".join(current_path)
            tree_ptr.setdefault("children", OrderedDict())
            tree_ptr["children"].setdefault(key, {
                "state": None,
                "thought": None,
                "value": None,
                "opening_remark": None,
                "children": OrderedDict(),
                "closing_remark": None,
                "backtracked": False
            })
            tree_ptr = tree_ptr["children"][key]

        tree_ptr["thought"] = thoughts

        # push to stack when node_id is 0
        if node_id == '0':
            self.stack.append(node_id)
            self._validate_push(state, node_id, 1.0)

        if node_id != "0":
            # Validation: Check Node State
            self.check_node_state(node_id, state)

        return thoughts

    def state_evaluator(self, state: str, node_id: str) -> float:
        prompt = self.get_state_eval_prompt(self.input_seq, state)
        state_evals = []
        for _ in range(self.n_evals):
            state_eval = self.chat_completion(
                [{'role': "user", 'content': prompt}],
                model="gpt-4",
                temperature=0.7,
                max_tokens=1000,
                n=1,
                stop=None
            )
            state_evals.append(state_eval)
        value = self.heuristic_calculator(state, state_evals)

        # update trace_tree
        parts = node_id.split("_")
        tree_ptr = self.trace_tree
        current_path = []
        for idx, part in enumerate(parts):
            current_path.append(part)
            key = "_".join(current_path)
            tree_ptr.setdefault("children", OrderedDict())
            if key not in tree_ptr["children"]:
                tree_ptr["children"][key] = {
                    "state": None,
                    "thought": None,
                    "value": None,
                    "opening_remark": None,
                    "children": OrderedDict(),
                    "closing_remark": None,
                    "backtracked": False
                }
            tree_ptr = tree_ptr["children"][key]

        tree_ptr.update({
            "state": state,
            "value": value,
            "opening_remark": (
                "Value exceeds heuristic threshold, childs will be traversed." if value >= self.hurestic_threshold else
                "Value does not exceed heuristic threshold, childs will not be traversed."
            ),
            "closing_remark": None
        })

        print(f"✅ Trace updated for node {node_id} — value: {value}")

        # PUSH to Evaluation stack if the value is more than the hurestic threshold and the length of stack is not exceeding the level limit 4<= 3
        if (value >= self.hurestic_threshold) and (len(self.stack) <= self.level_limit) :
            self.stack.append(node_id)
            self._validate_push(state, node_id, value)

        # Validation 4: Check sequential sibling validation after backtracking
        if self.last_popped_node:
            self.check_sequential_sibling(node_id, state)

        # Validation 9: Check correctness of the the decision to prune a child
        self.check_correctness_of_pruning_recursive_call(node_id, value)

        # Validation 3: Check if the node id assigned to the state matches the position in the thought generated.
        self.check_node_id_assignment(node_id, state)

        return value

    def _validate_push(self, state: str, node_id: str, value: float):
        # Validation after push
        issues = []
        log = []

        if node_id == '0':
            pass

        else:

            # Validation 1.1: Checking if Pushing or Recursive call is performed if value less than hurestic by checking in the stack
            if value >= self.hurestic_threshold:
                if node_id in self.stack:
                    log.append(f"✅ Recursive call happened correctly: node {node_id} with value {value} more than or equal to {self.hurestic_threshold} should be traversed.")
                    self._create_validation_entry(
                        "recursive_call_check", node_id, ValidationResult.PASS,
                        f"Value {value} more than or equal to threshold {self.hurestic_threshold}",
                        {"value": value, "threshold": self.hurestic_threshold}
                    )
                else:
                    issues.append(f"Recursive call Error: Node {node_id} with value {value} more than or equal {self.hurestic_threshold} not be traversed.")
                    self._create_validation_entry(
                        "recursive_call_check", node_id, ValidationResult.FAIL,
                        f"Value {value} more than or equal to threshold {self.hurestic_threshold} but node id not traversed",
                        {"value": value, "threshold": self.hurestic_threshold}
                    )

            # Validation 2: Check if the state that is selected is part of the thought
            # Last operation should be in parent's thoughts, previous operations should match parent's state
            parent_id = "_".join(node_id.split("_")[:-1]) or node_id
            parent_node = self._get_node(parent_id)
            parent_thoughts = [line.strip() for line in parent_node.get("thought", "").splitlines()]
            parent_state = None # Handles the case in which for node 0_5 when we check that parent_state of 0 it is None
            current_state_lines = [line.strip() for line in state.splitlines()]

            validation_errors = []
            validation_success = []

            if current_state_lines:
                # Check the last thought against parent's thoughts
                last_thought = current_state_lines[-1].strip()
                if last_thought not in parent_thoughts:
                    validation_errors.append(f"Last thought '{last_thought}' not found in parent's thoughts")
                else:
                    validation_success.append(f"Last thought '{last_thought}' found in parent's thoughts")

                # Check previous thought against parent's state
                if len(current_state_lines) > 1:
                    parent_state = [line.strip() for line in parent_node.get("state", "").splitlines()]
                    previous_thought = [line.strip() for line in current_state_lines[:-1]]
                    for prev_op in previous_thought:
                        if prev_op and prev_op not in parent_state:
                            validation_errors.append(f"Previous thought '{prev_op}' not found in parent's state")
                        elif prev_op:
                            validation_success.append(f"Previous thought '{prev_op}' found in parent's state")

            if validation_errors:
                issues.append(f"State validation error: {'; '.join(validation_errors)}")
                self._create_validation_entry(
                    "child_state_check", node_id, ValidationResult.FAIL,
                    f"State validation failed: {'; '.join(validation_errors)}",
                    {"validation_errors": validation_errors, "parent_thoughts": parent_thoughts, "parent_state": parent_state, "current_state": current_state_lines}
                )
            else:
                log.append(f"✅ State validation passed: {'; '.join(validation_success)}")
                self._create_validation_entry(
                    "child_state_check", node_id, ValidationResult.PASS,
                    f"State validation passed: {'; '.join(validation_success)}",
                    {"validation_success": validation_success, "parent_thoughts": parent_thoughts, "parent_state": parent_state}
                )

        # if issues:
        self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})

    def backtracking(self, flag: str, node_id: str, backtrack_to_id: Optional[str]):
        # record pop event
        if self.stack:
            popped = self.stack.pop()
            self.last_popped_node = popped # Track the last popped node for sequential validation

            # Handle True backtrack mode logic
            if flag.upper() == "TRUE":
                self.true_backtrack_mode = True

            self._validate_pop(flag, popped, node_id, backtrack_to_id)

        # existing backtracking logic
        tree_ptr = self.trace_tree
        if node_id != "0":
            parts = node_id.split("_")
            # Navigate to the root child first
            if '0' in tree_ptr.get('children', {}):
                tree_ptr = tree_ptr['children']['0']
            # Build cumulative path for navigation
            current_path = parts[0]  # Start with "0"
            for part in parts[1:]:
                current_path += f"_{part}"  # Build cumulative path: "0_0", "0_0_1", "0_0_1_3"
                if current_path in tree_ptr.get('children', {}):
                    tree_ptr = tree_ptr['children'][current_path]

        tree_ptr["backtracked"] = (flag.upper() == "TRUE")
        tree_ptr["backtracked_to"] = backtrack_to_id.strip() if backtrack_to_id else None

        print(f"✅ Successfully marked node {node_id} as backtracked with flag {flag}")
        return "Ok."

    def _validate_pop(self, flag:str, popped_id: str, backtracked_id: str, backtrack_to_id: str):

        issues = []
        log = []

        popped_node = self._get_node(popped_id)
        all_thoughts = popped_node.get("thought", "").splitlines()
        explored_children = popped_node.get("children", {})
        evaluated = [child for child in explored_children.values() if child["state"]]

        # Combined Validation 4 & 4.1: Check both popped node and backtrack parent
        pop_validation_passed = True
        validation_messages = []
        validation_errors = []

        # Check if correct node is popped
        if popped_id.strip() != backtracked_id.strip():
            pop_validation_passed = False
            error_msg = f"Expected to pop {backtracked_id}, but popped {popped_id}"
            validation_errors.append(error_msg)
            issues.append(f"Pop error: {error_msg}.")
        else:
            success_msg = f"Correctly popped expected node {popped_id}"
            validation_messages.append(success_msg)
            log.append(f"✅ Popped {popped_id} successfully.")

        # Check if backtrack parent is correct
        popped_parts = popped_id.split("_")
        if len(popped_parts) > 1:
            expected_parent = "_".join(popped_parts[:-1])
        else:
            expected_parent = "None"  # Root node's parent concept

        if backtrack_to_id.strip() != expected_parent.strip():
            pop_validation_passed = False
            error_msg = f"Expected to backtrack to '{expected_parent}', but got '{backtrack_to_id}'"
            validation_errors.append(error_msg)
            issues.append(f"Backtrack parent error: {error_msg}.")
        else:
            success_msg = f"Correctly backtracking to parent '{backtrack_to_id}'"
            validation_messages.append(success_msg)
            log.append(f"✅ Backtrack parent validation passed: correctly backtracking to '{backtrack_to_id}'.")

        # Create single validation entry based on combined result
        if pop_validation_passed:
            combined_message = "; ".join(validation_messages)
            log.append(f"✅ Complete backtracking validation passed: {combined_message}")
            self._create_validation_entry(
                "backtracking_III_check", popped_id, ValidationResult.PASS,
                f"Complete backtracking validation passed: {combined_message}",
                {
                    "popped_node": popped_id,
                    "expected_popped": backtracked_id,
                    "backtrack_to": backtrack_to_id,
                    "expected_parent": expected_parent,
                    "validation_details": validation_messages
                }
            )
        else:
            combined_errors = "; ".join(validation_errors)
            issues.append(f"Complete backtracking validation failed: {combined_errors}")
            self._create_validation_entry(
                "backtracking_III_check", popped_id, ValidationResult.FAIL,
                f"Complete backtracking validation failed: {combined_errors}",
                {
                    "popped_node": popped_id,
                    "expected_popped": backtracked_id,
                    "backtrack_to": backtrack_to_id,
                    "expected_parent": expected_parent,
                    "validation_errors": validation_errors
                }
            )


        # Determine if level limit was exceeded.
        # len(self.stack) + 1 because we are checking after it is popped from the stack. 4 >= 4
        # ['0', '0_0', '0_0_0', '0_0_0_2'] -> 5 True
        # ['0', '0_0', '0_0_0'] -> 4 >= 4 True
        # ['0', '0_0'] -> 3 >= 4 False
        # Validation 5: Check if backtracking happens if level limit is reached or all children are traversed.
        # It should only be called when level limits are reached.
        if flag == "True":
            if self.level_limit_triggered:
                level_limit_exceeded = len(self.stack) >= self.level_limit
                self.level_limit_triggered = False
                if level_limit_exceeded:
                    log.append(f"✅ Level limit exceeded — valid reason for backtracking.")
                    self._create_validation_entry(
                        "backtracking_I_check", popped_id, ValidationResult.PASS,
                        f"Level limit {self.level_limit} exceeded - valid backtrack",
                        {"stack_size": len(self.stack), "level_limit": self.level_limit}
                    )

                else:
                    issues.append(f"❌ Level limit not exceeded — invalid reason for backtracking.")
                    self._create_validation_entry(
                        "backtracking_I_check", popped_id, ValidationResult.FAIL,
                        f"Level limit not exceeded but backtracking with True flag",
                        {"stack_size": len(self.stack), "level_limit": self.level_limit}
                    )

        if flag == 'False': # Which means the search was unsuccessful after search all the children
            # Determine if all children of parent node were explored
            all_children_explored = len(evaluated) == len(all_thoughts)
            if all_children_explored:
                log.append(f"✅ All children of node {popped_id} were explored — valid reason for backtracking.")
                self._create_validation_entry(
                    "backtracking_II_check", popped_id, ValidationResult.PASS,
                    f"All {len(all_thoughts)} children explored - valid backtrack",
                    {"total_thoughts": len(all_thoughts), "evaluated_children": len(evaluated)}
                )
            else:
                issues.append(f"❌ All children of node {popped_id} were not explored — invalid reason for backtracking.")
                self._create_validation_entry(
                    "backtracking_II_check", popped_id, ValidationResult.FAIL,
                    f"Only {len(evaluated)} of {len(all_thoughts)} children explored",
                    {"total_thoughts": len(all_thoughts), "evaluated_children": len(evaluated)}
                )

        # Validation 6: Checking if once it is True the subsequent calls are True. It shouldn't be False
        if self.true_backtrack_mode and flag.upper() == "FALSE":
            issues.append(f"❌ Subsequent backtrack error: once True backtrack mode is triggered, all subsequent backtracks should be True until stack is empty, but got False flag for {popped_id}.")
            self._create_validation_entry(
                "backtracking_flag_sequence_check", popped_id, ValidationResult.FAIL,
                "False flag used while in True backtrack mode",
                {"flag": flag, "true_backtrack_mode": self.true_backtrack_mode}
            )
        elif self.true_backtrack_mode and flag.upper() == "TRUE":
            log.append(f"✅ Subsequent backtrack validation: correctly using True flag in Subsequent mode for {popped_id}.")
            self._create_validation_entry(
                "backtracking_flag_sequence_check", popped_id, ValidationResult.PASS,
                "Correct True flag used in True backtrack mode",
                {"flag": flag, "true_backtrack_mode": self.true_backtrack_mode}
            )

        self.validation_log.append({"event": "pop", "popped": popped_id, "expected": backtracked_id, "issues": issues, "log": log})

    def _get_node(self, node_id: str) -> Dict[str, Any]:
        ptr = self.trace_tree
        if not node_id:
            return ptr
        parts = node_id.split("_")
        current_id = parts[0]
        ptr = ptr.get('children', {}).get(current_id, {})
        for part in parts[1:]:
            current_id += "_" + part
            ptr = ptr.get('children', {}).get(current_id, {})
        return ptr

    def check_node_state(self, node_id: str, state: str):
        try:
            issues = []
            log = []

            node_state = self._get_node(node_id).get("state", "")
            if state.strip() != node_state.strip():
                # State mismatch - log the validation error
                issues.append(f"State mismatch for node {node_id}: expected '{node_state}' but got '{state}'")
                self._create_validation_entry(
                    "node_state_check", node_id, ValidationResult.FAIL,
                    f"State mismatch: expected vs actual state don't match",
                    {"expected_state": node_state, "actual_state": state}
                )
            else:
                # State matches - log success
                log.append(f"✅ State consistency check passed for node {node_id}")
                self._create_validation_entry(
                    "node_state_check", node_id, ValidationResult.PASS,
                    f"State consistency check passed",
                    {"expected_state": node_state, "actual_state": state}
                )
            self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})

        except Exception as e:
            raise e

    def check_node_id_assignment(self, node_id: str, state: str):

        issues = []
        log = []

        parent_id = "_".join(node_id.split("_")[:-1]) or node_id
        thoughts = self._get_node(parent_id).get("thought", "").splitlines()
        state = state.splitlines()[-1]

        # Validation 3: Check if the node id assigned to the state matches the position in the thought generated.
        try:
            index = int(node_id.split("_")[-1])
            # Normalize whitespace for comparison
            thought_normalized = re.sub(r'\s+', ' ', thoughts[index].strip())
            state_normalized = re.sub(r'\s+', ' ', state.strip())

            if index >= len(thoughts) or thought_normalized != state_normalized:
                issues.append(f"Order error: node_id {node_id} does not match thought position.")
                self._create_validation_entry(
                    "node_id_assignment_check", node_id, ValidationResult.FAIL,
                    f"node_id {node_id} does not match thought position.",
                    {"node_id": node_id, "state": state, "thoughts": thoughts}
                )
            else:
                log.append(f"✅ Node {node_id} matches thought position.")
                self._create_validation_entry(
                    "node_id_assignment_check", node_id, ValidationResult.PASS,
                    f"node_id {node_id} matches thought position.",
                    {"node_id": node_id, "state": state, "thoughts": thoughts}
                )
        except Exception as e:
            issues.append(f"Node ID parse error for {node_id}.")
            self._create_validation_entry(
                "node_id_assignment_check", node_id, ValidationResult.FAIL,
                f"Could not parse node ID {node_id} with error {e}",
                {"node_id": node_id, "state": state, "thoughts": thoughts}
            )

        # if issues:
        self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})

    def check_correctness_of_pruning_recursive_call(self, node_id: str, value: str):
        issues = []
        log = []

        # Validation 1: Checking if Pruning is performed if value less than hurestic by checking in the stack
        if value < self.hurestic_threshold:
            if node_id in self.stack:
                issues.append(f"Pruning error: node {node_id} with value {value} less than {self.hurestic_threshold} should be pruned.")
                self._create_validation_entry(
                    "pruning_check", node_id, ValidationResult.FAIL,
                    f"Value {value} is below threshold {self.hurestic_threshold}",
                    {"value": value, "threshold": self.hurestic_threshold}
                )
            else:
                log.append(f"✅ Node {node_id} with value {value} less than {self.hurestic_threshold} pruned successfully.")
                self._create_validation_entry(
                    "pruning_check", node_id, ValidationResult.PASS,
                    f"Value {value} meets threshold {self.hurestic_threshold}",
                    {"value": value, "threshold": self.hurestic_threshold}
                )

        self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})

    def check_sequential_sibling(self, node_id, state):
        log = []
        issues = []

        # Validation 4: Check sequential sibling validation after backtracking
        # This check should not happen for the last node in the thought candidate
        # As no other candidate exists for it to check.

        parent_id = "_".join(node_id.split("_")[:-1]) or node_id
        thoughts = [thought.strip() for thought in self._get_node(parent_id).get("thought", "").splitlines()]
        state = state.splitlines()[-1].strip()

        # Check if state exists in thoughts
        if state not in thoughts:
            issues.append(f"State '{state}' is not found in thoughts for node {node_id}")
            self._create_validation_entry(
                "backtracking_IV_check", node_id, ValidationResult.FAIL,
                f"State not present within the thoughts: for {node_id} got '{state}'",
                {"node_id": node_id, "state": state, "thoughts": thoughts}
            )
            # Reset since we can't proceed with validation
            self.last_popped_node = None
            self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})
            return

        state_index = thoughts.index(state)

        # Skip validation if this is the last thought (no sequential sibling expected)
        if state_index == len(thoughts) - 1:
            log.append(f"Sequential sibling validation skipped: {node_id} corresponds to the last thought")
            # Reset since no more validation needed
            self.last_popped_node = None
            self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})
            return

        # Only proceed if we have a popped node to compare against
        if not self.last_popped_node:
            log.append(f"Sequential sibling validation skipped: no previous popped node to compare against")
            self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})
            return

        popped_node_id = self.last_popped_node

        # Check if the popped node and current node are siblings (same parent)
        popped_parts = popped_node_id.split("_")
        current_parts = node_id.split("_")

        # They are siblings if they have the same parent path and same depth
        if len(popped_parts) != len(current_parts):
            log.append(f"Sequential sibling validation skipped: nodes have different depths ({popped_node_id} vs {node_id})")
            self.last_popped_node = None  # Reset for different depths
            self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})
            return

        popped_parent = "_".join(popped_parts[:-1]) if len(popped_parts) > 1 else ""
        current_parent = "_".join(current_parts[:-1]) if len(current_parts) > 1 else ""

        if popped_parent != current_parent:
            log.append(f"Sequential sibling validation skipped: nodes have different parents ({popped_parent} vs {current_parent})")
            self.last_popped_node = None  # Reset for different parents
            self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})
            return

        # They are siblings, check if current node is the next sequential sibling
        try:
            popped_index = int(popped_parts[-1])
            current_index = int(current_parts[-1])
            expected_index = popped_index + 1

            if current_index == expected_index:
                log.append(f"✅ Sequential child node validation: pushing {node_id} after popping {popped_node_id} is correct.")
                self._create_validation_entry(
                    "backtracking_IV_check", node_id, ValidationResult.PASS,
                    f"Correct sequential sibling: {node_id} follows {popped_node_id}",
                    {"popped_node": popped_node_id, "current_node": node_id, "expected_index": expected_index, "actual_index": current_index}
                )
            else:
                issues.append(f"Sequential child node error: after popping {popped_node_id}, expected to push {popped_parent}_{expected_index} but got {node_id}.")
                self._create_validation_entry(
                    "backtracking_IV_check", node_id, ValidationResult.FAIL,
                    f"Incorrect sequential sibling: expected {popped_parent}_{expected_index}, got {node_id}",
                    {"popped_node": popped_node_id, "expected": f"{popped_parent}_{expected_index}", "actual": node_id}
                )

            # Always reset after attempting validation (whether pass or fail)
            self.last_popped_node = None

        except ValueError:
            # If error during validation then skip and reset
            log.append(f"Sequential child node validation skipped: unable to parse indices for {popped_node_id} and {node_id}.")
            self.last_popped_node = None  # Reset on parsing error

        self.validation_log.append({"event": "push", "node_id": node_id, "issues": issues, "log": log})

    def validate_children_order(self, node_id: str) -> Dict[str, Any]:
        """
        Validation 7: Check if the order of children in OrderedDict matches
        the order they were created in the thought generation.
        """
        issues = []
        log = []

        node = self._get_node(node_id)
        if not node:
            self._create_validation_entry(
                "looping_order", node_id, ValidationResult.FAIL,
                f"Node {node_id} not found",
                {"node_id": node_id}
            )
            return {"issues": [f"Node {node_id} not found"], "log": []}

        # Get the thought candidates (original order)
        thought_content = node.get("thought")
        if thought_content is None or thought_content == "":
            return {"issues": [], "log": [f"No thoughts found for node {node_id}. Node might be pruned."]}

        thoughts = thought_content.splitlines()
        if not thoughts:
            return {"issues": [], "log": [f"No thoughts found for node {node_id} (empty after splitlines)"]}

        # Get the children from OrderedDict (actual traversal order)
        children = node.get("children", OrderedDict())

        # Extract child indices from the children keys
        child_indices = []
        for child_key in children.keys():
            try:
                # Extract the last part of the node_id (the index)
                parts = child_key.split("_")
                if len(parts) > len(node_id.split("_")) if node_id != "0" else len(parts) > 1:
                    child_index = int(parts[-1])
                    child_indices.append(child_index)
            except (ValueError, IndexError):
                issues.append(f"Could not parse child index from {child_key}")
                continue

        # Check if children indices are in sequential order starting from 0
        expected_indices = list(range(len(child_indices)))
        if child_indices != expected_indices:
            issues.append(f"Children order mismatch for node {node_id}: expected indices {expected_indices}, got {child_indices}")
        else:
            log.append(f"✅ Children order validation passed for node {node_id}: indices {child_indices} are in correct sequential order")

        # Verify that explored children match their corresponding thoughts
        for i, child_key in enumerate(children.keys()):
            try:
                parts = child_key.split("_")
                child_index = int(parts[-1])
                child_node = children[child_key]
                child_state = child_node.get("state", "")

                if child_state and child_index < len(thoughts):
                    # Extract the last line of the state (the actual thought/action)
                    state_line = child_state.strip().split('\n')[-1] if child_state.strip() else ""
                    expected_thought = thoughts[child_index].strip()

                    if state_line != expected_thought:
                        issues.append(f"State-thought mismatch for child {child_key}: state='{state_line}', expected thought='{expected_thought}'")
                        self._create_validation_entry(
                            "looping_order", child_key, ValidationResult.FAIL,
                            f"State doesn't match expected thought",
                            {"state": state_line, "expected_thought": expected_thought, "child_index": child_index}
                        )
                    else:
                        log.append(f"✅ State-thought match verified for child {child_key}")
                        self._create_validation_entry(
                            "looping_order", child_key, ValidationResult.PASS,
                            f"State matches expected thought",
                            {"state": state_line, "thought": expected_thought}
                        )

            except (ValueError, IndexError) as e:
                issues.append(f"Error validating child {child_key}: {str(e)}")
                self._create_validation_entry(
                    "looping_order", child_key, ValidationResult.FAIL,
                    f"Error during validation: {str(e)}",
                    {"error": str(e)}
                )

        return {"issues": issues, "log": log}

    def validate_final_answer_correctness(self, final_answer: str, expected_answer: str = None) -> Dict[str, Any]:
        """
        Validation 8: Check the correctness of the final answer.

        Assumes final_answer is a pure math expression (e.g., "(10 - 2) * (12 - 9)").
        The expression is evaluated and, when expected_answer is given, the
        result is compared against it with an absolute tolerance of 1e-6.

        Args:
            final_answer: Expression to evaluate. Empty / whitespace-only / None
                input is reported as an issue without evaluating anything.
            expected_answer: Optional target value, parsed with float(). When it
                is None the expression is only evaluated; nothing is compared.

        Returns:
            Dict with two lists: "issues" (problems found) and "log"
            (informational messages). Evaluation failures and comparison
            outcomes are also recorded through self._create_validation_entry
            under the check name "final_answer_check".
        """
        issues = []
        log = []

        if not final_answer or final_answer.strip() == "":
            issues.append("Final answer is empty or None")
            return {"issues": issues, "log": log}

        expression = final_answer.strip()
        log.append(f"🔍 Using final answer expression: {expression}")

        try:
            # NOTE(review): security - eval() runs arbitrary Python. The caller
            # visible in this file passes text parsed from a model completion, so
            # consider a restricted arithmetic evaluator before using this on
            # untrusted output.
            result = eval(expression)
            log.append(f"🧮 Evaluated result: {result}")
        except Exception as e:
            issues.append(f"Failed to evaluate expression: {str(e)}")
            # Neither the evaluated result nor the expected value exists on this
            # path, so the details carry only what is known: the expression and
            # the error text.
            self._create_validation_entry(
                "final_answer_check", "final", ValidationResult.FAIL,
                f"Error {e} while running expression {expression}",
                {"expression": expression, "error": str(e)}
            )
            return {"issues": issues, "log": log}

        if expected_answer is None:
            return {"issues": issues, "log": log}

        try:
            expected_val = float(expected_answer)
        except (TypeError, ValueError):
            issues.append(f"Expected answer '{expected_answer}' is not a valid number")
            self._create_validation_entry(
                "final_answer_check", "final", ValidationResult.FAIL,
                f"Expected answer '{expected_answer}' is not a valid number",
                {"expected_answer": expected_answer}
            )
            return {"issues": issues, "log": log}

        try:
            difference = abs(result - expected_val)
        except TypeError:
            # The expression evaluated to something that is not a number
            # (e.g. a string or tuple), so it cannot be compared.
            message = (
                f"Evaluated result {result!r} is not numeric and cannot be "
                f"compared with expected answer {expected_val}"
            )
            issues.append(message)
            self._create_validation_entry(
                "final_answer_check", "final", ValidationResult.FAIL,
                message,
                {"result": repr(result), "expected": expected_val}
            )
            return {"issues": issues, "log": log}

        details = {"result": result, "expected": expected_val, "difference": difference}
        if difference < 1e-6:
            log.append(f"✅ Result {result} matches expected answer {expected_val}")
            self._create_validation_entry(
                "final_answer_check", "final", ValidationResult.PASS,
                f"Result {result} matches expected answer {expected_val}",
                details
            )
        else:
            issues.append(f"❌ Result {result} does not match expected answer {expected_val}")
            self._create_validation_entry(
                "final_answer_check", "final", ValidationResult.FAIL,
                f"Result {result} does not match expected answer {expected_val}",
                details
            )

        return {"issues": issues, "log": log}

    def run_end_of_execution_validation(self, final_answer: str = None, expected_answer: str = None) -> Dict[str, Any]:
        """
        Run all end-of-execution validations on the entire trace tree.
        This should be called after react_loop completes.

        Args:
            final_answer: Final expression to check. The final-answer validation
                is skipped when this is None.
            expected_answer: Passed through to validate_final_answer_correctness.

        Returns:
            Dict with three keys:
              - "children_order_validation": node id -> result of
                validate_children_order for that node, in pre-order.
              - "final_answer_validation": result of
                validate_final_answer_correctness, or {} when skipped.
              - "summary": {"total_issues", "total_logs"}, the number of issue
                and log messages across all results collected above.
        """
        all_validations = {
            "children_order_validation": {},
            "final_answer_validation": {},
            "summary": {"total_issues": 0, "total_logs": 0}
        }
        summary = all_validations["summary"]

        def add_to_summary(validation_result):
            # Keep the summary counters in step with every collected result.
            summary["total_issues"] += len(validation_result.get("issues") or [])
            summary["total_logs"] += len(validation_result.get("log") or [])

        # Validation 7: Check children order for all nodes in the trace tree
        def traverse_and_validate_order(node_dict, current_path):
            # Validate the current node first, then descend (pre-order).
            validation_result = self.validate_children_order(current_path)
            all_validations["children_order_validation"][current_path] = validation_result
            add_to_summary(validation_result)

            # `or {}` tolerates a node whose "children" entry is missing or None.
            for child_key, child_node in (node_dict.get("children") or {}).items():
                traverse_and_validate_order(child_node, child_key)

        # Start traversal from the children of the root
        if self.trace_tree and self.trace_tree.get("children"):
            # Traverse each top-level child (like '0', '1', etc.)
            for child_key, child_node in self.trace_tree["children"].items():
                traverse_and_validate_order(child_node, child_key)

        # Validation 8: Final answer correctness
        if final_answer is not None:
            final_answer_validation = self.validate_final_answer_correctness(final_answer, expected_answer)
            all_validations["final_answer_validation"] = final_answer_validation
            add_to_summary(final_answer_validation)

        return all_validations

    def _create_validation_entry(self, check_name: str, node_id: str, result: ValidationResult,
                               reason: str, details: Dict[str, Any] = None, event_type: str = None):
        """
        Build one report record, append it to self.validation_report and return it.

        The record stores result.value (not the enum member), a timestamp from
        self._get_timestamp(), "validation" as the event type when none is
        given, and an empty dict when no details are given.
        """
        record = dict(
            check_name=check_name,
            node_id=node_id,
            result=result.value,
            reason=reason,
            timestamp=self._get_timestamp(),
            event_type=event_type if event_type else "validation",
            details=details if details else {},
        )
        self.validation_report.append(record)
        return record

    def _get_timestamp(self):
        """Get current timestamp (implement as needed)"""
        import datetime
        return datetime.datetime.now().isoformat()

    def aggregate_validation_logs(self, validation_logs):
        """
        Aggregates validation logs by event and node_id combination.
        Combines issues and logs from multiple entries with same event+node_id into single entry.

        Args:
            validation_logs (list): List of validation log dictionaries. Each may
                carry "event", "node_id", "issues" and "log"; missing fields
                default to "" / empty list. None is treated as an empty list.

        Returns:
            list: One dict per distinct (event, node_id) pair, ordered by first
            occurrence, with "issues" and "log" concatenated in input order.
        """
        aggregated = {}

        for log_entry in validation_logs or []:
            event = log_entry.get("event", "")
            node_id = log_entry.get("node_id", "")

            # A tuple key keeps the two fields apart. Joining them with "_"
            # would merge e.g. ("a_b", "c") with ("a", "b_c"), because node ids
            # themselves contain underscores.
            key = (str(event), str(node_id))

            bucket = aggregated.get(key)
            if bucket is None:
                # First occurrence - create new entry
                bucket = aggregated[key] = {
                    "event": event,
                    "node_id": node_id,
                    "issues": [],
                    "log": []
                }

            # `or []` tolerates an explicit None stored under either field.
            bucket["issues"].extend(log_entry.get("issues") or [])
            bucket["log"].extend(log_entry.get("log") or [])

        # dicts preserve insertion order, so the values are already in
        # order of first occurrence.
        return list(aggregated.values())

    def update_search_trace_validity(self, validation_summary):
        """
        Updates the Valid Search Trace status based on validation results.
        If any check type has FAIL count > 0, sets success_trace to False, otherwise True.

        The status is written to
        validation_summary["checks_by_type"]["valid_search_trace"]["success_trace"];
        the "checks_by_type" section is created when it is missing so the status
        is always present in the returned summary.

        Args:
            validation_summary (dict): The validation summary dictionary containing checks_by_type

        Returns:
            dict: The same dictionary object, updated in place.
        """
        # setdefault (not get) so that a newly created section stays attached
        # to the summary instead of being a throwaway dict.
        checks_by_type = validation_summary.setdefault("checks_by_type", {})

        # Non-dict entries are ignored; a missing or None FAIL count is zero.
        has_failures = any(
            isinstance(check_data, dict) and (check_data.get("FAIL") or 0) > 0
            for check_data in checks_by_type.values()
        )

        status_entry = checks_by_type.setdefault("valid_search_trace", {})
        status_entry["success_trace"] = not has_failures

        return validation_summary

    def generate_validation_report(self) -> tuple:
        """
        Generate a comprehensive validation report.

        Returns:
            A 2-tuple (report, trace_log):
              - report: dict with "summary" (total / passed / failed counts),
                "checks_by_type" (per check name: FAIL count and the matching
                entries, plus the "valid_search_trace" status added by
                update_search_trace_validity), "detailed_results" (the raw
                self.validation_report list), "legacy_log" (self.validation_log
                aggregated per event and node id) and "tree_trace".
              - trace_log: the "children" mapping of self.trace_tree, or {} when
                the tree is empty / has no children. This is the same object
                stored under report["tree_trace"].
        """
        entries = self.validation_report
        passed_checks = sum(1 for entry in entries if entry["result"] == "PASS")
        failed_checks = sum(1 for entry in entries if entry["result"] == "FAIL")

        # Group by check type; only FAIL results are counted per type.
        checks_by_type = {}
        for entry in entries:
            bucket = checks_by_type.setdefault(entry["check_name"], {"FAIL": 0, "details": []})
            if entry["result"] == "FAIL":
                bucket["FAIL"] += 1
            bucket["details"].append(entry)

        # Display from the children key just for better viz. Tolerate a missing
        # tree the same way run_end_of_execution_validation does.
        trace_log = (self.trace_tree or {}).get("children", {})

        report = {
            "summary": {
                "total_checks": len(entries),
                "passed": passed_checks,
                "failed": failed_checks,
            },
            "checks_by_type": checks_by_type,
            "detailed_results": entries,
            # Keep the old format for compatibility. Aggregate the common action and node_id
            "legacy_log": self.aggregate_validation_logs(self.validation_log),
            "tree_trace": trace_log
        }

        # Adding the Valid Search Trace by checking the overall report for a FAILURE
        report = self.update_search_trace_validity(report)

        return report, trace_log

    def react_loop(self, step_limit: Optional[int] = None, output_dir: Optional[str] = None):
        """
        Run the ReAct conversation loop, then validate the run and save the reports.

        Each turn requests a completion via self.chat_completion, takes the text
        after the last 'Action: ' marker as a tool call, executes it, and (when
        the tool returns something) appends the completion and the tool result
        to self.messages as the next assistant / user messages.

        The loop stops when the completion contains "We're now outside the tree"
        or when step_limit turns have been executed.

        After the loop, run_end_of_execution_validation is called with the last
        parsed final answer and the expected answer "24", and two JSON files
        are written:
            {output_dir}/eval/{input_seq_id}_eval_report_{run_name}.json
            {output_dir}/logs/{input_seq_id}_trace_log_{run_name}.json

        Args:
            step_limit: Maximum number of turns; None means no limit.
            output_dir: Root directory for the two JSON files. Defaults to the
                Google Drive folder this notebook has always written to. The
                eval/ and logs/ sub-directories are created when missing.

        Raises:
            AssertionError: when a completion without 'Final solution' contains
                none of the known tool names.
            Exception: anything raised while executing a tool call propagates.
        """
        import json
        import os

        if output_dir is None:
            output_dir = "/content/drive/MyDrive/001_projects/agentx/gpt-4o-mini-run-1"

        i = 0
        final_answer = None
        while True:
            print("######"*5, self.stack, "######"*5)

            if step_limit is not None and i >= step_limit:
                print("Step limit exceeded! Breaking the ReAct loop...")
                break
            completion = self.chat_completion(self.messages, model=self.model)

            print(completion)
            print("---")

            if "We're now outside the tree" in completion:
                break

            # Reset every turn: a 'Final solution' completion without a
            # backtracking call must not reuse the previous turn's tool result
            # (or hit an unbound name on the first turn).
            result = None
            tool_call_string = completion.split('Action: ')[-1].strip()

            # NOTE(review): security - the eval() calls below execute text
            # produced by the model. Only the presence of a tool name is
            # checked, so the rest of the string is arbitrary Python. Consider
            # parsing the call and dispatching explicitly.
            if 'Final solution' in completion:
                if 'backtracking' in tool_call_string:
                    tool_call_string = tool_call_string.replace('backtracking', 'self.backtracking')
                    result = eval(tool_call_string)
                    print(result)
                    print("---")

                # Keep only the expression: text after 'Answer:' and before '='.
                final_answer = completion.split('Final solution:-')[-1].split("Answer:")[-1].strip().split("=")[0].strip()
            else:
                if 'thought_generator' in tool_call_string or 'state_evaluator' in tool_call_string or 'backtracking' in tool_call_string:
                    tool_call_string = tool_call_string.replace('thought_generator', 'self.thought_generator')
                    tool_call_string = tool_call_string.replace('state_evaluator', 'self.state_evaluator')
                    tool_call_string = tool_call_string.replace('backtracking', 'self.backtracking')
                    result = eval(tool_call_string)
                    print(result)
                    print("---")
                else:
                    # Log everything to track the error
                    raise AssertionError("Invalid tool call string!")

            if result is not None:
                completion += "\nObservation:\n"
                self.messages.append({'role': "assistant", 'content': completion})
                self.messages.append({'role': "user", 'content': str(result)})

            i += 1

        # Run end-of-execution validations
        print("\n🔍 Running end-of-execution validations...")
        self.run_end_of_execution_validation(final_answer = final_answer, expected_answer = "24")
        print("🔍 End-of-execution validations completed.")

        # Generate Evaluation Report
        report, trace_log = self.generate_validation_report()

        eval_path = f"{output_dir}/eval/{self.input_seq_id}_eval_report_{self.run_name}.json"
        trace_path = f"{output_dir}/logs/{self.input_seq_id}_trace_log_{self.run_name}.json"

        # Save the eval report and the trace log; create the target folders so a
        # finished run is not lost to a missing directory.
        for path, payload in ((eval_path, report), (trace_path, trace_log)):
            os.makedirs(os.path.dirname(path), exist_ok=True)
            with open(path, "w") as f:
                json.dump(payload, f, indent=4)


In [None]:
# Input for a single manual run: the four puzzle numbers followed by the
# search parameters (heuristic threshold and level limit).
# input_seq5 = '1 1 1 8'
input_seq5 = '''Four numbers: 4 5 7 11

Search parameters:-
Heuristic threshold: 3.0
Level limit: 3'''

In [None]:
# Build the agent for the manually specified puzzle; input_seq_id and run_name
# determine the names of the JSON files written by react_loop.
react = FewShotTreeSearchStackReAct(
    client,
    "gpt-4o-mini",
    messages,
    input_seq5,
    get_thought_gen_prompt,
    get_state_eval_prompt,
    heuristic_calculator,
    input_seq_id="4_5_7_11",
    run_name="run_1",
)

In [None]:
# Run the agent with no step limit; react_loop also runs the end-of-execution
# validations and writes the eval report and trace log JSON files.
react.react_loop()

############################## [] ##############################
The four numbers are: 4 5 7 11

The minimum required heuristic threshold is: 3.0
The level limit is: 3

Starting the tree search...
---
~~~
Current level: 0
Index of current node: 0
State of current node:
'' (empty string; no thoughts have been generated yet as we're still at the root node)

Using the thought generator tool to obtain thought candidates...

Action: thought_generator('', '0')

---
4 + 5 = 9 (left: 7 9 11)
5 + 7 = 12 (left: 4 11 12)
7 + 11 = 18 (left: 4 5 18)
4 * 5 = 20 (left: 7 11 20)
11 - 4 = 7 (left: 5 7 7)
11 - 5 = 6 (left: 4 6 7)
7 - 4 = 3 (left: 3 5 11)
11 / 4 = 2.75 (left: 2.75 5 7)
7 / 4 = 1.75 (left: 1.75 5 11)
5 / 4 = 1.25 (left: 1.25 7 11)
---
############################## ['0'] ##############################


Each of the above thought candidates has been added as a child node of the current node:

Node 0_0: 4 + 5 = 9 (left: 7 9 11)
Node 0_1: 5 + 7 = 12 (left: 4 11 12)
Node 0_2: 7 + 11 = 18 (lef

In [None]:
# Display the agent's `last_popped_node` attribute after the run.
react.last_popped_node

'0'

## Running Automated Loop

In [None]:
import pandas as pd
import copy
import json

df = pd.read_csv("/content/drive/MyDrive/001_projects/agentx/gpt-4o-mini-run-1/eval_set_24.csv")

# Save the original base messages (length 211)
# NOTE(review): `messages` is rebound inside the loop below, so re-running this
# cell snapshots whatever the previous run left in `messages` - confirm it still
# holds the base prompt before re-running.
base_messages = copy.deepcopy(messages)  # Deep copy to ensure independence
# print(f"Base messages length: {len(base_messages)}")

# Results storage
results = []

# Check if results file already exists and load it
csv_filename = '/content/drive/MyDrive/001_projects/agentx/gpt-4o-mini-run-1/game_24_puzzle_4o-mini_results.csv'
try:
    existing_results_df = pd.read_csv(csv_filename)
    results = existing_results_df.to_dict('records')  # Convert back to list of dicts
    # NOTE(review): completed_ranks is only printed; nothing below uses it to
    # skip puzzles that were already run.
    completed_ranks = set(existing_results_df['rank'].tolist())
    print(f"📂 Loaded existing results: {len(results)} puzzles already completed")
    print(f"   Completed ranks: {sorted(completed_ranks)}")
except FileNotFoundError:
    completed_ranks = set()
    print(f"🆕 Starting fresh - no existing results file found")

# Loop through each puzzle
for index, row in df[28:].iterrows():

    # NOTE(review): the slice starts at row 28, so (assuming the default
    # RangeIndex from read_csv) the first index is 28 and this guard breaks
    # before any puzzle runs. If the intent is "run rows 28-29" the test should
    # be `index >= 30`; if it is "skip to row 30" it should `continue`.
    # Left unchanged until the intent is confirmed.
    if index < 30:
        break

    puzzle = row['Puzzles']
    rank = "1"
    # Single source for the run name so the success record, the error record and
    # the report file name written by react_loop all agree.
    run_name = f"run_{rank}"

    print(f"\n{'='*50}")
    print(f"Running Puzzle {rank}: {puzzle}")
    print(f"{'='*50}")

    # Reset messages to base for each puzzle
    messages = copy.deepcopy(base_messages)
    print(f"Messages reset to base length: {len(messages)}")

    # Create input sequence
    input_seq = f'''Four numbers: {puzzle}

Search parameters:-
Heuristic threshold: 3.0
Level limit: 3'''

    # Create input_seq_id (replace spaces with underscores)
    input_seq_id = puzzle.replace(' ', '_')

    try:
        # Run the ReAct system
        react = FewShotTreeSearchStackReAct(
            client,
            "gpt-4o-mini",
            messages,
            input_seq,
            get_thought_gen_prompt,
            get_state_eval_prompt,
            heuristic_calculator,
            input_seq_id=input_seq_id,
            run_name=run_name
        )

        # Run the react loop; it writes the evaluation report JSON read below
        react.react_loop()

        # Read the evaluation report JSON file
        eval_report_filename = f"/content/drive/MyDrive/001_projects/agentx/gpt-4o-mini-run-1/eval/{input_seq_id}_eval_report_{run_name}.json"
        validation_summary = {}

        try:
            with open(eval_report_filename, 'r') as f:
                eval_report = json.load(f)
                validation_summary = eval_report.get('summary', {})
                print(f"Validation Summary: {validation_summary}")
        except FileNotFoundError:
            print(f"⚠️ Evaluation report file not found: {eval_report_filename}")
            validation_summary = {'total_checks': 0, 'passed': 0, 'failed': 'file_not_found'}
        except json.JSONDecodeError as e:
            print(f"⚠️ Error reading evaluation report: {e}")
            validation_summary = {'total_checks': 0, 'passed': 0, 'failed': 'json_error'}

        # Store results with validation information
        result = {
            'rank': rank,
            'puzzle': puzzle,
            'input_seq_id': input_seq_id,
            'run_name': run_name,
            'status': 'completed',
            'final_messages_length': len(messages),
            'validation_total_checks': validation_summary.get('total_checks', 0),
            'validation_passed': validation_summary.get('passed', 0),
            'validation_failed': validation_summary.get('failed', 0),
            # A summary without a 'failed' count is treated as a failure.
            'validation_status': 'PASS' if validation_summary.get('failed', 1) == 0 else 'FAIL',
        }

        results.append(result)

        # Enhanced logging
        validation_status = result['validation_status']
        failed_count = result['validation_failed']
        if validation_status == 'PASS':
            print(f"✅ Puzzle {rank} completed successfully - All validations passed")
        else:
            print(f"⚠️ Puzzle {rank} completed with validation issues - {failed_count} failed checks")

    except Exception as e:
        # Record the failure and move on to the next puzzle.
        print(f"❌ Error running puzzle {rank}: {str(e)}")
        result = {
            'rank': rank,
            'puzzle': puzzle,
            'input_seq_id': input_seq_id,
            'run_name': run_name,
            'status': 'failed',
            'error': str(e),
            'final_messages_length': len(messages),
            'validation_total_checks': 0,
            'validation_passed': 0,
            'validation_failed': 'error',
            'validation_status': 'ERROR',
        }
        results.append(result)


    # Convert results to DataFrame for analysis
    results_df = pd.DataFrame(results)
    print(f"\n{'='*50}")
    print("SUMMARY")
    print(f"{'='*50}")
    print(results_df[['rank', 'puzzle', 'status', 'validation_status', 'validation_failed']].to_string())

    # Checkpoint after every puzzle, to the same file the results were loaded from
    results_df.to_csv(csv_filename, index=False)

    print(f"Messages length after puzzle {rank}: {len(messages)}")
