In [1]:
#
# Getting started with MAKER
#
# This notebook illustrates the core components of MAKER on
# the Towers of Hanoi domain illustrated in the paper:
# Solving a Million-step LLM Task with Zero Errors (arxiv.org/abs/2511.09030).
#
# Practical experiments for solving the full 20 disk (1M+ step) task
# require some method of parallelization not included here,
# e.g., via asynchronous or batch APIs.
#
# We hope this example serves as a starting point for experimenting with
# different aspects of the method, and applying to new domains.
#
# Enjoy!
# 

In [2]:
from openai import OpenAI
from prompts import create_prompts
from parsers import parse_move_state_flag as parse_move_state
from toh_simulator import TowerOfHanoi

In [3]:
# Set up method to call API

client = OpenAI() # Assumes OPENAI_API_KEY is set as an environment variable (the name the OpenAI client reads by default)

def call_llm(model, system_prompt, user_prompt, max_tokens=750, temperature=0.1):
    """Send one system + user exchange to the chat-completions API.

    Args:
        model: Model identifier forwarded to the API.
        system_prompt: Content of the ``system`` message.
        user_prompt: Content of the ``user`` message.
        max_tokens: Completion-length cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = client.chat.completions.create(
        model=model,
        max_tokens=max_tokens,
        temperature=temperature,
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [4]:
# Set up task with some even number of disks.
n_disks = 4
# Every disk starts on the first peg, largest (n_disks) at the bottom and
# smallest (1) on top; the state is kept as the string form of the three pegs.
state = str([list(range(n_disks, 0, -1)), [], []])
print("Initial State:", state)

Initial State: [[4, 3, 2, 1], [], []]


In [5]:
# Set parameters for MAKER
model = "gpt-4.1-mini"      # model name passed to the chat-completions call
temperature = 0.1           # sampling temperature for every LLM call
max_tokens = 750            # completion-length cap for every LLM call
k = 2                       # lead a candidate needs over its rivals to win a vote
n_steps = 2 ** n_disks - 1  # number of steps to generate (15 for the 4-disk task)

In [6]:
# Algorithms from Figure 2 in the paper

In [7]:
def get_vote(x, model, max_tokens=750, temperature=0.1, max_tries=10):
    """Sample the LLM until a response parses, and return that vote.

    Args:
        x: Pair ``(prev_move, state)`` used to build the prompts.
        model: Model name passed to ``call_llm``.
        max_tokens: Forwarded to ``call_llm``.
        temperature: Forwarded to ``call_llm``.
        max_tries: Maximum number of responses to sample.

    Returns:
        The ``(move, state)`` pair obtained from ``parse_move_state``.

    Raises:
        ValueError: If none of the ``max_tries`` responses parses.

    Note:
        Parsing reads the notebook-level ``n_disks`` variable, so that cell
        must have been run before this function is called.
    """
    for attempt in range(max_tries):  # max_tries can be useful for debugging
        prev_move, current_state = x
        system_prompt, user_prompt = create_prompts(prev_move, current_state)
        reply = call_llm(model, system_prompt, user_prompt, max_tokens, temperature)
        try:
            # Unpacking stays inside the try so a malformed parse result is
            # flagged and resampled like any other ValueError.
            move, next_state = parse_move_state(reply, n_disks)
        except ValueError:
            print(f'Response {attempt} flagged')
        else:
            return move, next_state

    raise ValueError(f'No valid response in {max_tries} samples')

In [8]:
def do_voting(x, model, k, max_tokens=750, temperature=0.1, max_tries=10):
    """Collect votes until one candidate is ahead by ``k``, then return it.

    Args:
        x: Input forwarded unchanged to ``get_vote``.
        model: Model name forwarded to ``get_vote``.
        k: Required lead. The latest candidate wins when its count is at
            least ``k`` above the best rival, or equals ``k`` while it is
            the only candidate seen so far.
        max_tokens: Forwarded to ``get_vote``.
        temperature: Forwarded to ``get_vote``.
        max_tries: Maximum number of votes to collect; also forwarded to
            ``get_vote`` as its own retry limit.

    Returns:
        The winning vote, exactly as returned by ``get_vote``.

    Raises:
        ValueError: If no candidate wins within ``max_tries`` votes.
    """
    tally = {}
    for round_idx in range(max_tries):  # max_tries can be useful for debugging
        print("Vote", round_idx)

        # Get new vote
        y = get_vote(x, model, max_tokens, temperature, max_tries)
        tally[y] = tally.get(y, 0) + 1

        print(tally)

        # Check decision condition: only the candidate that just received a
        # vote can have newly reached the required lead.
        rival_counts = [count for candidate, count in tally.items() if candidate != y]
        if rival_counts:
            decided = tally[y] >= max(rival_counts) + k
        else:
            decided = tally[y] == k
        if decided:
            return y

    raise ValueError(f'No decision reached in {max_tries} votes')

In [9]:
def generate_solution(initial_state,
                      model,
                      k,
                      n_steps,
                      max_tokens=750,
                      temperature=0.1,
                      max_tries=10):
    """Produce ``n_steps`` actions, each chosen by a round of voting.

    Every step passes the previous action (``None`` for the first step) and
    the current state to ``do_voting``, whose result becomes the next
    ``(action, state)`` pair.

    Args:
        initial_state: Starting state; a shallow copy (``[:]``) is used.
        model: Model name forwarded to ``do_voting``.
        k: Vote lead forwarded to ``do_voting``.
        n_steps: Number of steps to generate.
        max_tokens: Forwarded to ``do_voting``.
        temperature: Forwarded to ``do_voting``.
        max_tries: Forwarded to ``do_voting``.

    Returns:
        List of the chosen actions, in order.
    """
    chosen = []
    prev_action, current_state = None, initial_state[:]
    for step in range(n_steps):
        print(f"Step {step} of {n_steps}")
        prev_action, current_state = do_voting(
            (prev_action, current_state), model, k, max_tokens, temperature, max_tries
        )
        chosen.append(prev_action)
        print()
    print("Done.")
    return chosen

In [11]:
# Run MAKER end to end: one voted (action, state) decision per step.
actions = generate_solution(
    state,
    model,
    k,
    n_steps,
    max_tokens=max_tokens,
    temperature=temperature,
)

Step 0 of 15
Vote 0
{((1, 0, 1), ((4, 3, 2), (1,), ())): 1}
Vote 1
{((1, 0, 1), ((4, 3, 2), (1,), ())): 2}

Step 1 of 15
Vote 0
{((2, 0, 2), ((4, 3), (1,), (2,))): 1}
Vote 1
{((2, 0, 2), ((4, 3), (1,), (2,))): 2}

Step 2 of 15
Vote 0
Response 0 flagged
{((1, 1, 2), ((4, 3), (), (2, 1))): 1}
Vote 1
Response 0 flagged
{((1, 1, 2), ((4, 3), (), (2, 1))): 2}

Step 3 of 15
Vote 0
{((3, 0, 1), ((4,), (3,), (2, 1))): 1}
Vote 1
{((3, 0, 1), ((4,), (3,), (2, 1))): 2}

Step 4 of 15
Vote 0
Response 0 flagged
Response 1 flagged
Response 2 flagged
{((1, 2, 0), ((4, 1), (3,), (2,))): 1}
Vote 1
Response 0 flagged
Response 1 flagged
{((1, 2, 0), ((4, 1), (3,), (2,))): 2}

Step 5 of 15
Vote 0
{((2, 2, 1), ((4, 1), (3, 2), ())): 1}
Vote 1
{((2, 2, 1), ((4, 1), (3, 2), ())): 2}

Step 6 of 15
Vote 0
{((1, 0, 1), ((4,), (3, 2, 1), ())): 1}
Vote 1
{((1, 0, 1), ((4,), (3, 2, 1), ())): 2}

Step 7 of 15
Vote 0
{((4, 0, 2), ((), (3, 2, 1), (4,))): 1}
Vote 1
{((4, 0, 2), ((), (3, 2, 1), (4,))): 2}

Step 8 of 15


In [12]:
# Test solution
# Replay the generated actions in the simulator and report the outcome:
# an invalid move, a solved puzzle, or an action list that ends too early.
toh = TowerOfHanoi(n_disks)
print("Initial State:", toh.get_state())
for action in actions:

    # act() also returns a done flag and a message; only the resulting state
    # and the validity flag are used here.
    sim_state, move_valid, done, sim_message = toh.act(action)

    print("Action:", action)
    print("State:", sim_state)

    if not move_valid:
        print("INVALID MOVE!")
        break

    if toh.is_solved():
        print("SOLVED!")
        break
else:
    # Reached only when the loop ends without a break, i.e. every action was
    # valid but the puzzle is still unsolved (or there were no actions at
    # all). Previously this case printed nothing.
    print("NOT SOLVED: ran out of actions before the puzzle was solved.")

Initial State: {'0': [4, 3, 2, 1], '1': [], '2': []}
Action: (1, 0, 1)
State: {'0': [4, 3, 2], '1': [1], '2': []}
Action: (2, 0, 2)
State: {'0': [4, 3], '1': [1], '2': [2]}
Action: (1, 1, 2)
State: {'0': [4, 3], '1': [], '2': [2, 1]}
Action: (3, 0, 1)
State: {'0': [4], '1': [3], '2': [2, 1]}
Action: (1, 2, 0)
State: {'0': [4, 1], '1': [3], '2': [2]}
Action: (2, 2, 1)
State: {'0': [4, 1], '1': [3, 2], '2': []}
Action: (1, 0, 1)
State: {'0': [4], '1': [3, 2, 1], '2': []}
Action: (4, 0, 2)
State: {'0': [], '1': [3, 2, 1], '2': [4]}
Action: (1, 1, 2)
State: {'0': [], '1': [3, 2], '2': [4, 1]}
Action: (2, 1, 0)
State: {'0': [2], '1': [3], '2': [4, 1]}
Action: (1, 2, 0)
State: {'0': [2, 1], '1': [3], '2': [4]}
Action: (3, 1, 2)
State: {'0': [2, 1], '1': [], '2': [4, 3]}
Action: (1, 0, 1)
State: {'0': [2], '1': [1], '2': [4, 3]}
Action: (2, 0, 2)
State: {'0': [], '1': [1], '2': [4, 3, 2]}
Action: (1, 1, 2)
State: {'0': [], '1': [], '2': [4, 3, 2, 1]}
SOLVED!
