# The simulator

In [1]:
from stormvogel.show import show
from stormvogel.layout import Layout
import stormvogel.model
import stormvogel.simulator

In [2]:
# We create the monty hall mdp
mdp = stormvogel.model.new_mdp("Monty Hall")

init = mdp.get_initial_state()

# first choose car position
init.set_transitions(
    [(1 / 3, mdp.new_state("carchosen", {"car_pos": i})) for i in range(3)]
)

# we choose a door in each case
for s in mdp.get_states_with_label("carchosen"):
    s.set_transitions(
        [
            (
                mdp.action(f"open{i}"),
                mdp.new_state("open", s.features | {"chosen_pos": i}),
            )
            for i in range(3)
        ]
    )

# the other goat is revealed
for s in mdp.get_states_with_label("open"):
    car_pos = s.features["car_pos"]
    chosen_pos = s.features["chosen_pos"]
    other_pos = {0, 1, 2} - {car_pos, chosen_pos}
    s.set_transitions(
        [
            (
                1 / len(other_pos),
                mdp.new_state("goatrevealed", s.features | {"reveal_pos": i}),
            )
            for i in other_pos
        ]
    )

# we must choose whether we want to switch
for s in mdp.get_states_with_label("goatrevealed"):
    car_pos = s.features["car_pos"]
    chosen_pos = s.features["chosen_pos"]
    reveal_pos = s.features["reveal_pos"]
    other_pos = list({0, 1, 2} - {reveal_pos, chosen_pos})[0]
    s.set_transitions(
        [
            (
                mdp.action("stay"),
                mdp.new_state(
                    ["done"] + (["target"] if chosen_pos == car_pos else []),
                    s.features | {"chosen_pos": chosen_pos},
                ),
            ),
            (
                mdp.action("switch"),
                mdp.new_state(
                    ["done"] + (["target"] if other_pos == car_pos else []),
                    s.features | {"chosen_pos": other_pos},
                ),
            ),
        ]
    )

# we add self loops to all states with no outgoing transitions
mdp.add_self_loops()

We show what our mdp model looks like.

In [3]:
vis = show(mdp, layout=Layout("layouts/monty.json"), save_and_embed=True)

We want to simulate this model. That is, we start at the initial state and then we walk through the model by choosing random actions.

When we do this, we get a partial model as a result that contains everything we discovered during this walk. 

Try running this multiple times, and observe that sometimes we get to the target and sometimes we do not.

In [4]:
# we can choose how many steps we take:
steps = 4

# and we can specify a seed if we want:
seed = 12345676346

# then we run the simulator:
partial_model = stormvogel.simulator.simulate(mdp, steps=steps, seed=seed)
# We could also provide a seed.
#partial_model = stormvogel.simulator.simulate(mdp, steps=steps, seed=seed)

vis = show(partial_model, save_and_embed=True, layout=Layout("layouts/small_monty.json"))

We can also provide a scheduler (i.e. policy) which chooses what actions we should take at all time.

In this case, we always take the first action, which means that we open door 0, and don't switch doors.

In [5]:
#it still chooses random actions but we can prevent this by providing a scheduler:
taken_actions = {}
for id, state in mdp.states.items():
    taken_actions[id] = state.available_actions()[0]
scheduler = stormvogel.result.Scheduler(mdp, taken_actions)

partial_model = stormvogel.simulator.simulate(mdp, steps=steps, scheduler=scheduler, seed=seed)
vis = show(partial_model, save_and_embed=True, layout=Layout("layouts/small_monty.json"))

We can highlight the scheduled states in the visualization of the entire model.

In [6]:
vis = show(mdp, show_editor=True, layout=Layout("layouts/monty.json"), scheduler=scheduler, save_and_embed=True)

We can also get a path from the simulator function.

In [7]:
#we can also use another simulator function that returns a path instead of a partial model:
path = stormvogel.simulator.simulate_path(mdp, steps=4, scheduler=scheduler, seed=123456)

print(path)

initial state --(action: empty)--> state: 2 --(action: open0)--> state: 7 --(action: empty)--> state: 17 --(action: stay)--> state: 33


We can even visualize this path interactively! This works with any Path, not just a scheduler path. TODO.

In [8]:
from stormvogel.show import show
from time import sleep

vis = show(mdp, save_and_embed=True, layout=Layout("layouts/monty.json"))
vis.show_path(path)
for state in path:
    vis.highlight_state(state, color="red")
    sleep(1)
    # TODO should crash
    

AttributeError: 'Visualization' object has no attribute 'show_path'