# The simulator

In [7]:
import stormvogel.model
import stormvogel.result
import stormvogel.simulator
from stormvogel import show

In [8]:
# Build the Monty Hall problem as an MDP, starting from a fresh model.
mdp = stormvogel.model.new_mdp("Monty Hall")

init = mdp.get_initial_state()

# Step 1: the car is placed behind one of the three doors, each with
# probability 1/3. One "carchosen" state is created per possible position,
# and the position is stored in the "car_pos" feature.
car_placements = []
for door in range(3):
    placed_state = mdp.new_state("carchosen", {"car_pos": door})
    car_placements.append((1 / 3, placed_state))
init.set_transitions(car_placements)

# Step 2: in every "carchosen" state the contestant picks one of the three
# doors. Each pick uses its own action ("open0", "open1", "open2") and leads
# to an "open" state that keeps the existing features and additionally
# records the picked door as "chosen_pos".
for car_state in mdp.get_states_with_label("carchosen"):
    door_choices = []
    for door in range(3):
        pick_action = mdp.action(f"open{door}")
        picked_state = mdp.new_state(
            "open", car_state.features | {"chosen_pos": door}
        )
        door_choices.append((pick_action, picked_state))
    car_state.set_transitions(door_choices)

# Step 3: a goat is revealed behind a door that is neither the contestant's
# pick nor the car. When more than one door qualifies (the pick was the car),
# each of them is revealed with equal probability.
for open_state in mdp.get_states_with_label("open"):
    hidden_car = open_state.features["car_pos"]
    picked_door = open_state.features["chosen_pos"]
    revealable = {0, 1, 2} - {hidden_car, picked_door}
    reveal_prob = 1 / len(revealable)

    reveals = []
    for door in revealable:
        revealed_state = mdp.new_state(
            "goatrevealed", open_state.features | {"reveal_pos": door}
        )
        reveals.append((reveal_prob, revealed_state))
    open_state.set_transitions(reveals)

# Step 4: the contestant either stays with the picked door or switches to the
# one door that is neither picked nor revealed. Both outcomes are labelled
# "done"; the outcome whose final door hides the car is also labelled "target".
for revealed_state in mdp.get_states_with_label("goatrevealed"):
    features = revealed_state.features
    car_pos = features["car_pos"]
    chosen_pos = features["chosen_pos"]
    reveal_pos = features["reveal_pos"]

    # Exactly one door is left once the picked and revealed doors are removed.
    remaining = {0, 1, 2} - {reveal_pos, chosen_pos}
    other_pos = list(remaining)[0]

    stay_labels = ["done"]
    if chosen_pos == car_pos:
        stay_labels.append("target")

    switch_labels = ["done"]
    if other_pos == car_pos:
        switch_labels.append("target")

    # Actions and states are created in the order stay, then switch.
    stay_action = mdp.action("stay")
    stay_state = mdp.new_state(stay_labels, features | {"chosen_pos": chosen_pos})
    switch_action = mdp.action("switch")
    switch_state = mdp.new_state(switch_labels, features | {"chosen_pos": other_pos})

    revealed_state.set_transitions(
        [(stay_action, stay_state), (switch_action, switch_state)]
    )

# Finally, add self loops to all states that have no outgoing transitions.
mdp.add_self_loops()

In [9]:
# Simulating the model means starting at the initial state and walking through
# the model according to the transition probabilities. The result is a partial
# model that contains everything discovered during this walk.

# Number of steps the simulator takes.
steps = 4

# Optional seed for the simulation.
seed = 123456

# Run the simulator and print the partial model it returns.
partial_model = stormvogel.simulator.simulate(
    mdp,
    steps=steps,
    seed=seed,
)
print(partial_model)

ModelType.MDP with name None

States:
State 0 with labels ['init'] and features {}
State 1 with labels ['carchosen'] and features {}
State 2 with labels ['open'] and features {}
State 3 with labels ['goatrevealed'] and features {}
State 4 with labels ['done', 'target'] and features {}

Transitions:
0.3333333333333333 -> State 1 with labels ['carchosen'] and features {}
1.0 -> State 2 with labels ['open'] and features {}
1.0 -> State 3 with labels ['goatrevealed'] and features {}
1.0 -> State 4 with labels ['done', 'target'] and features {}


In [10]:
# Without a scheduler the simulator still chooses actions at random. To fix
# the choices, build a scheduler that maps every state id to the first action
# available in that state.
# A dict comprehension is used so the loop variables stay local: a plain
# `for id, state in ...` loop would leave `id` bound in the notebook's global
# namespace and shadow the builtin `id()` for every later cell.
taken_actions = {
    state_id: state.available_actions()[0]
    for state_id, state in mdp.states.items()
}
scheduler = stormvogel.result.Scheduler(mdp, taken_actions)

# Same number of steps and same seed as before, but now driven by the scheduler.
partial_model = stormvogel.simulator.simulate(
    mdp, steps=steps, scheduler=scheduler, seed=seed
)
print(partial_model)

ModelType.MDP with name None

States:
State 0 with labels ['init'] and features {}
State 1 with labels ['carchosen'] and features {}
State 2 with labels ['open'] and features {}
State 3 with labels ['goatrevealed'] and features {}
State 4 with labels ['done'] and features {}

Transitions:
0.3333333333333333 -> State 1 with labels ['carchosen'] and features {}
1.0 -> State 2 with labels ['open'] and features {}
1.0 -> State 3 with labels ['goatrevealed'] and features {}
1.0 -> State 4 with labels ['done'] and features {}


In [11]:
# We can also visualize the partial model that we get from the simulator.
# NOTE(review): save_and_embed=True presumably saves the rendering and embeds
# it in the notebook output — confirm against the stormvogel `show` docs.
vis = show.show(partial_model, save_and_embed=True)

In [12]:
# `simulate_path` is another simulator function; it returns a path instead of
# a partial model. The `steps` and `seed` settings defined for the earlier
# simulation runs are reused here rather than repeating their literal values,
# so all runs stay in sync when either setting is changed.
path = stormvogel.simulator.simulate_path(
    mdp, steps=steps, scheduler=scheduler, seed=seed
)

print(path)

initial state --(action: empty)--> state: 2 --(action: open0)--> state: 7 --(action: empty)--> state: 17 --(action: stay)--> state: 33
