In [1]:
import stormvogel.model
import stormvogel.visualization
from stormvogel.visualization import Visualization, show
from stormvogel.layout import Layout, DEFAULT
from stormvogel.result import Result

# Create the MDP for the "study" example. The model is named after what it
# describes, consistent with the name given to the visualization of this model.
mdp = stormvogel.model.new_mdp("Study")

# The initial state, plus the two actions that can be chosen in it.
# Each action carries a single label equal to its own name.
init = mdp.get_initial_state()
study = mdp.new_action_with_labels("study", frozenset(["study"]))
not_study = mdp.new_action_with_labels("don't study", frozenset(["don't study"]))

# Remaining states: the outcome of the choice, the outcome of the test,
# and a final state that both test outcomes lead to.
studied = mdp.new_state("studied")
not_studied = mdp.new_state("didn't study")
pass_test = mdp.new_state("pass test")
fail_test = mdp.new_state("fail test")
end = mdp.new_state("end")

# From the initial state, you can choose to study, or not to study.
# Each entry pairs an action with the state that action leads to.
init.set_transitions([
    (study, studied),
    (not_study, not_studied)
])

# If you studied, then there is a 90% chance that you pass the test
# (and a 10% chance that you fail). Each entry pairs a probability with a
# target state.
studied.set_transitions([
    (9/10, pass_test),
    (1/10, fail_test)
])

# If you did not study, then there is only a 40% chance that you pass the test
# (and a 60% chance that you fail).
not_studied.set_transitions([
    (4/10, pass_test),
    (6/10, fail_test)
])

# After the test you are done: both outcomes move to the end state
# with probability 1.
pass_test.set_transitions([(1, end)])
fail_test.set_transitions([(1, end)])

# Add a reward model named "R": passing the test is worth 100, failing the
# test is worth 0 (set explicitly), and the "didn't study" state is worth 15.
reward_model = mdp.add_rewards("R")
reward_model.set(pass_test, 100)
reward_model.set(fail_test, 0)
reward_model.set(not_studied, 15)

# Add a result
class FakeResult:
    """Hand-written stand-in for a result object.

    It exposes a fixed ``scheduler`` mapping and reports the same value for
    every state, so it can be used without running any model checking.
    """

    # Class-level mapping with a single entry: key 0 -> "study".
    scheduler = {0: "study"}

    def __init__(self):
        """Create the stub; there is no per-instance state to set up."""

    def get_result_of_state(self, s):
        """Return the constant 99, regardless of which state ``s`` is given."""
        constant_value = 99
        return constant_value
# Instantiate the stub result that is handed to the visualization below.
result = FakeResult()

# Display the model together with the stub result, with the editor enabled.
vis = show(
    mdp,
    name="study",
    show_editor=True,
    separate_edit_labels=["init", "didn't study"],
    result=result,
)

VBox(children=(interactive(children=(Checkbox(value=True, description='Auto apply changes'), Output()), _dom_c…

Button(button_style='success', description='Save', style=ButtonStyle())

Output()

Button(button_style='info', description='Apply', style=ButtonStyle())

Output()