# Step Four: MDPs with More Uncertainty

## Preparation

In [1]:
# Import the stormvogel Orchard model components from the previous step
import stormvogel
import stormpy

from copy import deepcopy
from examples.orchard_game_stormvogel import Orchard, available_actions, delta, labels, rewards, Fruit, GameState, DiceOutcome

## Interval MDP

### Adapted delta function

In [None]:
# The transition function (interval variant)
def delta(state, action):
    """Return the successors of ``state`` under ``action`` as ``(value, state)`` pairs.

    * Game over: a single self loop with value 1.
    * No dice thrown yet (``state.dice is None``): one successor per dice face
      (each fruit in ``state.trees``, then the basket, then the raven). Every
      face carries the same ``stormvogel.model.Interval`` centred on the fair
      probability ``1 / (#fruits + 2)`` with a half-width of ``1/36``.
    * Dice shows a fruit, the basket or the raven: one successor with value 1,
      obtained by picking the fruit shown, picking the fruit named by a
      ``"choose<FRUIT>"`` action, or moving the raven, respectively, followed
      by ``next_round()``.

    The input state is never modified; successors are deep copies.
    """
    # Finished games are absorbing.
    if state.game_state() != GameState.NOT_ENDED:
        return [(1, state)]

    if state.dice is None:
        # Point probability of a fair dice: one face per fruit type,
        # plus one basket face and one raven face.
        point_prob = 1 / (len(state.trees.keys()) + 2)
        # NEW: widen the point probability to an interval of +/- 1/36.
        dice_prob = stormvogel.model.Interval(point_prob - (1 / 36), point_prob + (1 / 36))

        # Faces in the order: all fruits, basket, raven.
        faces = [(DiceOutcome.FRUIT, fruit) for fruit in state.trees.keys()]
        faces.append((DiceOutcome.BASKET, None))
        faces.append((DiceOutcome.RAVEN, None))

        branches = []
        for face in faces:
            rolled = deepcopy(state)
            rolled.dice = face
            branches.append((dice_prob, rolled))
        return branches

    if state.dice[0] == DiceOutcome.FRUIT:
        # The dice dictates which fruit is picked.
        shown_fruit = state.dice[1]
        successor = deepcopy(state)
        successor.pick_fruit(shown_fruit)
        successor.next_round()
        return [(1, successor)]

    if state.dice[0] == DiceOutcome.BASKET:
        # The action name encodes the player's choice, e.g. "chooseAPPLE".
        assert action.startswith("choose")
        chosen_fruit = Fruit[action.removeprefix("choose")]
        successor = deepcopy(state)
        successor.pick_fruit(chosen_fruit)
        successor.next_round()
        return [(1, successor)]

    if state.dice[0] == DiceOutcome.RAVEN:
        successor = deepcopy(state)
        successor.move_raven()
        successor.next_round()
        return [(1, successor)]

    # Every dice outcome must have been handled above.
    assert False

In [None]:
# Initial game: four fruit types, with num_fruits=4 and raven_distance=5.
init_game = Orchard(
    [Fruit.APPLE, Fruit.CHERRY, Fruit.PEAR, Fruit.PLUM],
    num_fruits=4,
    raven_distance=5,
)
    
# Labelling for the full model: expose only the winning conditions,
# not the internal state information.
def labels_full(state):
    """Return ``["PlayersWon"]``, ``["RavenWon"]`` or ``[]`` for ``state``.

    The label is chosen from ``state.game_state()``; any state that is
    neither a players' win nor a raven win gets no label.
    """
    status = state.game_state()
    if status == GameState.PLAYERS_WON:
        return ["PlayersWon"]
    if status == GameState.RAVEN_WON:
        return ["RavenWon"]
    return []

# Build the model from the initial game with the interval-valued `delta`
# and the reduced labelling.
orchard = stormvogel.bird.build_bird(
    delta=delta,
    init=init_game,
    available_actions=available_actions,
    labels=labels_full,
    modeltype=stormvogel.ModelType.MDP,
    max_size=100_000,
)

# Translate the stormvogel model into its stormpy counterpart for model checking
orchard_storm = stormvogel.mapping.stormvogel_to_stormpy(orchard)

## Analysis

In [None]:
# Parse the property: maximal probability of eventually reaching "PlayersWon".
properties = stormpy.parse_properties('Pmax=? [F "PlayersWon"]')
task = stormpy.CheckTask(properties[0].raw_formula)

# Check the interval MDP once per uncertainty resolution mode and print the
# value at the initial state: first COOPERATIVE, then ROBUST.
# NOTE(review): presumably COOPERATIVE resolves the intervals in favour of the
# objective and ROBUST against it -- confirm against the stormpy documentation.
for resolution_mode in (
    stormpy.UncertaintyResolutionMode.COOPERATIVE,
    stormpy.UncertaintyResolutionMode.ROBUST,
):
    task.set_uncertainty_resolution_mode(resolution_mode)
    # Fresh default environment for every check, as before.
    env = stormpy.Environment()
    # Both checks run on `orchard_storm`; the second one previously referenced
    # the undefined name `orchard_stormpy` and raised a NameError.
    stormpy_result = stormpy.check_interval_mdp(orchard_storm, task, env)
    print(stormpy_result.at(orchard_storm.initial_states[0]))

## Parametric MDP

## POMDP

In [2]:
import stormpy
import stormpy.pomdp

# Load the Orchard POMDP from its PRISM description and parse the property:
# maximal probability that the players win before the raven does.
prism_program = stormpy.parse_prism_program("examples/orchard_pomdp.pm")
formula_str = 'Pmax=? [!"RavenWon" U "PlayersWon"]'
properties = stormpy.parse_properties_for_prism_program(formula_str, prism_program)
# Preprocess program and properties together.
# NOTE(review): the empty string is presumably "no constant definitions" -- confirm.
prism_program, properties = stormpy.preprocess_symbolic_input(prism_program, properties, "")
prism_program = prism_program.as_prism_program()

# Builder options derived from the property formulas; additionally request
# state valuations and choice labels.
options = stormpy.BuilderOptions([p.raw_formula for p in properties])
options.set_build_state_valuations()
options.set_build_choice_labels()
# Build with the configured options. Previously the model was built via
# `stormpy.build_model(prism_program, properties)`, which never received
# `options`, so the settings above had no effect.
pomdp = stormpy.build_sparse_model_with_options(prism_program, options)
# Canonic form is applied before the POMDP is handed to the belief-exploration checker.
pomdp = stormpy.pomdp.make_canonic(pomdp)
print(pomdp)

-------------------------------------------------------------- 
Model type: 	POMDP (sparse)
States: 	22469
Transitions: 	44954
Choices: 	29354
Observations: 	546
Reward Models:  none
State Labels: 	4 labels
   * deadlock -> 0 item(s)
   * init -> 1 item(s)
   * RavenWon -> 624 item(s)
   * PlayersWon -> 5 item(s)
Choice Labels: 	11 labels
   * player0 -> 3749 item(s)
   * pickAPPLE -> 3120 item(s)
   * pickPEAR -> 3120 item(s)
   * chooseAPPLE -> 2500 item(s)
   * moveRaven -> 3120 item(s)
   * pickCHERRY -> 3120 item(s)
   * pickPLUM -> 3120 item(s)
   * choosePEAR -> 2500 item(s)
   * chooseCHERRY -> 2500 item(s)
   * choosePLUM -> 2500 item(s)
   * noChoice -> 5 item(s)
-------------------------------------------------------------- 



In [None]:
# Configure belief exploration: refinement enabled, clipping disabled.
# NOTE(review): the two positional flags are presumably (discretize, unfold),
# i.e. compute both bounds -- confirm against the stormpy.pomdp API.
belexpl_options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(True, True)
belexpl_options.refine = True
belexpl_options.use_clipping = False

# Run the belief-exploration checker on the POMDP and report the bounds
# it returns for the first property.
belmc = stormpy.pomdp.BeliefExplorationModelCheckerDouble(pomdp, belexpl_options)
result = belmc.check(properties[0].raw_formula, [])
print(f"Result in: [{result.lower_bound}, {result.upper_bound}]")

In [4]:
# Load the Orchard POMDP variant with stealing and parse the same property:
# maximal probability that the players win before the raven does.
prism_program = stormpy.parse_prism_program("examples/orchard_pomdp_steal.pm")
formula_str = 'Pmax=? [!"RavenWon" U "PlayersWon"]'
properties = stormpy.parse_properties_for_prism_program(formula_str, prism_program)
# Preprocess program and properties together.
# NOTE(review): the empty string is presumably "no constant definitions" -- confirm.
prism_program, properties = stormpy.preprocess_symbolic_input(prism_program, properties, "")
prism_program = prism_program.as_prism_program()

# Builder options derived from the property formulas; additionally request
# state valuations and choice labels.
options = stormpy.BuilderOptions([p.raw_formula for p in properties])
options.set_build_state_valuations()
options.set_build_choice_labels()
# Build with the configured options. Previously the model was built via
# `stormpy.build_model(prism_program, properties)`, which never received
# `options`, so the settings above had no effect.
pomdp_steal = stormpy.build_sparse_model_with_options(prism_program, options)
# Canonic form is applied before the POMDP is handed to the belief-exploration checker.
pomdp_steal = stormpy.pomdp.make_canonic(pomdp_steal)
print(pomdp_steal)

-------------------------------------------------------------- 
Model type: 	POMDP (sparse)
States: 	22434
Transitions: 	44882
Choices: 	29304
Observations: 	547
Reward Models:  none
State Labels: 	4 labels
   * deadlock -> 0 item(s)
   * init -> 1 item(s)
   * RavenWon -> 623 item(s)
   * PlayersWon -> 5 item(s)
Choice Labels: 	12 labels
   * steal -> 1 item(s)
   * player0 -> 3743 item(s)
   * pickAPPLE -> 3115 item(s)
   * pickPEAR -> 3115 item(s)
   * chooseAPPLE -> 2495 item(s)
   * moveRaven -> 3115 item(s)
   * pickCHERRY -> 3115 item(s)
   * pickPLUM -> 3115 item(s)
   * choosePEAR -> 2495 item(s)
   * chooseCHERRY -> 2495 item(s)
   * choosePLUM -> 2495 item(s)
   * noChoice -> 5 item(s)
-------------------------------------------------------------- 



In [None]:
# Baseline: check the stealing model with `force_fully_observable=True`
# and print the value at the initial state.
mdp_res = stormpy.model_checking(
    pomdp_steal,
    properties[0],
    force_fully_observable=True,
)
print(mdp_res.at(pomdp_steal.initial_states[0]))

In [None]:
# Configure belief exploration: refinement enabled, clipping disabled.
# NOTE(review): the two positional flags are presumably (discretize, unfold),
# i.e. compute both bounds -- confirm against the stormpy.pomdp API.
belexpl_options = stormpy.pomdp.BeliefExplorationModelCheckerOptionsDouble(True, True)
belexpl_options.refine = True
belexpl_options.use_clipping = False

# Run the belief-exploration checker on the stealing POMDP and report the
# bounds it returns for the first property.
belmc = stormpy.pomdp.BeliefExplorationModelCheckerDouble(pomdp_steal, belexpl_options)
result = belmc.check(properties[0].raw_formula, [])
print(f"Result in: [{result.lower_bound}, {result.upper_bound}]")