In [3]:
import time, json
from memory_profiler import profile
from stochastic_service_composition.declare_utils import *
from stochastic_service_composition.composition_mdp import composition_mdp
from stochastic_service_composition.composition_mdp import comp_mdp
from mdp_dp_rl.algorithms.dp.dp_analytic import DPAnalytic
from docs.notebooks.setup import *
from docs.notebooks.utils import print_policy_data, print_value_function, print_q_value_function

### Setup

In [4]:
# Experiment configuration read by the cells below.
phase = 2              # 1 or 2; any other value is rejected by the selection cell
mode = "automata"      # target specification style: "automata" or "ltlf"
size = "manageable2"   # phase-2 only: "small", "manageable1", "manageable2" or "complex"

# Flags forwarded to the composition-MDP builder (`binary` is only used in "automata" mode).
encode = binary = False

In [5]:
# Select the service community and the target specification for the
# configured experiment.
#
# Defines, for the cells below:
#   all_services -- the services available for the chosen phase (and size)
#   target       -- the target specification
#   tf           -- transition function (only in "automata" mode)
#   automaton    -- second value returned by the *_ltlf() helper (only in "ltlf" mode)
#
# Fail fast on a bad configuration: otherwise the names above stay undefined
# and a later cell dies with an unrelated NameError.
if mode not in ("automata", "ltlf"):
    raise ValueError(f"Unsupported mode {mode!r}; expected 'automata' or 'ltlf'")

if phase == 1:
    all_services = all_services_phase1

    if mode == "automata":
        target = target_service_phase1_automata()
        tf = transition_function_phase1_automata
    else:  # mode == "ltlf"
        target, automaton = target_service_phase1_ltlf()
elif phase == 2:
    # `size` only matters in phase 2, where it picks the service community.
    assert size in ["small", "manageable1", "manageable2", "complex"], f"Unsupported size {size!r}"
    all_services = services_phase2(size)

    if mode == "automata":
        target = target_service_phase2_automata()
        tf = transition_function_phase2_automata
    else:  # mode == "ltlf"
        target, automaton = target_service_phase2_ltlf()
else:
    raise ValueError(f"Unsupported phase {phase!r}; expected 1 or 2")

In [6]:
# Echo the active configuration and the number of services selected above.
print(
    f"mode {mode}, phase {phase}, size {size}",
    f"n_services: {len(all_services)}",
    sep="\n",
)

mode automata, phase 2, size manageable2
n_services: 20


### Composition MDP and optimal policy

In [7]:
# Build the composition MDP for the selected target and services, then
# report how many states it has.
print("Starting computing composition MDP...")
services = all_services
if mode == "automata":
    # The services are passed as individual positional arguments here.
    mdp = composition_mdp(target, *services, tf=tf, gamma=0.9, encode=encode, binary=binary)
elif mode == "ltlf":
    # NOTE(review): `declare_automaton` is not defined anywhere in this notebook;
    # presumably it comes from one of the star imports -- confirm, otherwise
    # "ltlf" mode fails here with a NameError.
    mdp = comp_mdp(declare_automaton, services, automaton=automaton, gamma=0.9, encode=encode)
else:
    # Without this branch `mdp` would be undefined and the next line would
    # fail with a NameError that hides the real cause.
    raise ValueError(f"Unsupported mode {mode!r}; expected 'automata' or 'ltlf'")
states = len(mdp.all_states)
print("Number of states: ", states)
print("Composition MDP computed.")

Starting computing composition MDP...
System service created.
Number of states:  132
Composition MDP computed.


In [8]:
# Compute the optimal policy of the composition MDP, plus the state-value and
# action-value functions of that policy.
print("Starting computing policy and value function...")
# NOTE(review): the second argument (1e-4) is presumably the solver's
# tolerance -- confirm against DPAnalytic's signature.
opn = DPAnalytic(mdp, 1e-4)
opt_policy = opn.get_optimal_policy_vi()
value_function = opn.get_value_func_dict(opt_policy)
q_value_function = opn.get_act_value_func_dict(opt_policy)

# remove '0' state to sort output
# Popping inside a loop leaves no trailing expression for Jupyter to echo, so
# there is no need to assign the result to `_` (which would shadow IPython's
# last-output variable).
for _mapping in (opt_policy.policy_data, value_function, q_value_function):
    _mapping.pop(0, None)
print("Policy and value function computed.")

Starting computing policy and value function...
Policy and value function computed.


### Optimal policy

In [9]:
# Print the optimal policy: one "State=..., Action=..." line per state.
print_policy_data(opt_policy)

Policy:
State=(('ready', 'ready', 'available', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's0', 'cleaning'),	Action=0
State=(('ready', 'ready', 'available', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's1', 'film_deposition'),	Action=2
State=(('ready', 'ready', 'done', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's2', 'check_film_deposition'),	Action=2
State=(('ready', 'ready', 'broken', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready

### Value function

In [10]:
# Print the value of each state under the optimal policy: one "State=..., value=..." line per state.
print_value_function(value_function)

Value function:
State=(('ready', 'ready', 'available', 'available', 'available', 'available', 'available', 'ready', 'broken', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's14', 'activation'),	value=-8.107159148099669
State=(('ready', 'ready', 'available', 'available', 'available', 'available', 'available', 'broken', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's1', 'film_deposition'),	value=-8.442555646214188
State=(('ready', 'ready', 'available', 'available', 'done', 'available', 'available', 'broken', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's4', 'check_resist_coating'),	value=-7.980871942680644
State=(('ready', 'ready', 'available', 'available', 'broken', 'available', 'available', 'broken', 'ready', 'available', 'available', 'ready', 'ready', 'avail

In [11]:
# Print the Q-values: for each state, one "Action=..., Value=..." line per listed action.
print_q_value_function(q_value_function)

Q-value function:
State=(('ready', 'ready', 'available', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's0', 'cleaning'):
	Action=0,	Value=-7.228776610004064
	Action=1,	Value=-11.228776610004065

State=(('ready', 'ready', 'available', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's1', 'film_deposition'):
	Action=2,	Value=-6.920862900004514
	Action=3,	Value=-10.920862900004515

State=(('ready', 'ready', 'done', 'available', 'available', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'available', 'available', 'ready', 'ready', 'ready', 'ready', 'ready'), 's2', 'check_film_deposition'):
	Action=2,	Value=-6.078736555560571

State=(('ready', 'ready', 'broken', 'available', '