In [16]:
from multiprocessing import Pool
from functools import partial

from mdp_sequence_reader import MDPSequenceReader
from training_pomdp_util import *
from observable_markov_model import ObservableMarkovModel

In [2]:
# read the sequence
reader = MDPSequenceReader('sample_sequence/sharp_0_0.txt')
print("Num states: %d" % reader.get_num_states())
print("Num steps: %d" % reader.get_num_steps())

Num states: 12
Num steps: 10000


In [3]:
# get the sequences
observations = reader.get_observation_sequence()
actions = reader.get_action_sequence()

In [4]:
# get the initial model
action_space = [0, 1, 2, 3]   # (up, right, down, left)
num_observables = 16   # binary measurements of 4 directions

In [13]:
# model training
def train_model(num_states, round_idx, action_space, num_observables, actions, observations,
                converge_improvement_threshold=2., converge_improve_retry=3):
    
    # initialize model
    init_model = initialize_random_pomdp_model(num_states, action_space, num_observables)
    
    # iterate the model
    model = init_model
    log_likelihood = -1e100  # very small
    best_log_likelihood = log_likelihood
    convergence_count = 0
    for r in range(50):
        alist, c = improve_params(xs=actions, ys=observations, m=model)
        new_model = ObservableMarkovModel(alist, c, model.init)

        next_log_likelihood = get_log_likelihood(make_tableaus(xs=actions, ys=observations, m=new_model))
        #print("round=%d, log likelihood=%f" % (r+1, next_log_likelihood))

        model = new_model
        log_likelihood = next_log_likelihood
    
        # check convergence condition
        if log_likelihood - best_log_likelihood > converge_improvement_threshold:
            convergence_count = 0
        else:
            convergence_count += 1
            if convergence_count == converge_improve_retry:
                break
        
        best_log_likelihood = max(best_log_likelihood, log_likelihood)
    
    print("num_states=%d, round_idx=%d, log_likelihood=%f" % (num_states, round_idx, log_likelihood))
    return num_states, round_idx,  model, log_likelihood, best_log_likelihood

In [21]:
# configuration
min_num_states = 2
max_num_states = 4
model_retries = 3

In [22]:
# generate task info for multi-processing
tasks = []
for i_num_states in range(min_num_states, max_num_states+1):
    for i_round in range(model_retries):
        tasks.append((i_num_states, i_round))

In [23]:
# do it so fast!
partially_fed_train_model_func = partial(
    train_model,
    action_space=action_space,
    num_observables=num_observables,
    actions=actions,
    observations=observations,
)
results = Pool().starmap(partially_fed_train_model_func, tasks)

num_states=3, round_idx=1, log_likelihood=-23491.878680
num_states=3, round_idx=2, log_likelihood=-23491.878680
num_states=3, round_idx=0, log_likelihood=-23491.878680
num_states=4, round_idx=0, log_likelihood=-22559.361303
num_states=4, round_idx=1, log_likelihood=-22559.361303
num_states=2, round_idx=1, log_likelihood=-24627.814917
num_states=2, round_idx=2, log_likelihood=-24627.814917
num_states=2, round_idx=0, log_likelihood=-24627.814917
num_states=4, round_idx=2, log_likelihood=-22279.021798


In [24]:
result_table = [[None for _ in range(model_retries)] for _ in range(max_num_states+1)]
for result in results:
    num_states, round_idx, *rest = result
    result_table[num_states][round_idx] = rest
for i_num_states in range(min_num_states, max_num_states+1):
    best_likelihood = max([logl for _, logl, _ in result_table[i_num_states]])
    print("%d states, best likelihood=%f" % (i_num_states, best_likelihood))

2 states, best likelihood=-24627.814917
3 states, best likelihood=-23491.878680
4 states, best likelihood=-22279.021798
