In [1]:
from mdp_sequence_reader import MDPSequenceReader
from training_pomdp_util import *
from observable_markov_model import ObservableMarkovModel

In [2]:
# Open the recorded sequence file and report its dimensions.
reader = MDPSequenceReader('sample_sequence/o_0_0.txt')
print(
    "Num states: %d" % reader.get_num_states(),
    "Num steps: %d" % reader.get_num_steps(),
    sep="\n",
)

Num states: 8
Num steps: 10000


In [3]:
# Pull the observation and action sequences out of the reader
# (observations first, then actions).
observations, actions = (
    reader.get_observation_sequence(),
    reader.get_action_sequence(),
)

In [4]:
# Build a randomly initialised model to start training from.
action_space = list(range(4))   # 0=up, 1=right, 2=down, 3=left
num_states = reader.get_num_states()
num_observables = 2 ** 4   # binary measurements of 4 directions -> 16 combinations

init_model = initialize_random_pomdp_model(
    num_states,
    action_space,
    num_observables,
)

In [5]:
# Re-estimate the model parameters, starting from the random initial model.
# NOTE(review): improve_params presumably performs one EM-style update -
# confirm against training_pomdp_util.
model = init_model
for r in range(1):  # number of re-estimation rounds (currently a single pass)
    # improve_params yields the two pieces handed to ObservableMarkovModel;
    # model.init is carried over from the current model.
    alist, c = improve_params(m=model, xs=actions, ys=observations)
    new_model = ObservableMarkovModel(alist, c, model.init)

    # Score the updated model on the same action/observation sequences.
    log_likelihood = get_log_likelihood(
        make_tableaus(m=new_model, xs=actions, ys=observations)
    )
    print("round=%d, log likelihood=%f" % (1 + r, log_likelihood))

    # The updated model is the starting point for the next round.
    model = new_model

<class 'numpy.ndarray'>
(10000, 8)
[[0.01075644 0.04783689 0.60385933 0.11365482 0.16497539 0.02892415
  0.00969056 0.02030241]
 [0.15851476 0.47947929 0.24085749 0.01063576 0.00579281 0.08713454
  0.00794051 0.00964485]
 [0.10543255 0.04054984 0.03072127 0.10115795 0.05202526 0.31876234
  0.34910058 0.00225022]
 [0.16369731 0.04175037 0.16011173 0.12216837 0.04071468 0.24068659
  0.22575579 0.00511515]
 [0.16337659 0.04459949 0.14904932 0.08607985 0.04362884 0.23209305
  0.27615581 0.00501706]
 [0.16302205 0.05213822 0.12449552 0.03259635 0.07315529 0.14690834
  0.40485028 0.00283395]
 [0.18130733 0.02937124 0.12420873 0.03281668 0.08189286 0.20652385
  0.34191573 0.00196357]
 [0.14908131 0.0083244  0.07064832 0.11408594 0.05194836 0.38744886
  0.21394725 0.00451557]
 [0.18666194 0.02013709 0.17330184 0.02547166 0.3151284  0.05977697
  0.1092093  0.1103128 ]
 [0.1938316  0.01719841 0.10391948 0.04497446 0.25493151 0.03282897
  0.07257838 0.27973718]]
round=1, log likelihood=-16080.684

In [None]:
# Debug dump of the parameters from the last training round; `alist` and `c`
# are only defined once the training-loop cell has been run.
print(alist, c)

In [31]:
# Scratch check of NumPy indexing and broadcast assignment on a 3-D array.
a = np.zeros((3, 5, 7))

# An integer index drops the leading axis; a length-1 slice keeps it.
print(a[2], '--', a[2:3], '--', a, sep='\n')

# Fill row 2 of every leading-axis block with 3.
a[:, 2] = 3
print('--', a, '--', sep='\n')

# Overwrite the whole last block with 7.
a[2, ...] = 7
print(a)

# Overwrite column 2 of every block with 9.
a[..., 2] = 9
print('--', a, sep='\n')

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]
--
[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
--
[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
--
[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [3. 3. 3. 3. 3. 3. 3.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [3. 3. 3. 3. 3. 3. 3.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [3. 3. 3. 3. 3. 3. 3.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0.

In [8]:
# Sanity check: the first row of the array printed by the training cell
# should sum to (approximately) 1.
sum((
    0.01075644, 0.04783689, 0.60385933, 0.11365482,
    0.16497539, 0.02892415, 0.00969056, 0.02030241,
))

0.9999999900000001