In [1]:
from mdp_sequence_reader import MDPSequenceReader
from training_pomc_util import *
from pomc import PartiallyObservableMarkovChain

In [2]:
# read the sequence
# The path is relative, so it resolves against the kernel's current working directory.
# `reader` is reused by the next cell to extract the observation and action sequences.
reader = MDPSequenceReader('sample_sequence/o_0_0.txt')
print("Num states: %d" % reader.get_num_states())
print("Num steps: %d" % reader.get_num_steps())

Num states: 8
Num steps: 10000


In [3]:
# get the sequences
# Both names are passed to search_model() in the state-count sweep further down.
# NOTE(review): presumably the two sequences are aligned step by step - confirm against
# MDPSequenceReader.
observations = reader.get_observation_sequence()
actions = reader.get_action_sequence()

In [4]:
# get the initial model
# Problem setup constants; both are passed to search_model() in the sweep cell below.
action_space = [0, 1, 2, 3]   # action ids, in order: (up, right, down, left)
num_observables = 16   # binary measurements of 4 directions, i.e. 2**4 = 16 combinations

In [19]:
# given the setup, repeat the model training process a couple times because different
# initial setup may give different performance
def search_model(num_states, action_spaces, num_observables, actions, observations,
                 converge_improvement_threshold=2., converge_improve_retry=3, model_retry=5):
    """Train several models for one configuration and keep the most likely one.

    Each round delegates to train_model(); the model with the highest final log
    likelihood across all rounds is returned.

    Args:
        num_states: number of hidden states, forwarded to train_model().
        action_spaces: the action space, forwarded to train_model().
        num_observables: number of distinct observations, forwarded to train_model().
        actions: action sequence used for training.
        observations: observation sequence used for training.
        converge_improvement_threshold: forwarded to train_model().
        converge_improve_retry: forwarded to train_model().
        model_retry: how many independent training rounds to run.

    Returns:
        A tuple (best_model, best_log_likelihood). best_model is None when no round
        produced a log likelihood above the -1e100 sentinel (e.g. model_retry == 0).
    """
    best_model = None
    best_log_likelihood = -1e100  # sentinel: lower than any expected log likelihood
    for r in range(model_retry):
        print("========= %d states, round %d =========" % (num_states, r+1))
        # Forward the caller's action space rather than a notebook-level global so the
        # function honours its own argument and also works in a fresh namespace.
        model, log_likelihood, _ = train_model(
                num_states, action_spaces, num_observables, actions, observations,
                converge_improvement_threshold, converge_improve_retry)
        print("Final log likelihood: %f" % log_likelihood)
        if log_likelihood > best_log_likelihood:
            best_model = model
            best_log_likelihood = log_likelihood

    return best_model, best_log_likelihood

In [24]:
# model training
def train_model(num_states, action_spaces, num_observables, actions, observations,
                converge_improvement_threshold, converge_improve_retry, max_iterations=50):
    """Iteratively refine one model until its log likelihood stops improving.

    Starts from the model returned by initialize_random_pomc() and repeatedly
    re-estimates its parameters with improve_params(). A round counts as an
    improvement when its log likelihood exceeds the best value seen so far by more
    than converge_improvement_threshold; training stops after
    converge_improve_retry consecutive rounds without such an improvement, or
    after max_iterations rounds.

    Args:
        num_states: number of hidden states, passed to initialize_random_pomc().
        action_spaces: the action space, passed to initialize_random_pomc().
        num_observables: number of distinct observations, passed to
            initialize_random_pomc().
        actions: action sequence (the `xs` argument of the helper functions).
        observations: observation sequence (the `ys` argument of the helpers).
        converge_improvement_threshold: minimum gain over the best log likelihood
            for a round to count as an improvement.
        converge_improve_retry: number of consecutive non-improving rounds that
            ends training.
        max_iterations: upper bound on the number of refinement rounds.

    Returns:
        A tuple (model, log_likelihood, best_log_likelihood): the model from the
        last round, its log likelihood, and the highest log likelihood observed in
        any round (including the last one).
    """
    # initialize model from the action space that was passed in, not a notebook global
    model = initialize_random_pomc(num_states, action_spaces, num_observables)

    # iterate the model
    log_likelihood = -1e100  # very small sentinel
    best_log_likelihood = log_likelihood
    convergence_count = 0
    for _ in range(max_iterations):
        alist, c = improve_params(xs=actions, ys=observations, m=model)
        # the initial distribution is carried over unchanged from the previous model
        model = PartiallyObservableMarkovChain(alist, c, model.init)

        log_likelihood = get_log_likelihood(make_tableaus(xs=actions, ys=observations, m=model))
        #print("log likelihood=%f" % log_likelihood)

        # Compare against the best value from *previous* rounds, then record this
        # round before any early exit so the returned best value is never stale.
        improved = log_likelihood - best_log_likelihood > converge_improvement_threshold
        best_log_likelihood = max(best_log_likelihood, log_likelihood)

        # check convergence condition
        if improved:
            convergence_count = 0
        else:
            convergence_count += 1
            if convergence_count == converge_improve_retry:
                break

    return model, log_likelihood, best_log_likelihood

In [25]:
# study the performance difference when we change number of states
# Sweeps the hidden-state count over 2..11 (the range end is exclusive); search_model()
# prints the per-round log likelihoods.
# NOTE(review): best_likelihood is overwritten on every pass and is not read by any
# visible cell, so the per-size results survive only in the printed output.
for i_num_states in range(2, 12):
    _, best_likelihood = search_model(i_num_states, action_space, num_observables, actions, observations)

Final log likelihood: -13914.754997
Final log likelihood: -13914.050824
Final log likelihood: -14225.361066
Final log likelihood: -14009.376390
Final log likelihood: -13817.077530
Final log likelihood: -10895.266434
Final log likelihood: -11737.111412
Final log likelihood: -11024.589996
Final log likelihood: -12229.982600
Final log likelihood: -9274.596367
Final log likelihood: -7819.642475
Final log likelihood: -7676.016788
Final log likelihood: -7590.444281
Final log likelihood: -7731.993332
Final log likelihood: -6143.651323
Final log likelihood: -5249.007853
Final log likelihood: -3691.055693
Final log likelihood: -3919.105104
Final log likelihood: -6142.651900
Final log likelihood: -5124.654572
Final log likelihood: -5729.999559
Final log likelihood: -3347.895064
Final log likelihood: -3962.424587
Final log likelihood: -1688.081712
Final log likelihood: -4536.140649
Final log likelihood: -820.335321
Final log likelihood: -3920.870257
Final log likelihood: -875.313413
Final log lik

In [None]:
# NOTE(review): in the visible cells `alist` and `c` are only bound as locals inside
# train_model(); unless they are defined elsewhere (e.g. by the star import), this cell
# raises NameError on a fresh kernel. The cell has no execution count - confirm whether
# it is still needed.
print(alist, c)

In [31]:
# Scratch check of NumPy indexing and slice assignment on a 3-D array.
# NOTE(review): `np` is not imported explicitly in the visible cells; presumably it
# arrives via `from training_pomc_util import *` - confirm.
a = np.zeros((3, 5, 7))
print(a[2, :, :])    # integer index drops the first axis -> 2-D result
print('--')
print(a[2:3, :, :])  # length-1 slice keeps the first axis -> 3-D result
print('--')
print(a)
a[:, 2, :] = 3       # scalar assigned to index 2 of the middle axis, in every plane
print('--')
print(a)
print('--')
a[2] = 7             # overwrites the whole last plane, including the 3s set above
print(a)
a[:,:,2] = 9         # overwrites index 2 of the last axis everywhere
print('--')
print(a)

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]]
--
[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
--
[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]]
--
[[[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [3. 3. 3. 3. 3. 3. 3.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [3. 3. 3. 3. 3. 3. 3.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]]

 [[0. 0. 0. 0. 0. 0. 0.]
  [0. 0. 0. 0. 0. 0. 0.]
  [3. 3. 3. 3. 3. 3. 3.]
  [0. 0. 0. 0. 0. 0. 0.]
  [0. 0.

In [11]:
# Reference matrix: every entry of row i holds i * 100 + 3 (i.e. 3, 103, 203).
a = np.zeros((3, 5))
a[:] = (np.arange(3) * 100 + 3)[:, np.newaxis]

# Each triple is (index on the target's first axis, index on the target's last axis,
# source row of `a`). The same placements are applied with three indexing spellings.
placements = [(1, 0, 0), (0, 1, 1), (1, 2, 2)]

# Variant 1: length-1 slices on both the target and the source.
b = np.zeros((2, 5, 3))
for plane, channel, row in placements:
    b[plane:plane + 1, :, channel] = a[row:row + 1, :]
print(b)
print('-----')
# Variant 2: integer index on the target, length-1 slice on the source.
c = np.zeros((2, 5, 3))
for plane, channel, row in placements:
    c[plane, :, channel] = a[row:row + 1, :]
print(c)
print('-----')
# Variant 3: integer indices on both sides.
d = np.zeros((2, 5, 3))
for plane, channel, row in placements:
    d[plane, :, channel] = a[row, :]
print(d)
print('=====')
# All three spellings are expected to produce the same array.
print(np.array_equal(b, c), np.array_equal(b, d))
print('=====')
# Integer index yields a 1-D row; a length-1 slice keeps it 2-D.
print(a[0, :])
print(a[0:1, :])

[[[  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]]

 [[  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]]]
-----
[[[  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]]

 [[  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]]]
-----
[[[  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]
  [  0. 103.   0.]]

 [[  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]
  [  3.   0. 203.]]]
=====
True True
=====
[3. 3. 3. 3. 3.]
[[3. 3. 3. 3. 3.]]


In [10]:
# Scratch check: transposing a 1-D array versus a (3, 1) column array.
a = np.ones(3)
print(a)
print(a.shape)
print(a.T)      # .T leaves a 1-D array unchanged
b = np.ones((3, 1))
print(b)
print(b.shape)
print(b.T)      # .T turns the (3, 1) column into a (1, 3) row

[1. 1. 1.]
(3,)
[1. 1. 1.]
[[1.]
 [1.]
 [1.]]
(3, 1)
[[1. 1. 1.]]


In [11]:
# Scratch check: a single integer index on a (3, 1) array returns a length-1 1-D array,
# not a scalar.
a = np.zeros((3, 1))
print(a[0])

[0.]


In [6]:
# Scratch check of row/column assignment, axis sums and broadcasting on a (3, 8) array.
a = np.zeros((3, 8))
b = np.arange(8)
a[1, :] = b                 # fill the middle row with 0..7
print(a)
c = np.arange(10, 13)
a[:, 1] = c                 # fill the second column with 10, 11, 12
print(a)
print(a.sum(axis=0))        # column totals
print(a * b)                # length-8 vector is broadcast across every row
d = np.arange(1.0, 4.0).reshape(3, 1)
print(a * d)                # (3, 1) column is broadcast across every column

[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 2. 3. 4. 5. 6. 7.]
 [0. 0. 0. 0. 0. 0. 0. 0.]]
[[ 0. 10.  0.  0.  0.  0.  0.  0.]
 [ 0. 11.  2.  3.  4.  5.  6.  7.]
 [ 0. 12.  0.  0.  0.  0.  0.  0.]]
[ 0. 33.  2.  3.  4.  5.  6.  7.]
[[ 0. 10.  0.  0.  0.  0.  0.  0.]
 [ 0. 11.  4.  9. 16. 25. 36. 49.]
 [ 0. 12.  0.  0.  0.  0.  0.  0.]]
[[ 0. 10.  0.  0.  0.  0.  0.  0.]
 [ 0. 22.  4.  6.  8. 10. 12. 14.]
 [ 0. 36.  0.  0.  0.  0.  0.  0.]]
