# Estimation part

## Task 13

In this task, we estimate the matrix Q from the available data on the women's cancer states, which were collected every 48 months. Because the observations are so far apart, we do not know which states a woman passed through between two observations, and this information is needed to estimate Q. We therefore simulate a state trajectory for each woman and compare it with the data (as produced in Task 12), which gives us complete trajectories to work from. We start with an initial guess for Q and simulate the trajectories as described in the project description, rejecting every trajectory that does not match the observed data. The accepted trajectories are then used to update Q, and the procedure is repeated until Q barely changes between iterations.

In [2]:
# Initialization
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from numpy.linalg import matrix_power

In [52]:
# Test women
X = np.array([[1,2,3,4,5],
              [1,3, 3, 3, 5],
              [1, 2, 2, 4, 5],
              [1, 2, 3, 3, 5],
              [1, 4, 4, 5, 5]])

#X = np.array([[1,2,3,4,5],
#              [1,3, 3, 3, 5]])

# Initial Q guess
Q_test = np.array([[-0.004, 0.001, 0.001, 0.001, 0.001],
              [0, -0.003, 0.001, 0.001, 0.001], 
              [0, 0, -0.002, 0.001, 0.001],
              [0, 0, 0, -0.001, 0.001],
              [0, 0, 0, 0, 0]]) 

Q_test = np.array([[-0.008, 0.004, 0.002, 0, 0.002],
              [0, -0.015, 0.005, 0.005, 0.005], 
              [0, 0, -0.008, 0.004, 0.004],
              [0, 0, 0, -0.007, 0.007],
              [0, 0, 0, 0, 0]])

# Possible states
states = np.arange(1,6)


# N and S (for filling out)
N = np.array([[0, 0, 0, 0, 0],
              [0, 0, 0, 0, 0], 
              [0, 0, 0, 0, 0],
              [0, 0, 0, 0, 0],
              [0, 0, 0, 0, 0]]) 

S = np.zeros(4)

# Initial trajectory matrix
X_test = np.zeros(np.shape(X)[1] * 48)

k = 0

diff = 1

# K LOOP
while diff >= 10**(-3):
     
    # N and S
    N = np.array([[0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0], 
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0, 0]]) 
    S = np.zeros(4)
    
    # PART 1: WOMAN LOOP
    for i in range(len(X)):
        # Get woman
        test_woman = X[i,:]
        woman_all_states = np.array([])
        woman_event_times = np.array([])
    
        t = np.array([])
        
        j = 0
        # Run through woman: ONE-WOMAN LOOP
        while j < len(test_woman)-1:
            y_start = test_woman[j]
            y_end = test_woman[j+1]
            
            try_state = np.array([y_start])
            if try_state < 5:
                try_t = np.array([np.random.exponential(scale = 1/-Q_test[try_state[-1]-1, try_state[-1]-1])])
            else:
                break
            
            # TRY LOOP
            while (np.sum(t) + np.sum(try_t)) < ((j+1)*48):
                # Get new state
                probs = -Q_test[try_state[-1] - 1, try_state[-1]:]/Q_test[try_state[-1] - 1, try_state[-1] - 1]
                addition_for_try_state = np.array([np.random.choice(states[try_state[-1]:], p = probs)])
                try_state = np.concatenate((try_state, addition_for_try_state))
                
                # Get time for state
                if try_state[-1] < 5:
                    addition_for_try_t = np.array([np.random.exponential(scale = 1/-Q_test[try_state[-1]-1,try_state[-1]-1])])
                    try_t = np.concatenate((try_t, addition_for_try_t))
                         
                else:
                    break
             
                                             
            # ACCEPTANCE OR REJECTION
            if try_state[-1] == y_end:
                if np.sum(t) + np.sum(try_t) < ((j+2)*48):
                    t = np.concatenate((t, try_t))
                    woman_all_states = np.concatenate((woman_all_states, try_state))
                    j += 1
                    
                else:
                    how_many_intervals = int(np.floor((np.sum(t) + np.sum(try_t))/48))
                    if (j + how_many_intervals < len(test_woman)):
                        if (test_woman[j + how_many_intervals] == try_state[-1]):
                            t = np.concatenate((t, try_t))
                            woman_all_states = np.concatenate((woman_all_states, try_state))
                            j += how_many_intervals
                        
                        
        # Calculate N
        for w in range(len(woman_all_states)-1):
            i = int(woman_all_states[w] - 1)
            j = int(woman_all_states[w+1] - 1)
            if i != j:
                N[i,j] += 1
            
        # Calculate S
        for w in range(1,5):
            S[w-1] += np.sum(t[np.where(woman_all_states == w)[0]])
            
    ### Create Q ###
    Q_simulation = np.array([[0.0, 0.0, 0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0, 0.0, 0.0], 
              [0.0, 0.0, 0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0, 0.0, 0.0],
              [0.0, 0.0, 0.0, 0.0, 0.0]])
    
    for i in range(4):
        for j in range(5):
            if S[i] != 0:
                entrance = N[i,j]/S[i]
                Q_simulation[i][j] += entrance
                Q_simulation[i,j] += N[i,j]/S[i]
    
    # Make sure each row sums to 0
    for i in range(5):
        for j in range(5):
            if i == j:
                Q_simulation[i, j] = -np.sum(Q_simulation[i,j+1:])
    
    ### Find diff ###
    diff_matrix = Q_test - Q_simulation
    diff = np.linalg.norm(diff_matrix, np.inf)
    Q_test = np.copy(Q_simulation)
    

print('Done. Final Q-matrix:')
print(Q_test)



  ret = add.reduce(abs(x), axis=col_axis).max(axis=row_axis)


ValueError: probabilities contain NaN