In [138]:
import numpy as np
import pandas as pd
import scipy.linalg as la
from scipy.special import logsumexp
from scipy.stats import multivariate_normal

In [139]:
df = pd.read_csv('alsfrs_cleaned.csv', index_col=0)

In [140]:
df.head()

Unnamed: 0,subject_id,Q1_Speech,Q2_Salivation,Q3_Swallowing,Q4_Handwriting,Q5_Cutting,Q6_Dressing_and_Hygiene,Q7_Turning_in_Bed,Q8_Walking,Q9_Climbing_Stairs,Q10_Respiratory,ALSFRS_Delta,ALSFRS_Total
0,329,4.0,3.0,4.0,3.0,2.0,3.0,2.0,2.0,1.0,3.0,0.0,27.0
1,329,4.0,3.0,4.0,3.0,1.5,3.0,2.0,2.0,1.0,3.0,8.0,26.5
2,329,4.0,3.0,4.0,3.0,1.0,3.0,2.0,2.0,1.0,3.0,16.0,26.0
3,329,4.0,3.0,4.0,3.0,3.0,3.0,3.0,2.0,1.0,4.0,42.0,30.0
4,329,4.0,3.0,4.0,3.0,2.0,3.0,4.0,2.0,2.0,3.0,72.0,30.0


In [141]:
test_subject = df[df['subject_id'] == 329]
test_subject

Unnamed: 0,subject_id,Q1_Speech,Q2_Salivation,Q3_Swallowing,Q4_Handwriting,Q5_Cutting,Q6_Dressing_and_Hygiene,Q7_Turning_in_Bed,Q8_Walking,Q9_Climbing_Stairs,Q10_Respiratory,ALSFRS_Delta,ALSFRS_Total
0,329,4.0,3.0,4.0,3.0,2.0,3.0,2.0,2.0,1.0,3.0,0.0,27.0
1,329,4.0,3.0,4.0,3.0,1.5,3.0,2.0,2.0,1.0,3.0,8.0,26.5
2,329,4.0,3.0,4.0,3.0,1.0,3.0,2.0,2.0,1.0,3.0,16.0,26.0
3,329,4.0,3.0,4.0,3.0,3.0,3.0,3.0,2.0,1.0,4.0,42.0,30.0
4,329,4.0,3.0,4.0,3.0,2.0,3.0,4.0,2.0,2.0,3.0,72.0,30.0
5,329,3.0,3.0,4.0,3.0,4.0,3.0,4.0,2.0,2.0,4.0,106.0,32.0
6,329,4.0,3.0,3.0,3.0,1.0,3.0,3.0,2.0,1.0,4.0,135.0,27.0
7,329,4.0,4.0,4.0,1.0,1.0,1.0,2.0,2.0,1.0,1.0,168.0,21.0
8,329,4.0,2.0,4.0,3.0,1.0,3.0,2.0,2.0,1.0,3.0,189.0,25.0
9,329,4.0,4.0,4.0,3.0,3.0,3.0,3.0,2.0,1.0,3.0,212.0,30.0


In [142]:
observations = test_subject.drop(['subject_id', 'ALSFRS_Delta', 'ALSFRS_Total'], axis=1).values
time_intervals = test_subject.ALSFRS_Delta.values

### Notation:
- D = number of parameters in the observed state (10)
- M = number of hidden states

In [143]:
M = 5
D = 10
rate_matrix = np.zeros((M, M), dtype=float) # M x M
emission_matrix = np.ones((2, M, D), dtype=float) # 2 x M x D

In [144]:
def log_transition_matrix(time_interval):
    """
        Input: scalar
        Output: M x M
    """
    return np.log(la.expm(rate_matrix * time_interval))

In [145]:
def log_emission(observation):
    """
        Input: D x 1
        Output: M x 1
    """
    b = np.ndarray(M, dtype=float)
    for i in range(M):
        means = emission_matrix[0,i]
        covariance = np.diag(emission_matrix[1,i])
        b[i] = multivariate_normal.logpdf(observation, means, covariance)
    return b

In [146]:
def backward(observations, time_intervals):
    T = observations.shape[0]
    beta = np.zeros((M, T), dtype=float) # TODO: ndarray
    beta[:,T-1] = np.ones(M)
    for i in range(M):
        for t in range(T-2, -1, -1):
            a = log_transition_matrix(time_intervals[t])
            b = log_emission(observations[t+1])
            beta[i,t] = logsumexp([beta[j,t+1] + a[i,j] + b[j] for j in range(M)])
    
    return beta

backward(observations, time_intervals)

  


array([[-319.20823865, -291.89385332, -264.70446799, -231.51508266,
        -199.32569732, -163.13631199, -134.44692666, -110.75754133,
         -85.068156  ,  -51.87877066,  -24.68938533,    1.        ],
       [-319.20823865, -291.89385332, -264.70446799, -231.51508266,
        -199.32569732, -163.13631199, -134.44692666, -110.75754133,
         -85.068156  ,  -51.87877066,  -24.68938533,    1.        ],
       [-319.20823865, -291.89385332, -264.70446799, -231.51508266,
        -199.32569732, -163.13631199, -134.44692666, -110.75754133,
         -85.068156  ,  -51.87877066,  -24.68938533,    1.        ],
       [-319.20823865, -291.89385332, -264.70446799, -231.51508266,
        -199.32569732, -163.13631199, -134.44692666, -110.75754133,
         -85.068156  ,  -51.87877066,  -24.68938533,    1.        ],
       [-319.20823865, -291.89385332, -264.70446799, -231.51508266,
        -199.32569732, -163.13631199, -134.44692666, -110.75754133,
         -85.068156  ,  -51.87877066,  -24.6