# This notebook demonstrates hidden Markov modeling of transition probabilities from time-series data

# load required packages

In [1]:
using GraphRecipes, Plots
using Revise; using MDToolbox

└ @ Revise /Users/seica/.julia/packages/Revise/BqeJF/src/Revise.jl:1328


# generate training data

In [2]:
# Ground-truth transition graph: `state_num` states arranged in a ring, stored
# as one edge list per source state (self-transition 0.6, each neighbour 0.2).
state_num = 5
E = [[] for _ in 1:state_num]
for src in 1:state_num, dst in 1:state_num
    ring_gap = abs(src - dst)
    # staying in the same state
    ring_gap == 0 && push!(E[src], MDToolbox.Edge(dst, 0.6))
    # adjacent states; `state_num - ring_gap == 1` closes the ring (1 <-> state_num)
    if ring_gap == 1 || state_num - ring_gap == 1
        push!(E[src], MDToolbox.Edge(dst, 0.2))
    end
end
E

5-element Array{Array{Any,1},1}:
 [MDToolbox.Edge(1, 0.6), MDToolbox.Edge(2, 0.2), MDToolbox.Edge(5, 0.2)]
 [MDToolbox.Edge(1, 0.2), MDToolbox.Edge(2, 0.6), MDToolbox.Edge(3, 0.2)]
 [MDToolbox.Edge(2, 0.2), MDToolbox.Edge(3, 0.6), MDToolbox.Edge(4, 0.2)]
 [MDToolbox.Edge(3, 0.2), MDToolbox.Edge(4, 0.6), MDToolbox.Edge(5, 0.2)]
 [MDToolbox.Edge(1, 0.2), MDToolbox.Edge(4, 0.2), MDToolbox.Edge(5, 0.6)]

In [3]:
# Equilibrium probabilities: uniform over the five states.
pi_i = fill(0.2, 5)

5-element Array{Float64,1}:
 0.2
 0.2
 0.2
 0.2
 0.2

In [4]:
# Emission probabilities: one row per hidden state (5), one column per
# observable symbol (4). Every row sums to 1.
emission = vcat(
    [0.7  0.2  0.05 0.05],
    [0.2  0.7  0.05 0.05],
    [0.25 0.25 0.25 0.25],
    [0.05 0.05 0.7  0.2],
    [0.05 0.05 0.2  0.7],
)

5×4 Array{Float64,2}:
 0.7   0.2   0.05  0.05
 0.2   0.7   0.05  0.05
 0.25  0.25  0.25  0.25
 0.05  0.05  0.7   0.2
 0.05  0.05  0.2   0.7

In [5]:
# Training set: 10 sequences generated from the ground-truth model with
# msmgenerate_edge (first argument 1000). Only the observation sequence of each
# run is kept; the accompanying state sequence is discarded.
observations = []
foreach(1:10) do _
    _, obs = MDToolbox.msmgenerate_edge(1000, state_num, E, pi_i, emission)
    push!(observations, obs)
end

# Hidden Markov modeling

In [6]:
# Initial guess for the transition graph: same ring topology as the ground
# truth, but every edge weight is an independent rand() draw.
# NOTE(review): the weights are not normalised per source state here —
# presumably msmbaumwelch_edge copes with that; confirm.
E0 = [[] for _ in 1:state_num]
for src in 1:state_num, dst in 1:state_num
    ring_gap = abs(src - dst)
    # self-transition
    ring_gap == 0 && push!(E0[src], MDToolbox.Edge(dst, rand()))
    # ring neighbours (including the 1 <-> state_num wrap-around)
    if ring_gap == 1 || state_num - ring_gap == 1
        push!(E0[src], MDToolbox.Edge(dst, rand()))
    end
end

In [7]:
# Fit the edge-list model with msmbaumwelch_edge (timed). Only the transition
# weights start from the random guess E0; pi_i and emission are passed as-is.
@time E1, emission1, pi_i1 = MDToolbox.msmbaumwelch_edge(
    observations, state_num, E0, pi_i, emission
);

count: 20 diff_sum: 0.029085050054236256
count: 40 diff_sum: 0.007722248640285034
count: 60 diff_sum: 0.004193983583367811
count: 80 diff_sum: 0.0029363900616977967
count: 100 diff_sum: 0.0020823554524194987
count: 120 diff_sum: 0.0014457576128763805
count: 140 diff_sum: 0.0009825280967692696
count: 160 diff_sum: 0.000657060683203925
count: 180 diff_sum: 0.0004345350983864793
count: 200 diff_sum: 0.0002852332952348602
count: 220 diff_sum: 0.00018630987762721651
count: 240 diff_sum: 0.00012130308310107085
 43.219712 seconds (1.03 G allocations: 17.377 GiB, 5.69% gc time)


In [8]:
"""
    edges_to_matrix(edges, n)

Return a dense `n × n` `Float64` matrix built from per-state edge lists.

`edges[s]` is iterated for every source state `s` in `1:n`; each edge must
expose a target index `to` and a weight `value`. Entry `[s, edge.to]` is set to
`edge.value`, entries without an edge stay `0.0`, and if a target appears more
than once for a state the last edge wins.
"""
function edges_to_matrix(edges, n)
    mat = zeros(n, n)
    for state in 1:n
        for edge in edges[state]
            mat[state, edge.to] = edge.value
        end
    end
    return mat
end

# Dense transition matrices: ground truth, random initial guess, fitted result.
T = edges_to_matrix(E, state_num)
T0 = edges_to_matrix(E0, state_num)
T1 = edges_to_matrix(E1, state_num);

In [9]:
# ground-truth transition matrix (dense form of E)
T

5×5 Array{Float64,2}:
 0.6  0.2  0.0  0.0  0.2
 0.2  0.6  0.2  0.0  0.0
 0.0  0.2  0.6  0.2  0.0
 0.0  0.0  0.2  0.6  0.2
 0.2  0.0  0.0  0.2  0.6

In [10]:
# random initial guess (dense form of E0); entries are raw rand() draws
T0

5×5 Array{Float64,2}:
 0.775959  0.810934  0.0       0.0       0.472828
 0.924865  0.706727  0.750343  0.0       0.0
 0.0       0.410227  0.899504  0.88271   0.0
 0.0       0.0       0.986784  0.782084  0.199711
 0.448349  0.0       0.0       0.857292  0.382649

In [11]:
# transition matrix returned by msmbaumwelch_edge (dense form of E1)
T1

5×5 Array{Float64,2}:
 0.575871  0.218325  0.0       0.0       0.205805
 0.214932  0.590163  0.194904  0.0       0.0
 0.0       0.249757  0.526989  0.223254  0.0
 0.0       0.0       0.203435  0.610029  0.186536
 0.202882  0.0       0.0       0.2099    0.587218

In [12]:
# Forward pass of the edge-list implementation at the ground-truth parameters.
# The second return value is the input msmbackward_edge expects as its last argument.
alpha_list_seica, alpha_sum_list = MDToolbox.msmforward_edge(
    observations, state_num, E, pi_i, emission
);

In [13]:
# forward result for the first observation sequence (edge-list implementation)
first(alpha_list_seica)

1000×5 Array{Float64,2}:
 0.04        0.04        0.2        0.56        0.16
 0.0829016   0.326425    0.388601   0.132124    0.0699482
 0.0799037   0.628961    0.251504   0.0264741   0.0131568
 0.0837384   0.73727     0.167361   0.00816853  0.00346244
 0.458717    0.325414    0.206037   0.006452    0.00337893
 0.665118    0.182906    0.132356   0.00637476  0.0132444
 0.278991    0.599823    0.0933071  0.00524261  0.0226358
 0.108004    0.160719    0.327463   0.0389766   0.364838
 0.505575    0.156021    0.251234   0.0343979   0.052772
 0.231021    0.573972    0.158004   0.0136295   0.0233726
 0.127259    0.72863     0.13087    0.00547992  0.00776057
 0.100209    0.766603    0.126219   0.00347429  0.00349559
 0.0938241   0.77478     0.125825   0.00306994  0.00250108
 ⋮                                              
 0.129808    0.0513126   0.133044   0.405418    0.280417
 0.0231089   0.0133558   0.137128   0.208892    0.617515
 0.568672    0.0464796   0.183789   0.0801322   0.120927
 0.

In [14]:
# Backward pass of the edge-list implementation, reusing alpha_sum_list from
# the forward pass.
beta_list_seica = MDToolbox.msmbackward_edge(
    observations, state_num, E, pi_i, emission, alpha_sum_list
);

In [15]:
# backward result for the first observation sequence (edge-list implementation)
first(beta_list_seica)

1000×5 Array{Float64,2}:
 2.35942    5.47887    2.43618   0.281465   0.260082
 0.882457   1.83633    0.787575  0.0922471  0.131199
 0.877301   1.268      0.507035  0.075454   0.217376
 1.80763    1.04748    0.437492  0.130832   0.598398
 1.19961    0.990261   0.606744  0.182484   0.379645
 0.872094   1.51788    1.02277   0.361231   0.351282
 1.09984    0.882437   1.24612   1.09003    1.84935
 1.89571    1.76334    0.947694  0.214953   0.52939
 0.863355   2.0626     0.929085  0.108399   0.0862806
 0.579266   1.34132    0.594082  0.067971   0.06424
 0.572596   1.18126    0.500099  0.0567535  0.0861455
 0.797053   1.12809    0.431254  0.0592198  0.199185
 1.80263    1.01132    0.361828  0.100065   0.593941
 ⋮                                          
 0.394075   0.407739   1.12471   1.14234    1.12391
 0.593289   0.844106   1.91973   1.36468    0.69099
 0.180763   0.22216    1.87082   4.14129    1.74644
 0.0899516  0.0892783  0.720805  1.60438    0.716109
 0.130134   0.083193   0.548017  

In [16]:
# Reference fit with the dense-matrix msmbaumwelch (timed), started from the
# same initial matrix T0 so the result can be compared with T1.
@time T2, emission2, pi_i2 = MDToolbox.msmbaumwelch(
    observations, T0, pi_i, emission
);

100 iteration LogLikelihood = -1.289679e+04  delta = 4.180257e-03  tolerance = 1.000000e-04
 30.629224 seconds (217.09 M allocations: 9.406 GiB, 5.23% gc time)


In [17]:
# transition matrix returned by the dense-matrix msmbaumwelch
T2

5×5 Array{Float64,2}:
 0.57565   0.218491  0.0       0.0       0.205859
 0.214973  0.589732  0.195295  0.0       0.0
 0.0       0.251435  0.523448  0.225118  0.0
 0.0       0.0       0.203951  0.609401  0.186648
 0.203023  0.0       0.0       0.209971  0.587005

In [18]:
# Forward pass of the dense-matrix implementation at the ground-truth parameters.
logL, alpha_list, factor_list = MDToolbox.msmforward(
    observations, T, pi_i, emission
);

In [19]:
# forward result for the first observation sequence (dense-matrix implementation)
first(alpha_list)

1000×5 Array{Float64,2}:
 0.04        0.04        0.2        0.56        0.16
 0.0829016   0.326425    0.388601   0.132124    0.0699482
 0.0799037   0.628961    0.251504   0.0264741   0.0131568
 0.0837384   0.73727     0.167361   0.00816853  0.00346244
 0.458717    0.325414    0.206037   0.006452    0.00337893
 0.665118    0.182906    0.132356   0.00637476  0.0132444
 0.278991    0.599823    0.0933071  0.00524261  0.0226358
 0.108004    0.160719    0.327463   0.0389766   0.364838
 0.505575    0.156021    0.251234   0.0343979   0.052772
 0.231021    0.573972    0.158004   0.0136295   0.0233726
 0.127259    0.72863     0.13087    0.00547992  0.00776057
 0.100209    0.766603    0.126219   0.00347429  0.00349559
 0.0938241   0.77478     0.125825   0.00306994  0.00250108
 ⋮                                              
 0.129808    0.0513126   0.133044   0.405418    0.280417
 0.0231089   0.0133558   0.137128   0.208892    0.617515
 0.568672    0.0464796   0.183789   0.0801322   0.120927
 0.

In [20]:
# Backward pass of the dense-matrix implementation, reusing factor_list from
# the forward pass.
logL2, beta_list = MDToolbox.msmbackward(
    observations, factor_list, T, pi_i, emission
);

In [21]:
# backward result for the first observation sequence (dense-matrix implementation)
first(beta_list)

1000×5 Array{Float64,2}:
 2.35942    5.47887    2.43618   0.281465   0.260082
 0.882457   1.83633    0.787575  0.0922471  0.131199
 0.877301   1.268      0.507035  0.075454   0.217376
 1.80763    1.04748    0.437492  0.130832   0.598398
 1.19961    0.990261   0.606744  0.182484   0.379645
 0.872094   1.51788    1.02277   0.361231   0.351282
 1.09984    0.882437   1.24612   1.09003    1.84935
 1.89571    1.76334    0.947694  0.214953   0.52939
 0.863355   2.0626     0.929085  0.108399   0.0862806
 0.579266   1.34132    0.594082  0.067971   0.06424
 0.572596   1.18126    0.500099  0.0567535  0.0861455
 0.797053   1.12809    0.431254  0.0592198  0.199185
 1.80263    1.01132    0.361828  0.100065   0.593941
 ⋮                                          
 0.394075   0.407739   1.12471   1.14234    1.12391
 0.593289   0.844106   1.91973   1.36468    0.69099
 0.180763   0.22216    1.87082   4.14129    1.74644
 0.0899516  0.0892783  0.720805  1.60438    0.716109
 0.130134   0.083193   0.548017  

In [22]:
# Element-wise difference between the two forward implementations for the
# first sequence.
first(alpha_list) - first(alpha_list_seica)

1000×5 Array{Float64,2}:
  0.0           0.0           0.0           0.0           0.0
  0.0          -5.55112e-17   0.0           0.0           1.38778e-17
  2.77556e-17   0.0           1.11022e-16   6.93889e-18   5.20417e-18
  1.38778e-17   1.11022e-16   2.77556e-17   3.46945e-18   1.30104e-18
  0.0           0.0           0.0           0.0           4.33681e-19
 -2.22045e-16  -2.77556e-17   0.0          -8.67362e-19   0.0
  5.55112e-17   1.11022e-16   4.16334e-17   1.73472e-18  -3.46945e-18
 -2.77556e-17   0.0           5.55112e-17   0.0          -5.55112e-17
 -1.11022e-16   0.0           0.0           0.0          -1.38778e-17
 -1.11022e-16   0.0           0.0          -3.46945e-18  -1.04083e-17
 -5.55112e-17   0.0           0.0          -8.67362e-19  -4.33681e-18
  0.0           0.0           0.0           0.0          -1.73472e-18
 -2.77556e-17   0.0           0.0          -8.67362e-19   0.0
  ⋮                                                      
 -2.77556e-17  -2.08167e-17   0

In [23]:
# Element-wise difference between the two backward implementations for the
# first sequence.
first(beta_list) - first(beta_list_seica)

1000×5 Array{Float64,2}:
  3.10862e-15   6.21725e-15   3.10862e-15   3.33067e-16   2.77556e-16
  9.99201e-16   2.44249e-15   9.99201e-16   8.32667e-17   1.38778e-16
  7.77156e-16   1.11022e-15   3.33067e-16   6.93889e-17   1.94289e-16
  1.77636e-15   1.11022e-15   3.88578e-16   1.38778e-16   7.77156e-16
  1.33227e-15   7.77156e-16   5.55112e-16   1.66533e-16   3.33067e-16
  9.99201e-16   1.33227e-15   1.11022e-15   3.33067e-16   3.88578e-16
  8.88178e-16   6.66134e-16   4.44089e-16   6.66134e-16   1.11022e-15
  1.55431e-15   1.11022e-15   5.55112e-16   1.66533e-16   4.44089e-16
  7.77156e-16   1.77636e-15   6.66134e-16   9.71445e-17   5.55112e-17
  3.33067e-16   1.11022e-15   4.44089e-16   6.93889e-17   4.16334e-17
  4.44089e-16   6.66134e-16   4.44089e-16   4.16334e-17   4.16334e-17
  4.44089e-16   8.88178e-16   2.77556e-16   5.55112e-17   1.38778e-16
  1.33227e-15   8.88178e-16   3.88578e-16   6.93889e-17   4.44089e-16
  ⋮                                                      
 -4.440

# visualization

In [24]:
using GraphRecipes, Plots
# select the PyPlot backend of Plots for the figures below
pyplot()

Plots.PyPlotBackend()

In [None]:
# Draw the ground-truth transition graph: nodes labelled by state index and
# weighted by pi_i, edge widths taken from the transition matrix itself.
graphplot(
    T;
    # nodes
    names = 1:size(T, 1),
    node_weights = pi_i,
    nodeshape = :circle,
    markersize = 0.2,
    markercolor = :white,
    fontsize = 10,
    # edges
    linecolor = :darkgrey,
    edgewidth = T,
    self_edge_size = 0.0,
    arrow = true,
)

In [None]:
# Draw the fitted transition graph. Edge widths and node labels are taken from
# T1 itself so the figure reflects the estimate rather than the ground-truth T.
# NOTE(review): node_weights still uses the pi_i that was passed into the fit;
# switch to pi_i1 if node sizes should show the re-estimated distribution.
graphplot(T1,
          markersize = 0.2,
          node_weights = pi_i,
          markercolor = :white,
          names = 1:size(T1, 1),
          fontsize = 10,
          linecolor = :darkgrey,
          nodeshape = :circle,
          edgewidth = T1,
          arrow = true
          )