In [2]:
using DataFrames, FredData, XLSX, SHA, Dates, CSV, TimeSeries, StatsBase, HiddenMarkovModels, Distributions, Plots, PrettyTables

In [3]:
"""
    guess_init(n_scenarios)

Return a random probability vector of length `n_scenarios`.

Integer weights are drawn uniformly from `1:100` and divided by their sum, so every
entry is strictly positive and the entries add up to one.
"""
function guess_init(n_scenarios)
    weights = rand(1:100, n_scenarios)
    total = sum(weights)
    return weights / total
end

"""
    guess_tmatrix(n_scenarios)

Return a random `n_scenarios × n_scenarios` row-stochastic matrix.

Integer weights are drawn uniformly from `1:100` and each row is divided by its own
sum, so every row adds up to one.
"""
function guess_tmatrix(n_scenarios)
    weights = rand(1:100, n_scenarios, n_scenarios)
    row_totals = sum(weights; dims=2)
    return weights ./ row_totals
end

"""
    guess_distribution()

Return a `Normal` distribution with randomly drawn parameters: the standard deviation
is sampled from `Uniform(0, 0.25)` and the mean from `Uniform(-0.25, 0.25)`.
"""
function guess_distribution()
    # The standard deviation is drawn before the mean; keep this order so the random
    # stream is consumed the same way on every call.
    σ = rand(Uniform(0, 0.25))
    μ = rand(Uniform(-0.25, 0.25))
    return Normal(μ, σ)
end

"""
    train_hmm(data, n_scenarios = 2)

Fit a hidden Markov model with `n_scenarios` hidden states and `Normal` observation
distributions to `data`.

The starting point is drawn at random: initial state probabilities from `guess_init`,
the transition matrix from `guess_tmatrix`, and one observation distribution per state
from `guess_distribution`. The guess is then refined with `baum_welch`.

# Arguments
- `data`: observation sequence passed unchanged to `baum_welch`.
- `n_scenarios`: number of hidden states (default `2`).

# Returns
The result of `baum_welch(hmm_guess, data)`, which callers destructure as
`(hmm_est, loglikelihood_evolution)`.

Because the initial guess is random and no seed is set here, repeated calls on the
same `data` may return different fits.
"""
function train_hmm(data, n_scenarios = 2)
    init_guess = guess_init(n_scenarios)
    guess_matrix = guess_tmatrix(n_scenarios)
    # Use the file-level `guess_distribution` helper; a nested copy would only shadow it.
    guess_dist = [guess_distribution() for _ in 1:n_scenarios]
    hmm_guess = HMM(init_guess, guess_matrix, guess_dist)
    return baum_welch(hmm_guess, data)
end

"""
    regime_summary(loglikehood, hmm_est, freq = 1)

Build a one-row `DataFrame` summarising the first two regimes of `hmm_est`.

# Arguments
- `loglikehood`: sequence of log-likelihood values; only its last element is reported.
- `hmm_est`: fitted model, read through `transition_matrix` and `obs_distributions`.
- `freq`: scaling factor; means are multiplied by `freq` and standard deviations by
  `freq^0.5` (default `1`, i.e. no scaling).

# Columns
`mu1`, `sig1`, `mu2`, `sig2` (scaled mean / standard deviation of regimes 1 and 2),
`p11`, `p21` (transition-matrix entries), `loglikehood`, and `prob1`.

Only regimes 1 and 2 are inspected. `prob1 = p21 / (p12 + p21)` is the stationary
probability of regime 1 for a two-state chain; it does not carry that meaning for
models with more than two states.
"""
function regime_summary(loglikehood, hmm_est, freq = 1)
    trans = transition_matrix(hmm_est)
    dists = obs_distributions(hmm_est)
    first_regime, second_regime = dists[1], dists[2]
    vol_scale = freq^0.5

    summary = Dict(
        "mu1" => mean(first_regime) * freq,
        "sig1" => std(first_regime) * vol_scale,
        "mu2" => mean(second_regime) * freq,
        "sig2" => std(second_regime) * vol_scale,
        "p11" => trans[1, 1],
        "p21" => trans[2, 1],
        "loglikehood" => last(loglikehood),
        "prob1" => trans[2, 1] / (trans[1, 2] + trans[2, 1]),
    )
    return DataFrame(summary)
end

regime_summary (generic function with 2 methods)

In [4]:
# Load the CPI series from "cpius.csv": fields are separated by ';', decimals use ',',
# dates are formatted yyyy-mm-dd, and the `Date` column becomes the TimeArray timestamp.
cpius_source = TimeArray(CSV.File("cpius.csv", delim=';', dateformat="yyyy-mm-dd", decimal=','),timestamp=:Date)
# Divide every value by 100 (presumably percent -> decimal fraction; confirm against the data source).
cpi_us = cpius_source ./ 100


830×1 TimeArray{Float64, 1, Date, Vector{Float64}} 1955-02-01 to 2024-03-01
┌────────────┬─────────────────┐
│            │ CPALTT01USM657N │
├────────────┼─────────────────┤
│ 1955-02-01 │             0.0 │
│ 1955-03-01 │             0.0 │
│ 1955-04-01 │             0.0 │
│ 1955-05-01 │             0.0 │
│ 1955-06-01 │             0.0 │
│ 1955-07-01 │      0.00374532 │
│ 1955-08-01 │             0.0 │
│ 1955-09-01 │      0.00373134 │
│     ⋮      │        ⋮        │
│ 2023-09-01 │      0.00248513 │
│ 2023-10-01 │     -0.00038338 │
│ 2023-11-01 │     -0.00201514 │
│ 2023-12-01 │     -0.00099332 │
│ 2024-01-01 │       0.0054475 │
│ 2024-02-01 │      0.00618967 │
│ 2024-03-01 │      0.00646417 │
└────────────┴─────────────────┘
                815 rows omitted

In [5]:
# Summary statistics of the full-sample values.
describe(values(cpi_us))

Summary Stats:
Length:         830
Missing Count:  0
Mean:           0.002974
Std. Deviation: 0.003559
Minimum:        -0.019153
1st Quartile:   0.000344
Median:         0.002906
3rd Quartile:   0.004985
Maximum:        0.018059
Type:           Float64


In [6]:
# Restrict the sample to observations dated 1983-01-01 or later.
cpi_us_1983 = from(cpi_us,Date(1983,1,1))

495×1 TimeArray{Float64, 1, Date, Vector{Float64}} 1983-01-01 to 2024-03-01
┌────────────┬─────────────────┐
│            │ CPALTT01USM657N │
├────────────┼─────────────────┤
│ 1983-01-01 │      0.00204918 │
│ 1983-02-01 │      0.00102249 │
│ 1983-03-01 │             0.0 │
│ 1983-04-01 │      0.00715015 │
│ 1983-05-01 │      0.00608519 │
│ 1983-06-01 │      0.00302419 │
│ 1983-07-01 │       0.0040201 │
│ 1983-08-01 │        0.003003 │
│     ⋮      │        ⋮        │
│ 2023-09-01 │      0.00248513 │
│ 2023-10-01 │     -0.00038338 │
│ 2023-11-01 │     -0.00201514 │
│ 2023-12-01 │     -0.00099332 │
│ 2024-01-01 │       0.0054475 │
│ 2024-02-01 │      0.00618967 │
│ 2024-03-01 │      0.00646417 │
└────────────┴─────────────────┘
                480 rows omitted

In [7]:
# Summary statistics of the post-1983 subsample values.
describe(values(cpi_us_1983))

Summary Stats:
Length:         495
Missing Count:  0
Mean:           0.002358
Std. Deviation: 0.003261
Minimum:        -0.019153
1st Quartile:   0.000610
Median:         0.002404
3rd Quartile:   0.004327
Maximum:        0.013736
Type:           Float64


In [27]:
# Fit a 3-regime HMM to the post-1983 sample. `train_hmm` starts from a randomly drawn
# initial guess and no seed is set here, so re-running this cell can give a different fit.
n_regimes = 3
hmm_est, loglikehood = train_hmm(values(cpi_us_1983),n_regimes)
# Print the last element of the log-likelihood sequence returned by training.
println(last(loglikehood))
# Trailing expression: the fitted model is shown as the cell output.
hmm_est

2199.3702415457697


Hidden Markov Model with:
 - initialization: [1.0, 1.512262037478013e-72, 3.4493781766279777e-59]
 - transition matrix: [0.8590144104972102 0.0795824859574773 0.06140310354531249; 0.26558993839865985 0.7012705337388168 0.033139527862523485; 1.7907258377002065e-8 0.24068609278410485 0.7593138893086366]
 - observation distributions: [Normal{Float64}(μ=0.001665537926398226, σ=0.0017637878340031588), Normal{Float64}(μ=0.004949659096574292, σ=0.001998092874921631), Normal{Float64}(μ=0.00034257797888757527, σ=0.005286307294722554)]

In [28]:
# Print each regime's mean scaled by 12 and standard deviation scaled by sqrt(12),
# i.e. annualized figures for the monthly observations.
dists = obs_distributions(hmm_est)
# Iterate over the distributions themselves so the loop cannot go out of sync with a
# stale `n_regimes` left over from an earlier run of another cell.
for r in eachindex(dists)
    println("regime $r, mean: $(mean(dists[r])*12), std: $(std(dists[r])*12^0.5)")
end


regime 1, mean: 0.019986455116778712, std: 0.006109940284530664)
regime 2, mean: 0.05939590915889151, std: 0.006921596755211262)
regime 3, mean: 0.004110935746650903, std: 0.018312305637762892)
