In [2]:
using DataFrames, FredData, XLSX, SHA, Dates, CSV, TimeSeries, StatsBase, HiddenMarkovModels, Distributions, Plots, PrettyTables

In [12]:
"""
    guess_init(n_scenarios)

Return a random initial-state probability vector of length `n_scenarios`.

Integer weights are drawn uniformly from `1:100` and normalised so that the
entries sum to one.
"""
function guess_init(n_scenarios)
    weights = rand(1:100, n_scenarios)
    total = sum(weights)
    return weights ./ total
end

"""
    guess_tmatrix(n_scenarios)

Return a random `n_scenarios × n_scenarios` row-stochastic matrix.

Integer weights are drawn uniformly from `1:100`; each row is then divided by
its own total so that every row sums to one.
"""
function guess_tmatrix(n_scenarios)
    weights = rand(1:100, n_scenarios, n_scenarios)
    row_totals = sum(weights; dims=2)
    return weights ./ row_totals
end

"""
    guess_distribution()

Return a `Normal` distribution with randomly drawn parameters.

The standard deviation is drawn from `Uniform(0, 0.25)` first, then the mean
from `Uniform(-0.25, 0.25)`.
"""
function guess_distribution()
    # Draw order (sigma, then mean) is kept so the RNG stream is consumed identically.
    σ = rand(Uniform(0, 0.25))
    μ = rand(Uniform(-0.25, 0.25))
    return Normal(μ, σ)
end

"""
    train_hmm(data, n_scenarios = 2)

Fit a hidden Markov model with `n_scenarios` states to `data` using `baum_welch`,
starting from a randomly generated initial guess.

# Arguments
- `data`: observation sequence passed unchanged to `baum_welch`.
- `n_scenarios`: number of hidden states (default `2`).

# Returns
The value returned by `baum_welch(hmm_guess, data)`; callers in this file
destructure it as `(hmm_est, loglikelihood_evolution)`.
"""
function train_hmm(data, n_scenarios = 2)
    # Random starting point: initial-state probabilities, transition matrix and
    # one emission distribution per state, all from the shared top-level helpers.
    init_guess = guess_init(n_scenarios)
    guess_matrix = guess_tmatrix(n_scenarios)
    guess_dist = [guess_distribution() for _ in 1:n_scenarios]
    hmm_guess = HMM(init_guess, guess_matrix, guess_dist)

    return baum_welch(hmm_guess, data)
end

"""
    regime_summary(loglikehood, hmm_est, freq = 1)

Build a one-row `DataFrame` summarising the first two regimes of `hmm_est`.

# Arguments
- `loglikehood`: collection whose last element is reported under `"loglikehood"`.
- `hmm_est`: fitted model queried via `transition_matrix` and `obs_distributions`.
- `freq`: scaling factor; means are multiplied by `freq` and standard
  deviations by `freq^0.5` (default `1`).

# Returns
A `DataFrame` with columns `mu1`, `sig1`, `mu2`, `sig2`, `p11`, `p21`,
`loglikehood` and `prob1`, where `prob1 = p21 / (p12 + p21)`.

Only regimes 1 and 2 are read, regardless of how many states the model has.
"""
function regime_summary(loglikehood, hmm_est, freq = 1)
    trans = transition_matrix(hmm_est)
    emissions = obs_distributions(hmm_est)
    first_regime = emissions[1]
    second_regime = emissions[2]

    # For a two-state chain, p21 / (p12 + p21) is the stationary probability of regime 1.
    stats = Dict(
        "mu1" => mean(first_regime) * freq,
        "sig1" => std(first_regime) * freq^0.5,
        "mu2" => mean(second_regime) * freq,
        "sig2" => std(second_regime) * freq^0.5,
        "p11" => trans[1, 1],
        "p21" => trans[2, 1],
        "loglikehood" => last(loglikehood),
        "prob1" => trans[2, 1] / (trans[1, 2] + trans[2, 1]),
    )
    return DataFrame(stats)
end

regime_summary (generic function with 2 methods)

In [7]:
# Read the CPI series from a semicolon-delimited CSV that uses a decimal comma,
# and index the resulting TimeArray by its `Date` column.
cpius_source = TimeArray(
    CSV.File("cpius.csv"; delim=';', dateformat="yyyy-mm-dd", decimal=',');
    timestamp=:Date,
)
# Rescale by 1/100 (presumably percent -> decimal fraction; confirm against the data source).
cpi_us = cpius_source ./ 100


830×1 TimeArray{Float64, 1, Date, Vector{Float64}} 1955-02-01 to 2024-03-01
┌────────────┬─────────────────┐
│[1m            [0m│[1m CPALTT01USM657N [0m│
├────────────┼─────────────────┤
│ 1955-02-01 │             0.0 │
│ 1955-03-01 │             0.0 │
│ 1955-04-01 │             0.0 │
│ 1955-05-01 │             0.0 │
│ 1955-06-01 │             0.0 │
│ 1955-07-01 │      0.00374532 │
│ 1955-08-01 │             0.0 │
│ 1955-09-01 │      0.00373134 │
│     ⋮      │        ⋮        │
│ 2023-09-01 │      0.00248513 │
│ 2023-10-01 │     -0.00038338 │
│ 2023-11-01 │     -0.00201514 │
│ 2023-12-01 │     -0.00099332 │
│ 2024-01-01 │       0.0054475 │
│ 2024-02-01 │      0.00618967 │
│ 2024-03-01 │      0.00646417 │
└────────────┴─────────────────┘
[36m                815 rows omitted[0m

In [9]:
# Summary statistics of the raw values of the full series.
cpi_us |> values |> describe

Summary Stats:
Length:         830
Missing Count:  0
Mean:           0.002974
Std. Deviation: 0.003559
Minimum:        -0.019153
1st Quartile:   0.000344
Median:         0.002906
3rd Quartile:   0.004985
Maximum:        0.018059
Type:           Float64


In [10]:
# Restrict the series to observations dated 1983-01-01 or later.
cpi_us_1983 = from(cpi_us, Date(1983, 1, 1))

495×1 TimeArray{Float64, 1, Date, Vector{Float64}} 1983-01-01 to 2024-03-01
┌────────────┬─────────────────┐
│[1m            [0m│[1m CPALTT01USM657N [0m│
├────────────┼─────────────────┤
│ 1983-01-01 │      0.00204918 │
│ 1983-02-01 │      0.00102249 │
│ 1983-03-01 │             0.0 │
│ 1983-04-01 │      0.00715015 │
│ 1983-05-01 │      0.00608519 │
│ 1983-06-01 │      0.00302419 │
│ 1983-07-01 │       0.0040201 │
│ 1983-08-01 │        0.003003 │
│     ⋮      │        ⋮        │
│ 2023-09-01 │      0.00248513 │
│ 2023-10-01 │     -0.00038338 │
│ 2023-11-01 │     -0.00201514 │
│ 2023-12-01 │     -0.00099332 │
│ 2024-01-01 │       0.0054475 │
│ 2024-02-01 │      0.00618967 │
│ 2024-03-01 │      0.00646417 │
└────────────┴─────────────────┘
[36m                480 rows omitted[0m

In [11]:
# Summary statistics of the raw values of the post-1983 sub-sample.
cpi_us_1983 |> values |> describe

Summary Stats:
Length:         495
Missing Count:  0
Mean:           0.002358
Std. Deviation: 0.003261
Minimum:        -0.019153
1st Quartile:   0.000610
Median:         0.002404
3rd Quartile:   0.004327
Maximum:        0.013736
Type:           Float64


In [45]:
# Fit a six-regime HMM to the post-1983 series, print the last entry of the
# log-likelihood trace, and leave the fitted model as the cell's displayed value.
n_regimes = 6
hmm_est, loglikehood = train_hmm(values(cpi_us_1983), n_regimes)
println(loglikehood[end])
hmm_est

2235.3922836529314


Hidden Markov Model with:
 - initialization: [3.3882376918598465e-30, 0.9999999999999895, 4.703595486085791e-111, 1.057767403460179e-14, 4.640738136566136e-73, 0.0]
 - transition matrix: [0.6796174880886128 0.24209245223256715 1.4759752938365558e-5 5.3728702436623006e-5 0.07822134766193298 2.2356151211005015e-7; 0.0005364151380971921 0.003373768919871589 0.18631265777545689 0.8097771581532875 1.3286722602054602e-11 3.770001121410043e-30; 0.12553834302777137 4.82232457505938e-13 0.7139343778857989 2.5289051635250463e-10 0.00016014895632039582 0.16036712987673654; 0.39828069184627646 0.03981987329237119 0.025905466389236083 0.5359937332412739 2.352303974771947e-7 4.451121385624436e-13; 0.057870040149251775 0.3168158733482657 0.028518494425442242 0.002925878862527726 0.5938696131269512 1.0008756138214143e-7; 0.286225091148384 1.4364979929565913e-7 2.0100676502522134e-19 0.09083380973520677 0.6229409554666097 1.468575303828437e-16]
 - observation distributions: [Normal{Float64}(μ=0.0036444

In [46]:
# Print each regime's emission mean and standard deviation, scaled by 12 and
# sqrt(12) respectively (presumably monthly -> annual; confirm the data frequency).
dists = obs_distributions(hmm_est)
for (r, dist) in enumerate(dists)
    # The original format string ended with a stray ")" that leaked into the output.
    println("regime $r, mean: $(mean(dist)*12), std: $(std(dist)*12^0.5)")
end


regime 1, mean: 0.04373370942651327, std: 0.004886654175981257)
regime 2, mean: 0.015592865194794737, std: 0.005460550714464175)
regime 3, mean: -0.017115703132583204, std: 0.01576423578838088)
regime 4, mean: 0.005908055469741896, std: 0.004633919930662512)
regime 5, mean: 0.08754183617946756, std: 0.008435193677093016)
regime 6, mean: 0.06630174123862179, std: 0.00038166269875875464)
