In [None]:
import numpy as np


def coin_mle(data):
    """
    Returns the learned probability of getting a heads using MLE.

    The estimate is the fraction of observations that are heads, i.e. the
    sum of the 0/1 observations divided by their count.

    Parameters
    ----------
    data: list, array-like
        The list of observations. 1 for heads and 0 for tails.

    Returns
    -------
    theta: The learned probability of getting a heads.

    Raises
    ------
    ValueError
        If `data` contains no observations.
    """
    data = np.asarray(data)
    if data.size == 0:
        # Without this guard 0 / 0 silently yields nan (plus a RuntimeWarning).
        raise ValueError("data must contain at least one observation")
    n_heads = np.sum(data)

    return n_heads / data.size

In [None]:
coin_mle([1, 1, 1, 0, 0])

0.6

In [None]:
coin_mle([1, 1, 1, 0, 0, 0])

0.5

### MLE for normal distributions

The normal distribution is parametrized by its mean $\mu$ and standard deviation $\sigma$, and its density is given as follows: $P(x|\mu,\sigma) = \frac{1}{\sqrt{2\pi \sigma^2}} e^{-\frac{(x-\mu)^2}{2\sigma^2}}$

The likelihood is the probability of the data being observed, given the parameters. So, given the observed data $X = (x_1, ..., x_N)$, assumed to be independent and identically distributed, we can state the likelihood function as follows: $L(\mu, \sigma^2) = P(X|\mu,\sigma^2) = Pr(x_1, ..., x_N|\mu, \sigma^2) = \prod^N_{n=1}P(x_n|\mu,\sigma^2) = \prod^N_{n=1} \frac{1}{\sqrt{2\pi \sigma^2}} e^{-\frac{(x_n-\mu)^2}{2\sigma^2}}$

In [None]:
import numpy as np

def gaussian_mle(data):
    r"""
    Returns the learned parameters of the Normal Distribution using MLE.

    Parameters
    ----------
    data: list, array-like
        The list of observed variables.

    Returns
    -------
    \mu: The learned mean of the Normal Distribution.
    \sigma: The learned standard deviation of the Normal Distribution.

    Raises
    ------
    ValueError
        If `data` contains no observations.
    """
    data = np.asarray(data)
    if data.size == 0:
        # The mean of an empty array would silently produce nan for both values.
        raise ValueError("data must contain at least one observation")
    mu = np.mean(data)
    # Square root of the mean squared deviation (divides by N, not N - 1),
    # so this is the standard deviation, not the variance.
    sigma = np.sqrt(np.mean((data - mu) ** 2))
    return mu, sigma

In [None]:
from numpy.random import normal

In [None]:
data = normal(loc=1, scale=2, size=10)

In [None]:
data

array([-0.52667196,  0.02500209,  2.09007582, -1.06386571,  1.91294937,
        3.02462072,  0.64428612,  3.66153366,  0.27519472,  0.90412091])

In [None]:
mu, sigma = gaussian_mle(data)

In [None]:
mu

1.0947245748685188

In [None]:
sigma

1.4600281693618726

In [None]:
data = normal(loc=1, scale=2, size=1000)
data[:10]

array([ 4.14905268,  1.09361053,  2.66125811,  3.00395662, -2.85138656,
       -0.62216494,  2.90116423,  2.44018189, -1.83470295, -1.1054538 ])

In [None]:
mu, sigma = gaussian_mle(data)

In [None]:
mu

0.95993558419226

In [None]:
sigma

2.056662081155069

### The Baum-Welch algorithm (expectation maximization)

The expectation maximization  (EM) algorithm (known as Baum-Welch  when applied to HMMs) is an iterative method used  to find the maximum likelihood  or maximum a posteriori  (MAP) estimates of parameters  in statistical models, where the model depends on unobserved  latent variables. The EM iteration alternates between performing an expectation  (E) step, which creates a function for the expectation of the log-likelihood evaluated using the current estimate for the parameters, and a maximization (M) step, which computes parameters maximizing the expected log-likelihood found on the E  step. These parameter estimates are then used to determine the distribution of the latent variables in the next E  step.

The EM algorithm starts with initial values of the parameters $(\theta^{old})$. In the $E$ step, we take these
parameters and find the posterior distribution of the latent variables $P(Z|X,\theta^{old})$. We then use
this posterior distribution to evaluate the expectation of the logarithm of the complete-data
likelihood function, as a function of the parameters $\theta$, to give the function $Q(\theta,\theta^{old})$, defined
by the following: $Q(\theta,\theta^{old}) = \sum_Z Pr(Z|X,\theta^{old}) \ln Pr(X,Z|\theta)$

In [None]:
!pip install hmmlearn

In [None]:
!pip install yfinance

In [None]:
import pandas as pd
import numpy as np
import scipy as sp 
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import yfinance as yf
from hmmlearn.hmm import GaussianHMM

In [None]:
# Date window for the price download: it ends at the current local time and
# starts 500 days earlier. Because it is anchored on "now", the window (and
# everything computed from it) changes every time the notebook is re-run.
today = datetime.today()
days = timedelta(days=500)
start = today-days

In [None]:
print('start:', start, '\t', 'end:', today)

start: 2020-10-26 01:37:14.461805 	 end: 2022-03-10 01:37:14.461805


In [None]:
# Keep the ticker symbol under its own name instead of reusing `stk` for both
# the symbol string and the downloaded price table.
ticker = 'EBAY'
# auto_adjust=False keeps the separate 'Adj Close' column that is read below;
# with auto-adjustment enabled that column is not returned.
stk = yf.download(ticker, start=start, end=today, auto_adjust=False)
cl = stk['Adj Close']
# Drop the first volume row positionally so the volumes line up with the
# return series, whose first entry is NaN and is dropped.
vl = stk['Volume'].iloc[1:]

[*********************100%***********************]  1 of 1 completed


In [None]:
# One-period percentage change of the adjusted close, with missing entries
# removed in the same expression.
ret = cl.pct_change(periods=1).dropna()

In [None]:
# Feature matrix for the HMM: one row per observation, with the return in the
# first column and the traded volume in the second. `ret` and `vl` must have
# the same length for the stacking to succeed.
X = np.column_stack([ret, vl])

In [None]:
# Make an HMM instance and execute fit.
# random_state pins the parameter initialisation so that re-running the cell
# on the same X reproduces the same fitted model.
model = GaussianHMM(
    n_components=4,
    covariance_type="diag",
    n_iter=1000,
    random_state=42,
).fit(X)

In [None]:
# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

In [None]:
print("Transition matrix")
print(model.transmat_)

Transition matrix
[[7.91660071e-01 3.98961713e-09 2.11706958e-18 2.08339925e-01]
 [8.54451453e-08 5.26792194e-01 4.57478207e-07 4.73207263e-01]
 [3.27473842e-58 8.15344173e-01 2.11417378e-11 1.84655827e-01]
 [2.93291055e-01 9.46583124e-04 8.52818706e-02 6.20480491e-01]]


In [None]:
# Report, for every hidden state, its mean vector and the diagonal of its
# covariance matrix (the per-feature variances).
print("Means and vars of each hidden state")
for state_idx, (state_mean, state_cov) in enumerate(zip(model.means_, model.covars_)):
    print("{0}th hidden state".format(state_idx))
    print("mean = ", state_mean)
    print("var = ", np.diag(state_cov))
    print()

Means and vars of each hidden state
0th hidden state
mean =  [-1.39608837e-05  5.36828428e+06]
var =  [2.59800714e-04 1.10703292e+12]

1th hidden state
mean =  [7.62206256e-03 1.21132237e+07]
var =  [1.21230391e-03 4.64908420e+12]

2th hidden state
mean =  [-2.14156781e-02  2.17175937e+07]
var =  [3.58897684e-03 2.21903812e+13]

3th hidden state
mean =  [1.37302452e-03 8.74210263e+06]
var =  [5.94983006e-04 2.65711640e+12]

