In [3]:
import pandas as pd
import numpy as np
from pandas_datareader import data as pdr
from ta import add_all_ta_features
from pyhhmm.gaussian import GaussianHMM
import yfinance as yf
yf.pdr_override()

# Statistics
from statsmodels.tsa.stattools import adfuller

#Data Preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#Supervised Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RepeatedKFold

#Reporting 
import matplotlib.pyplot as plt

# Data Management

In [17]:
# Data Extraction
# Pull BTC-USD history for the window below and keep only the five
# price/volume columns listed in `price_columns`.
start_date, end_date = '2017-01-01', '2024-04-12'
price_columns = ['Open', 'High', 'Low', 'Adj Close', 'Volume']

data = pdr.DataReader('BTC-USD', start=start_date, end=end_date)[price_columns]
data

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-01,963.658020,1003.080017,958.698975,998.325012,147775008
2017-01-02,998.617004,1031.390015,996.702026,1021.750000,222184992
2017-01-03,1021.599976,1044.079956,1021.599976,1043.839966,185168000
2017-01-04,1044.400024,1159.420044,1044.400024,1154.729980,344945984
2017-01-05,1156.729980,1191.099976,910.416992,1013.380005,510199008
...,...,...,...,...,...
2024-04-07,68897.109375,70284.429688,68851.632812,69362.554688,21204930369
2024-04-08,69362.554688,72715.359375,69064.242188,71631.359375,37261432669
2024-04-09,71632.500000,71742.507812,68212.921875,69139.015625,36426900409
2024-04-10,69140.242188,71093.429688,67503.562500,70587.882812,38318601774


In [18]:
## Add Returns And Range
# Returns: close-to-close change relative to the previous row's 'Adj Close'.
# Range:   the row's High relative to its Low, minus one.
# The first row has no previous close, so its Returns is NaN and dropna()
# removes it. `assign` works on a fresh frame, so `data` itself is untouched.
df = (
    data
    .assign(
        Returns=lambda frame: frame['Adj Close'].div(frame['Adj Close'].shift(1)).sub(1),
        Range=lambda frame: frame['High'].div(frame['Low']).sub(1),
    )
    .dropna()
)

In [14]:
# Structure Data
# The two engineered features are the only inputs handed to the HMM.
feature_columns = ['Returns', 'Range']
X_train = df.loc[:, feature_columns]
X_train

Unnamed: 0_level_0,Returns,Range
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-02,0.023464,0.034803
2017-01-03,0.021620,0.022005
2017-01-04,0.106233,0.110130
2017-01-05,-0.122410,0.308302
2017-01-06,-0.109711,0.184249
...,...,...
2024-04-07,0.006770,0.020810
2024-04-08,0.032709,0.052866
2024-04-09,-0.034794,0.051744
2024-04-10,0.020956,0.053180


### HMM Learning 

In [21]:
# Build a 4-state Gaussian HMM with full covariance matrices; n_emissions=2
# matches the two feature columns in X_train (Returns, Range).
# NOTE(review): no random seed is set anywhere before training. If pyhhmm
# initialises its parameters randomly, the fitted states may differ between
# runs -- confirm against the pyhhmm documentation.
model = GaussianHMM(n_states=4, n_emissions=2, covariance_type='full')

# The features are passed as a one-element list holding a copied ndarray.
model.train([X_train.to_numpy(copy=True)])
model

<pyhhmm.gaussian.GaussianHMM at 0x14f20b580>

In [32]:
# Check Results
# predict() is given a one-element list, so the decoded state sequence for
# the training data is the first (and only) entry of what it returns.
decoded_sequences = model.predict([X_train.values])
hidden_states = decoded_sequences[0]

# Peek at the first 40 decoded states only.
hidden_states[:40]

array([0, 0, 2, 1, 1, 1, 2, 3, 0, 1, 2, 2, 3, 0, 0, 2, 3, 0, 0, 0, 3, 0,
       3, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 3, 1, 2], dtype=int32)

In [33]:
# Regime state means for each feature
# Displays the fitted mean vectors: the output has one row per hidden state
# (4 states were requested) and one column per feature -- presumably in
# X_train column order (Returns, Range); confirm against the pyhhmm docs.
model.means

array([[ 0.01353414,  0.03064206],
       [-0.04158962,  0.12337256],
       [ 0.04714478,  0.08084809],
       [-0.01574903,  0.03803952]])

In [28]:
# Regime state covariances for each feature
# Displays the fitted covariance matrices: one 2x2 matrix per hidden state,
# since the model was built with covariance_type='full' and two features.
model.covars

array([[[ 0.00028518,  0.00022079],
        [ 0.00022079,  0.00025473]],

       [[ 0.00315604, -0.00164093],
        [-0.00164093,  0.00486344]],

       [[ 0.00189593,  0.00143039],
        [ 0.00143039,  0.00145052]],

       [[ 0.00037986, -0.00029069],
        [-0.00029069,  0.00042544]]])

# Data Visualization

In [36]:
# Structure the prices for plotting
# Build one price series per hidden state (label_0 .. label_3). Each series
# has the same length as `prices`: it holds the price on rows decoded as that
# state and NaN on every other row.
prices = df['Adj Close'].values.astype(float)
state_ids = np.asarray(hidden_states)

# Sanity check: there must be exactly one decoded state per price row.
print(len(prices) == len(hidden_states))

# Use a real float NaN as the gap marker rather than the string 'nan', so each
# list stays purely numeric instead of mixing floats and strs. np.where also
# removes the hand-maintained index counter and the four copy-pasted branches,
# and keeps every list full-length even if an unexpected state id appears
# (such a row is simply NaN in all four series).
label_0, label_1, label_2, label_3 = (
    np.where(state_ids == state, prices, np.nan).tolist()
    for state in range(4)
)


True
