In [50]:
import pandas as pd
import numpy as np
from pandas_datareader import data as pdr
from ta import add_all_ta_features
from pyhhmm.gaussian import GaussianHMM
import yfinance as yf
# Let yfinance take over pandas_datareader's Yahoo download functions
# (presumably this is what makes the `pdr.DataReader(...)` call below work
# without a `data_source` argument -- confirm against the installed yfinance).
yf.pdr_override()

# Statistics
from statsmodels.tsa.stattools import adfuller

#Data Preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#Supervised Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RepeatedKFold

#Reporting 
import matplotlib.pyplot as plt

In [51]:
# Data Extraction
# Download BTC-USD price history for the requested window and keep only the
# price/volume columns used by the rest of the notebook.
start_date, end_date = '2012-01-01', '2024-04-12'

data = pdr.DataReader('BTC-USD', start=start_date, end=end_date).loc[
    :, ['Open', 'High', 'Low', 'Adj Close', 'Volume']
]

[*********************100%%**********************]  1 of 1 completed


In [52]:
## Add Returns And Range
# Returns: close-to-close simple return (first row is NaN and gets dropped).
# Range:   relative spread between the day's high and low.
df = data.assign(
    Returns=lambda frame: (frame['Adj Close'] / frame['Adj Close'].shift(1)) - 1,
    Range=lambda frame: (frame['High'] / frame['Low']) - 1,
).dropna()
df

Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume,Returns,Range
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,-0.071926,0.105920
2014-09-19,424.102997,427.834991,384.532013,394.795990,37919700,-0.069843,0.112612
2014-09-20,394.673004,423.295990,389.882996,408.903992,36863600,0.035735,0.085700
2014-09-21,408.084991,412.425995,393.181000,398.821014,26580100,-0.024659,0.048947
2014-09-22,399.100006,406.915985,397.130005,402.152008,24127600,0.008352,0.024642
...,...,...,...,...,...,...,...
2024-04-07,68897.109375,70284.429688,68851.632812,69362.554688,21204930369,0.006770,0.020810
2024-04-08,69362.554688,72715.359375,69064.242188,71631.359375,37261432669,0.032709,0.052866
2024-04-09,71632.500000,71742.507812,68212.921875,69139.015625,36426900409,-0.034794,0.051744
2024-04-10,69140.242188,71093.429688,67503.562500,70587.882812,38318601774,0.020956,0.053180


In [53]:
# Add moving Average
# Simple rolling means of the adjusted close; the first (window - 1) rows of
# each column are NaN because the window is not yet full.
for window in (12, 21):
    df[f'MA_{window}'] = df['Adj Close'].rolling(window=window).mean()
print(len(df))
df.tail()

3494


Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume,Returns,Range,MA_12,MA_21
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-04-07,68897.109375,70284.429688,68851.632812,69362.554688,21204930369,0.00677,0.02081,68900.59668,67842.636905
2024-04-08,69362.554688,72715.359375,69064.242188,71631.359375,37261432669,0.032709,0.052866,69081.931315,68037.054315
2024-04-09,71632.5,71742.507812,68212.921875,69139.015625,36426900409,-0.034794,0.051744,68948.10319,68381.161086
2024-04-10,69140.242188,71093.429688,67503.5625,70587.882812,38318601774,0.020956,0.05318,69006.024414,68508.504464
2024-04-11,70575.734375,71256.234375,69571.8125,70060.609375,30153382941,-0.00747,0.024211,69040.633138,68726.08631


In [54]:
# Structure Data
# NOTE: train and test both span every row of df (slice starts at 0 with no
# upper bound), so the HMM is later scored on the same data it was fitted on.
save_df = df.iloc[0:]
X_train = save_df[['Returns', 'Range']]
X_test = save_df[['Returns', 'Range']]

X_test

#print('Train Length ', len(X_train))
#print('Test Length ', len(X_test))

#print('X Train From: ', X_train.head(1).index.item())
#print('X Test From: ', X_test.head(1).index.item())

Unnamed: 0_level_0,Returns,Range
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2014-09-18,-0.071926,0.105920
2014-09-19,-0.069843,0.112612
2014-09-20,0.035735,0.085700
2014-09-21,-0.024659,0.048947
2014-09-22,0.008352,0.024642
...,...,...
2024-04-07,0.006770,0.020810
2024-04-08,0.032709,0.052866
2024-04-09,-0.034794,0.051744
2024-04-10,0.020956,0.053180


## Train HMM

In [55]:
# Fit a 4-state Gaussian HMM with full covariance on the two features
# (Returns, Range), then peek at the first ten decoded states.
# NOTE(review): no random seed is set in this notebook; if pyhhmm's
# initialisation is stochastic the state labels may differ between runs -- confirm.
model = GaussianHMM(n_states=4, n_emissions=2, covariance_type='full')
train_sequences = [np.array(X_train.values)]
model.train(train_sequences)
model.predict([X_train.values])[0][:10]

array([3, 3, 2, 1, 0, 2, 1, 1, 1, 0], dtype=int32)

In [56]:
# Make Prediction on Test Data
# Start from the saved frame without the High/Low columns, then attach the
# hidden state decoded for every row of X_test.
df_main = save_df.drop(columns=['High', 'Low'])

hmm_results = model.predict([X_test.values])[0]
df_main['HMM'] = hmm_results
df_main.tail()


Unnamed: 0_level_0,Open,Adj Close,Volume,Returns,Range,MA_12,MA_21,HMM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-04-07,68897.109375,69362.554688,21204930369,0.00677,0.02081,68900.59668,67842.636905,0
2024-04-08,69362.554688,71631.359375,37261432669,0.032709,0.052866,69081.931315,68037.054315,2
2024-04-09,71632.5,69139.015625,36426900409,-0.034794,0.051744,68948.10319,68381.161086,1
2024-04-10,69140.242188,70587.882812,38318601774,0.020956,0.05318,69006.024414,68508.504464,2
2024-04-11,70575.734375,70060.609375,30153382941,-0.00747,0.024211,69040.633138,68726.08631,0


# Run Backtest

In [57]:
# Add MA Signals
# 1.0 when the 12-day average is above the 21-day average, 0.0 otherwise.
# Rows where either average is NaN match neither condition and stay NaN.
df_main['MA_Signal'] = np.select(
    [
        df_main['MA_12'] > df_main['MA_21'],
        df_main['MA_12'] <= df_main['MA_21'],
    ],
    [1.0, 0.0],
    default=np.nan,
)


In [58]:
# Add HMM Signal
# Flag rows whose decoded state is in the favourable list (1) or not (0).
# NOTE(review): the model is built with n_states=4, so listing 0-3 marks every
# state as favourable and HMM_Signal is 1 on every row -- narrow this list if
# the HMM is meant to filter regimes.
favourable_state = [0, 1, 2, 3]
hmm_values = df_main['HMM'].isin(favourable_state).astype('int64').tolist()
df_main['HMM_Signal'] = hmm_values
df_main.tail(10)

Unnamed: 0_level_0,Open,Adj Close,Volume,Returns,Range,MA_12,MA_21,HMM,MA_Signal,HMM_Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2024-04-02,69705.023438,65446.972656,50705240709,-0.061048,0.079301,68436.915365,68176.143973,1,1.0,1
2024-04-03,65446.671875,65980.8125,34488018367,0.008157,0.036469,68620.419596,67837.920759,2,1.0,1
2024-04-04,65975.695312,68508.84375,34439527442,0.038315,0.064156,68990.972982,67700.408854,2,1.0,1
2024-04-05,68515.757812,67837.640625,33748230056,-0.009797,0.041118,69041.262044,67625.831101,1,1.0,1
2024-04-06,67840.570312,68896.109375,19967785809,0.015603,0.031676,68952.703451,67796.354539,0,1.0,1
2024-04-07,68897.109375,69362.554688,21204930369,0.00677,0.02081,68900.59668,67842.636905,0,1.0,1
2024-04-08,69362.554688,71631.359375,37261432669,0.032709,0.052866,69081.931315,68037.054315,2,1.0,1
2024-04-09,71632.5,69139.015625,36426900409,-0.034794,0.051744,68948.10319,68381.161086,1,1.0,1
2024-04-10,69140.242188,70587.882812,38318601774,0.020956,0.05318,69006.024414,68508.504464,2,1.0,1
2024-04-11,70575.734375,70060.609375,30153382941,-0.00747,0.024211,69040.633138,68726.08631,0,1.0,1


In [59]:
# Add combined Signal
# The position is on (1) only when both the MA crossover and the HMM signal
# are on; otherwise it is off (0).
# The column is named 'Main_Signal' (previously misspelled 'Main_Sinal') so
# that the strategy-returns cell, which reads df_main['Main_Signal'], no
# longer fails with a KeyError.
# shift(1) carries each day's result to the next row, so the signal is only
# acted on one day after it is computed; the first row becomes NaN.
df_main['Main_Signal'] = (
    ((df_main['MA_Signal'] == 1) & (df_main['HMM_Signal'] == 1))
    .astype('int64')
    .shift(1)
)


In [None]:
# BenchMark Returns
# Buy-and-hold baseline: daily log returns of the adjusted close, their
# running sum, and that sum converted back to a cumulative simple return.
df_main = df_main.assign(
    lrets_bench=lambda frame: np.log(frame['Adj Close'] / frame['Adj Close'].shift(1)),
    bench_prod=lambda frame: frame['lrets_bench'].cumsum(),
    bench_prod_exp=lambda frame: np.exp(frame['bench_prod']) - 1,
)

In [None]:
# Strategy Returns
# Open-to-next-open log return, counted only on rows where Main_Signal is 1,
# then accumulated and converted back to a cumulative simple return.
df_main = df_main.assign(
    lrets_strat=lambda frame: (
        np.log(frame['Open'].shift(-1) / frame['Open']) * frame['Main_Signal']
    ),
    lrets_prod=lambda frame: frame['lrets_strat'].cumsum(),
    strat_prod_exp=lambda frame: np.exp(frame['lrets_prod']) - 1,
)