Importing required libraries


In [None]:
!pip install hmmlearn


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
import plotly.graph_objects as go
from plotly.graph_objs.scatter.marker import Line
from plotly.subplots import make_subplots
import plotly.express as px
import math

import warnings

warnings.filterwarnings('ignore')



Uploading Dataset


In [None]:
# Load the ^IXIC price history from a CSV on the mounted Drive path;
# adjust the path when running outside this environment.
df1 = pd.read_csv('/content/drive/MyDrive/Capstone/Data/^IXIC.csv')
df1.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2000-01-03,4186.189941,4192.189941,3989.709961,4131.149902,4131.149902,1510070000
1,2000-01-04,4020.0,4073.25,3898.22998,3901.689941,3901.689941,1511840000
2,2000-01-05,3854.350098,3924.209961,3734.870117,3877.540039,3877.540039,1735670000
3,2000-01-06,3834.439941,3868.76001,3715.620117,3727.129883,3727.129883,1598320000
4,2000-01-07,3711.090088,3882.669922,3711.090088,3882.620117,3882.620117,1634930000


In [None]:
# Summary statistics for the numeric columns.
df1.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,5853.0,5853.0,5853.0,5853.0,5853.0,5853.0
mean,4696.518501,4730.535452,4658.097665,4696.294041,4696.294041,2324933000.0
std,3592.069429,3618.161581,3562.313319,3592.167671,3592.167671,1131048000.0
min,1116.76001,1135.890015,1108.48999,1114.109985,1114.109985,221430000.0
25%,2155.810059,2171.939941,2140.530029,2155.290039,2155.290039,1724370000.0
50%,2983.030029,2999.139893,2964.939941,2983.659912,2983.659912,1957720000.0
75%,6202.75,6217.339844,6149.870117,6193.299805,6193.299805,2324730000.0
max,16120.919922,16212.230469,16017.230469,16057.44043,16057.44043,11621190000.0


In [None]:
# Column dtypes; note that Date is read as object (string), not datetime.
df1.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [None]:
# Count missing values per column.
df1.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

No null values. 


Selecting only 'Date' and 'Adj Close' columns.

In [None]:
# Keep only the adjusted close, indexed by the Date column.
stocks = df1[['Date', 'Adj Close']].set_index('Date')
stocks.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2000-01-03,4131.149902
2000-01-04,3901.689941
2000-01-05,3877.540039
2000-01-06,3727.129883
2000-01-07,3882.620117


In [None]:
# Name of the price column to model; reused below as the columns-axis name
# and for column selection.
trading_instrument = "Adj Close"

In [None]:
# plot_hidden_states looks up the column to plot via prices_df.columns.name,
# so tag the columns axis with the instrument name here.
stocks.columns.name = trading_instrument
# Use the configured instrument rather than repeating the literal column name.
px.line(stocks[trading_instrument])


We can see the 2007-2009 financial crisis, the COVID crash in 2020, and the 2022 recession period.

The function `prepare_data_for_model_input` returns the price dataframe extended with a moving-average column and the log returns of that moving average, together with those log returns as an array for the model.

In [None]:
def prepare_data_for_model_input(prices, ma, instrument='Adj Close'):
    '''
        Add a moving average and its log returns to a price dataframe.

        Input:
        prices (df) - Dataframe of close prices; must contain the `instrument` column.
                      NOTE: it is modified in place (two columns are added and
                      rows containing NaN are dropped), and the same object is returned.
        ma (int) - length of the moving average window, in rows
        instrument (str) - name of the price column to process (default 'Adj Close')

        Output:
        prices(df) - An enhanced prices dataframe, with moving averages and log return columns
        prices_array(nd.array) - an array of log returns with shape (n_rows, 1)
    '''

    ma_column = f'{instrument}_ma'
    return_column = f'{instrument}_log_return'

    # Operate on the `prices` argument only; relying on a notebook-level
    # global frame breaks the function for any other input.
    prices[ma_column] = prices[instrument].rolling(ma).mean()
    prices[return_column] = np.log(prices[ma_column] / prices[ma_column].shift(1))

    # The first `ma` rows have no moving average and/or no previous value to
    # compare against, so they carry NaN and are removed here.
    prices.dropna(inplace = True)

    # One observation per row, one feature per observation.
    prices_array = prices[return_column].to_numpy().reshape(-1, 1)

    return prices, prices_array

In [None]:
# Enrich `stocks` with a 7-row moving average and its log returns.
# The function modifies and returns the frame it is given, so `stocks1`
# and `stocks` refer to the same object after this call.
stocks1, prices_array = prepare_data_for_model_input(stocks, 7)
stocks1.head()

Adj Close,Adj Close,Adj Close_ma,Adj Close_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-12,3850.02002,3887.122838,-0.010279
2000-01-13,3957.209961,3895.054269,0.002038
2000-01-14,4064.27002,3921.729981,0.006825
2000-01-18,4130.810059,3979.398577,0.014598
2000-01-19,4151.290039,4017.779995,0.009599


In [None]:
# Inspect the column array of log returns that is passed to the HMM below.
prices_array

array([[-0.0102789 ],
       [ 0.00203836],
       [ 0.00682527],
       ...,
       [ 0.00360776],
       [ 0.00271207],
       [ 0.00440741]])

In [None]:
# Plot the log-return series of the moving-average price over time
fig = px.line(
    stocks,
    x=stocks.index,
    y='Adj Close_log_return',
    title='Adj Close Log Return vs. Date',
)

# Render the figure
fig.show()

No. of negative values in log returns column

In [None]:
class RegimeDetection:
    """Builds and fits a Gaussian hidden Markov model from a parameter dict."""

    def get_regimes_hmm(self, input_data, params):
        """Configure a fresh GaussianHMM with `params`, fit it on `input_data`,
        and return the result of `fit`."""
        configured = self.initialise_model(GaussianHMM(), params)
        return configured.fit(input_data)

    def initialise_model(self, model, params):
        """Set every key of `params` as an attribute on `model`; return `model`."""
        for name in params:
            setattr(model, name, params[name])
        return model


In [None]:
import datetime
def plot_hidden_states(hidden_states, prices_df):

    '''
    Scatter-plot a price series with each point coloured by its hidden state.

    Input:
    hidden_states(numpy.ndarray) - array of predicted hidden states, one per row of prices_df
    prices_df(df) - dataframe of close prices; the column named by
                    prices_df.columns.name is plotted (the first column is used
                    when that name is not an existing column)

    Output:
    Graph showing hidden states and prices. The figure is shown and nothing is
    returned; the number of observations per state is printed.

    '''

    # The first two entries keep the original two-state colours; the palette is
    # indexed modulo its length so models with more states do not raise IndexError.
    colors = ['blue', 'green', 'red', 'orange', 'purple', 'brown']

    # Accept lists as well as arrays so the element-wise comparison below works.
    hidden_states = np.asarray(hidden_states)

    price_column = f"{prices_df.columns.name}"
    if price_column not in prices_df.columns:
        price_column = prices_df.columns[0]
    price_series = prices_df[price_column]

    fig = go.Figure()

    # Iterate over the labels actually present: counting unique labels and
    # looping over range(count) would skip or mislabel states when the decoded
    # sequence does not visit every state.
    for i in np.unique(hidden_states):
        mask = hidden_states == i
        print('Number of observations for State ', i,":", len(prices_df.index[mask]))

        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=price_series[mask],
                    mode='markers',  name='Hidden State ' + str(i),
                    marker=dict(size=4,color=colors[int(i) % len(colors)])))

    fig.update_layout(height=400, width=900,legend=dict(
            yanchor="top", y=0.99, xanchor="left",x=0.01), margin=dict(l=20, r=20, t=20, b=20)).show()

In [None]:
# Helper object used below to build and fit the HMM.
regime_detection = RegimeDetection()

In [None]:
# Two-state Gaussian HMM; the fixed random_state keeps the fit repeatable.
params = dict(n_components=2, covariance_type="full", random_state=100)

# Fit on the log-return observations, then predict a state for every row.
hmm_model = regime_detection.get_regimes_hmm(prices_array, params)
hmm_states = hmm_model.predict(prices_array)

# Plot the instrument's price, coloured by predicted state.
plot_hidden_states(np.array(hmm_states), stocks[[trading_instrument]])




Number of observations for State  0 : 1385
Number of observations for State  1 : 4461
