In [1]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
import plotly.graph_objects as go
from plotly.graph_objs.scatter.marker import Line
from plotly.subplots import make_subplots
import plotly.express as px
from sklearn.mixture import GaussianMixture
import math
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from hmmlearn.hmm import GaussianHMM

Data Ingestion

In [2]:
data = pd.read_excel('5years_database.xlsx', sheet_name='Data with Vix')
data.head(5)

Unnamed: 0,Date,Sp-close,Volume,Vix-close
0,02/22/2023,3991.05,4079320000,22.29
1,02/21/2023,3997.34,4121590000,22.87
2,02/17/2023,4079.09,4045480000,20.02
3,02/16/2023,4090.41,4143660000,20.17
4,02/15/2023,4147.6,4075980000,18.23


In [3]:
data['Sp-close'] = pd.to_numeric(data['Sp-close'])

In [4]:
def prepare_data_for_model_input(data, ma):
    
    data['sp_close_ma'] = data['Sp-close'].rolling(ma).mean()
    data['sp_log_return'] = np.log(data['Sp-close']/data['Sp-close'].shift(1)).dropna()
    data['sp_volume_norm'] = (data['Volume'] - data['Volume'].mean()) / data['Volume'].std()

    data.dropna(inplace=True)
    data.reset_index(inplace=True)
    data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%Y') # convert 'Date' column to datetime
    data.set_index('Date', inplace=True)  # Set the 'Date' column as the index

    data_array = np.array(data[['sp_log_return', 'sp_volume_norm']])

    
    return data, data_array

In [5]:
data, data_array = prepare_data_for_model_input(data, 7)

In [6]:
class RegimeDetection:

 def get_regimes_gmm(self, input_data, params):
       gmm = self.initialise_model(GaussianMixture(), params).fit(input_data)
       return gmm       
 def initialise_model(self, model, params):
      for parameter, value in params.items():
         setattr(model, parameter, value)
      return model

In [7]:
def plot_hidden_states(hidden_states, data_df):
    '''
    Input:
    hidden_states(numpy.ndarray) - array of predicted hidden states
    data_df(df) - dataframe of market data

    Output:
    Graph showing hidden states and market data
    '''

    colors = ['green','blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()
    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", len(data_df.index[mask]))

        x_axis = [dt.strftime('%Y-%m-%d') for dt in data_df.index[mask].tolist()]

        fig.add_trace(go.Scatter(x=x_axis, y=data_df['Sp-close'][mask],
                    mode='markers', name='Hidden State ' + str(i), marker=dict(size=4, color=colors[i%2])))

    fig.update_layout(height=400, width=900, legend=dict(
            yanchor="top", y=0.99, xanchor="left", x=0.01), margin=dict(l=20, r=20, t=20, b=20)).show()


In [8]:
rd = RegimeDetection()

In [9]:
params = {'n_components':2, 'covariance_type': 'full', 'max_iter': 100000, 'n_init': 30,'init_params': 'kmeans', 'random_state':100}
gmm_model = rd.get_regimes_gmm(data_array, params)
gmm_states = gmm_model.predict(data_array)
plot_hidden_states(np.array(gmm_states), data[['Sp-close']])

Number of observations for State  0 : 989
Number of observations for State  1 : 173
