Importing required libraries


In [None]:
!pip install hmmlearn


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting hmmlearn
  Downloading hmmlearn-0.2.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl (217 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m217.2/217.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.2.8


In [None]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
import plotly.graph_objects as go
from plotly.graph_objs.scatter.marker import Line
from plotly.subplots import make_subplots
import plotly.express as px
import math

import warnings

warnings.filterwarnings('ignore')



Loading the dataset


In [None]:
# Load the ^GSPC daily price history. The path is under /content/drive, so this
# presumably needs Google Drive mounted first (the mount cell is not shown here).
df1 = pd.read_csv('/content/drive/MyDrive/Capstone/Data/^GSPC.csv')
df1.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2000-01-03,1469.25,1478.0,1438.359985,1455.219971,1455.219971,931800000
1,2000-01-04,1455.219971,1455.219971,1397.430054,1399.420044,1399.420044,1009000000
2,2000-01-05,1399.420044,1413.27002,1377.680054,1402.109985,1402.109985,1085500000
3,2000-01-06,1402.109985,1411.900024,1392.099976,1403.449951,1403.449951,1092300000
4,2000-01-07,1403.449951,1441.469971,1400.72998,1441.469971,1441.469971,1225200000


In [None]:
# Summary statistics for every numeric column.
df1.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,5852.0,5852.0,5852.0,5852.0,5852.0,5852.0
mean,1897.236028,1908.609778,1884.928111,1897.458841,1897.458841,3310019000.0
std,985.705448,990.934957,980.049559,985.847329,985.847329,1524686000.0
min,679.280029,695.27002,666.789978,676.530029,676.530029,356070000.0
25%,1185.672485,1192.485016,1178.11499,1185.63501,1185.63501,1996340000.0
50%,1437.960022,1448.01001,1429.434998,1438.169983,1438.169983,3418445000.0
75%,2424.232422,2432.17749,2407.572509,2419.4599,2419.4599,4166290000.0
max,4804.509766,4818.620117,4780.040039,4796.560059,4796.560059,11456230000.0


In [None]:
# Check column types. Note that Date is loaded as a plain string (object),
# not as a datetime.
df1.dtypes

Date          object
Open         float64
High         float64
Low          float64
Close        float64
Adj Close    float64
Volume         int64
dtype: object

In [None]:
# Count missing values per column.
df1.isnull().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

No null values. 


Selecting only 'Date' and 'Adj Close' columns.

In [None]:
# Keep only the date and the adjusted close, indexed by date.
stocks = df1[['Date', 'Adj Close']].set_index('Date')
stocks.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2000-01-03,1455.219971
2000-01-04,1399.420044
2000-01-05,1402.109985
2000-01-06,1403.449951
2000-01-07,1441.469971


In [None]:
# Name of the price column; later cells use it to label the column axis and to
# select the column that is plotted against the hidden states.
trading_instrument = "Adj Close"

In [None]:
# Load the ^IXIC price history and reduce it to a date-indexed 'Adj Close' frame,
# mirroring how `stocks` is built from the ^GSPC file.
df2 = pd.read_csv('/content/drive/MyDrive/Capstone/Data/^IXIC.csv')
stocks_nasdaq = df2[['Date', 'Adj Close']].set_index('Date')
stocks_nasdaq.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
2000-01-03,4131.149902
2000-01-04,3901.689941
2000-01-05,3877.540039
2000-01-06,3727.129883
2000-01-07,3882.620117


In [None]:
# Label the column axis with the instrument name: plot_hidden_states reads
# prices_df.columns.name to decide which column to plot.
stocks.columns.name = trading_instrument 
# Line chart of the adjusted close over the full history.
px.line(stocks['Adj Close']) 



We can see the 2007-2009 financial crisis, the COVID crash in 2020, and the 2022 recession period.

The function `prepare_data_for_model_input` returns a dataframe with the close price, its moving average, and the log returns of that moving average, together with those log returns as an array to be used as model input.

In [None]:
def prepare_data_for_model_input(prices, ma, instrument='Adj Close'):
    '''
        Add a moving average and its log returns to ``prices`` and build the
        model input array.

        Input:
        prices (df) - Dataframe of close prices containing an ``instrument``
            column. It is modified in place: the two derived columns are added
            and every row containing a missing value is dropped.
        ma (int) - length of the moving average window, in rows
        instrument (str) - name of the price column to transform

        Output:
        prices (df) - the same dataframe object that was passed in, now with
            ``{instrument}_ma`` and ``{instrument}_log_return`` columns
        prices_array (np.ndarray) - the log returns as a column vector of
            shape (n_rows, 1)

        Raises:
        KeyError - if ``instrument`` is not a column of ``prices``
    '''

    ma_col = f'{instrument}_ma'
    log_return_col = f'{instrument}_log_return'

    # Operate on the frame that was passed in rather than on a notebook-level
    # global, and roll over the price column only so the call still works when
    # the frame already carries other columns.
    prices[ma_col] = prices[instrument].rolling(ma).mean()
    # Log return of the smoothed series (not of the raw price).
    prices[log_return_col] = np.log(prices[ma_col] / prices[ma_col].shift(1))

    # Removes the warm-up rows (ma - 1 from the rolling window plus one from the
    # shift) along with any other row that has a missing value. Done in place
    # because callers keep using the frame they passed in.
    prices.dropna(inplace=True)

    # Independent copy, so later edits to the array cannot touch the dataframe.
    prices_array = prices[log_return_col].to_numpy(copy=True).reshape(-1, 1)

    return prices, prices_array

In [None]:
# 7-row moving average. The function modifies `stocks` in place and returns that
# same object, so `stocks1` and `stocks` refer to the same dataframe from here on.
stocks1, prices_array = prepare_data_for_model_input(stocks, 7)
stocks1.head()

Adj Close,Adj Close,Adj Close_ma,Adj Close_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-12,1432.25,1424.979998,-0.0023
2000-01-13,1449.680054,1432.159999,0.005026
2000-01-14,1465.150024,1441.165719,0.006269
2000-01-18,1455.140015,1448.550014,0.005111
2000-01-19,1455.900024,1450.61145,0.001422


In [None]:
# Log returns as a column vector (one single-element row per date); this is the
# array passed to the HMM below.
prices_array

array([[-0.00230014],
       [ 0.00502602],
       [ 0.00626852],
       ...,
       [ 0.00624603],
       [ 0.00458035],
       [ 0.00397103]])

In [None]:
# Create a line graph of the log-return column against date. Note that this
# column is the log return of the moving average, not of the raw adjusted close
# (see prepare_data_for_model_input).
fig = px.line(stocks, x=stocks.index, y='Adj Close_log_return', title='Adj Close Log Return vs. Date')

# Show the graph
fig.show()

No. of negative values in log returns column

In [None]:
class RegimeDetection:
    """Configures a GaussianHMM from a parameter dict and fits it."""

    def get_regimes_hmm(self, input_data, params):
        """Fit a fresh GaussianHMM, configured from ``params``, on ``input_data``.

        Returns whatever ``fit`` returns for the configured model.
        """
        configured = self.initialise_model(GaussianHMM(), params)
        return configured.fit(input_data)

    def initialise_model(self, model, params):
        """Set every ``params`` entry as an attribute on ``model`` and return it."""
        for name in params:
            setattr(model, name, params[name])
        return model


In [None]:
import datetime
def plot_hidden_states(hidden_states, prices_df):
    '''
    Plot prices as markers coloured by their predicted hidden state.

    Input:
    hidden_states(numpy.ndarray) - array of predicted hidden states, one
        integer label per row of prices_df
    prices_df(df) - dataframe of close prices. The column plotted is the one
        named by prices_df.columns.name; if that is not a column label, the
        first column is used instead.

    Output:
    Graph showing hidden states and prices (displayed, nothing is returned).
    The number of observations in each state is also printed.

    '''

    # The first two colours are the original palette; the rest are cycled so
    # models with more than two states no longer fail on a missing colour.
    colors = ['blue', 'green', 'red', 'orange', 'purple', 'brown', 'magenta', 'gray']
    hidden_states = np.asarray(hidden_states)

    # Prefer the column named by the column-axis label, but do not depend on an
    # earlier cell having set it.
    price_column = f"{prices_df.columns.name}"
    if price_column not in prices_df.columns:
        price_column = prices_df.columns[0]
    price_series = prices_df[price_column]

    fig = go.Figure()

    # Iterate over the labels that actually occur, so a state that was never
    # predicted does not shift or hide the others.
    for state in np.unique(hidden_states):
        state = int(state)
        mask = hidden_states == state
        state_index = prices_df.index[mask]
        print('Number of observations for State ', state, ":", len(state_index))

        fig.add_trace(go.Scatter(x=state_index, y=price_series[mask],
                    mode='markers', name='Hidden State ' + str(state),
                    marker=dict(size=4, color=colors[state % len(colors)])))

    fig.update_layout(height=400, width=900, legend=dict(
            yanchor="top", y=0.99, xanchor="left", x=0.01), margin=dict(l=20, r=20, t=20, b=20)).show()

In [None]:
# Helper object used below to configure and fit the HMM.
regime_detection = RegimeDetection()

In [None]:
# Two-state Gaussian HMM with full covariance and a fixed random_state.
params = dict(n_components=2, covariance_type="full", random_state=100)

# Fit on the log-return column vector, then label each observation with a state.
hmm_model = regime_detection.get_regimes_hmm(prices_array, params)
hmm_states = hmm_model.predict(prices_array)

# Plot the price column coloured by the predicted state.
plot_hidden_states(np.array(hmm_states), stocks[[trading_instrument]])



Number of observations for State  0 : 770
Number of observations for State  1 : 5075
