In [23]:
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import yfinance as yf
import pandas as pd
import random
import plotly.graph_objects as go
import numpy as np
import scipy.stats as scs
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc
from collections import Counter, deque
from imblearn.under_sampling import RandomUnderSampler

# S&P

In [61]:
data = yf.download('^GSPC', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,6097.0
Model:,MarkovRegression,Log Likelihood,19384.157
Date:,"Mon, 06 May 2024",AIC,-38760.314
Time:,22:47:42,BIC,-38733.452
Sample:,0,HQIC,-38750.993
,- 6097,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,5.06e-05,1.64e-06,30.786,0.000,4.74e-05,5.38e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0004,1.58e-05,24.184,0.000,0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9899,0.002,489.871,0.000,0.986,0.994
p[1->0],0.0227,0.005,4.962,0.000,0.014,0.032


In [62]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-04,1455.219971,1455.219971,1397.430054,1399.420044,1399.420044,1009000000,-0.039099
2000-01-05,1399.420044,1413.270020,1377.680054,1402.109985,1402.109985,1085500000,0.001920
2000-01-06,1402.109985,1411.900024,1392.099976,1403.449951,1403.449951,1092300000,0.000955
2000-01-07,1403.449951,1441.469971,1400.729980,1441.469971,1441.469971,1225200000,0.026730
2000-01-10,1441.469971,1464.359985,1441.469971,1457.599976,1457.599976,1064800000,0.011128
...,...,...,...,...,...,...,...
2024-03-22,5242.479980,5246.089844,5229.870117,5234.180176,5234.180176,3374700000,-0.001403
2024-03-25,5219.520020,5229.089844,5216.089844,5218.189941,5218.189941,3331360000,-0.003060
2024-03-26,5228.850098,5235.160156,5203.419922,5203.580078,5203.580078,3871790000,-0.002804
2024-03-27,5226.310059,5249.259766,5213.919922,5248.490234,5248.490234,3850500000,0.008594


In [63]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 4212	Number of observations for State  1 : 1885	

In [64]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [65]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return,regime
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2000-01-05,1399.420044,1413.270020,1377.680054,1402.109985,1402.109985,1085500000,0.001920,1.0
2000-01-06,1402.109985,1411.900024,1392.099976,1403.449951,1403.449951,1092300000,0.000955,1.0
2000-01-07,1403.449951,1441.469971,1400.729980,1441.469971,1441.469971,1225200000,0.026730,1.0
2000-01-10,1441.469971,1464.359985,1441.469971,1457.599976,1457.599976,1064800000,0.011128,1.0
2000-01-11,1457.599976,1458.660034,1434.420044,1438.560059,1438.560059,1014000000,-0.013149,1.0
...,...,...,...,...,...,...,...,...
2024-03-22,5242.479980,5246.089844,5229.870117,5234.180176,5234.180176,3374700000,-0.001403,0.0
2024-03-25,5219.520020,5229.089844,5216.089844,5218.189941,5218.189941,3331360000,-0.003060,0.0
2024-03-26,5228.850098,5235.160156,5203.419922,5203.580078,5203.580078,3871790000,-0.002804,0.0
2024-03-27,5226.310059,5249.259766,5213.919922,5248.490234,5248.490234,3850500000,0.008594,0.0


In [66]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.98985098]
  [0.02268881]]

 [[0.01014902]
  [0.97731119]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9899
Probability of transitioning from Regime 0 to Regime 1: 0.0227
Probability of transitioning from Regime 1 to Regime 0: 0.0101
Probability of transitioning from Regime 1 to Regime 1: 0.9773


In [67]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.992099  0.007901
2024-04-01   0.98211  0.030232
2024-04-02  0.972449  0.051829
2024-04-03  0.963106  0.072716
2024-04-04  0.954069  0.092918
2024-04-05   0.94533  0.112457
2024-04-08  0.936877  0.131354
2024-04-09  0.928701   0.14963
2024-04-10  0.920795  0.167306
2024-04-11  0.913147  0.184402
2024-04-12  0.905751  0.200936
2024-04-15  0.898598  0.216928
2024-04-16   0.89168  0.232394
2024-04-17  0.884989  0.247353
2024-04-18  0.878517   0.26182
2024-04-19  0.872259  0.275812
2024-04-22  0.866205  0.289345
2024-04-23  0.860351  0.302433
2024-04-24  0.854688  0.315091
2024-04-25  0.849212  0.327334
2024-04-26  0.843915  0.339175
2024-04-29  0.838793  0.350627
2024-04-30  0.833838  0.361703
2024-05-01  0.829047  0.372415
2024-05-02  0.824412  0.382775
2024-05-03   0.81993  0.392796
2024-05-06  0.815595  0.402487
2024-05-07  0.811402   0.41186
2024-05-08  0.807348  0.420925
2024-05-09  0.803426  0.429692


In [68]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.992099  0.007901
2024-04-01   0.98211  0.030232
2024-04-02  0.972449  0.051829
2024-04-03  0.963106  0.072716
2024-04-04  0.954069  0.092918
2024-04-05   0.94533  0.112457
2024-04-08  0.936877  0.131354
2024-04-09  0.928701   0.14963
2024-04-10  0.920795  0.167306
2024-04-11  0.913147  0.184402
2024-04-12  0.905751  0.200936
2024-04-15  0.898598  0.216928
2024-04-16   0.89168  0.232394
2024-04-17  0.884989  0.247353
2024-04-18  0.878517   0.26182
2024-04-19  0.872259  0.275812
2024-04-22  0.866205  0.289345
2024-04-23  0.860351  0.302433
2024-04-24  0.854688  0.315091
2024-04-25  0.849212  0.327334
2024-04-26  0.843915  0.339175
2024-04-29  0.838793  0.350627
2024-04-30  0.833838  0.361703
2024-05-01  0.829047  0.372415
2024-05-02  0.824412  0.382775
2024-05-03   0.81993  0.392796
2024-05-06  0.815595  0.402487
2024-05-07  0.811402   0.41186
2024-05-08  0.807348  0.420925
2024-05-09  0.803426  0.429692

Future Pr

In [69]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

predicted_df

Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition probabilities for regime 0 : [0.97759218 0.02240782]
Normalized Transition pro

Unnamed: 0,Predicted Prices,Regime
2024-03-29,5257.742198,0
2024-04-01,5261.136489,0
2024-04-02,5264.532971,0
2024-04-03,5267.931645,0
2024-04-04,5271.332514,0
2024-04-05,5274.735578,0
2024-04-08,5278.140839,0
2024-04-09,5281.548299,0
2024-04-10,5284.957958,0
2024-04-11,5288.369818,0


# Ftse

In [70]:
data = yf.download('^FTSE', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,6122.0
Model:,MarkovRegression,Log Likelihood,19689.026
Date:,"Mon, 06 May 2024",AIC,-39370.052
Time:,22:48:19,BIC,-39343.174
Sample:,0,HQIC,-39360.728
,- 6122,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,5.067e-05,1.5e-06,33.757,0.000,4.77e-05,5.36e-05

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0004,1.49e-05,23.675,0.000,0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9884,0.002,457.615,0.000,0.984,0.993
p[1->0],0.0300,0.006,5.378,0.000,0.019,0.041


In [71]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-05,6665.899902,6665.899902,6500.399902,6535.899902,6535.899902,670234000,-0.019695
2000-01-06,6535.899902,6547.299805,6424.399902,6447.200195,6447.200195,785532000,-0.013664
2000-01-07,6447.200195,6532.399902,6438.799805,6504.799805,6504.799805,888306000,0.008894
2000-01-10,6504.799805,6634.200195,6504.799805,6607.700195,6607.700195,735455000,0.015695
2000-01-11,6607.700195,6638.100098,6510.799805,6518.899902,6518.899902,863011000,-0.013530
...,...,...,...,...,...,...,...
2024-03-22,7882.600098,7961.399902,7882.200195,7930.899902,7930.899902,1056237300,0.006109
2024-03-25,7930.899902,7939.399902,7891.700195,7917.600098,7917.600098,1070739800,-0.001678
2024-03-26,7917.600098,7934.399902,7892.100098,7931.000000,7931.000000,1278455500,0.001691
2024-03-27,7931.000000,7938.100098,7893.000000,7932.000000,7932.000000,1017867800,0.000126


In [72]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 4470	Number of observations for State  1 : 1652	

In [73]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [74]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.98838571]
  [0.0299875 ]]

 [[0.01161429]
  [0.9700125 ]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9884
Probability of transitioning from Regime 0 to Regime 1: 0.0300
Probability of transitioning from Regime 1 to Regime 0: 0.0116
Probability of transitioning from Regime 1 to Regime 1: 0.9700


In [75]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.991108  0.008892
2024-04-01  0.979701  0.038346
2024-04-02  0.968767  0.066575
2024-04-03  0.958289  0.093629
2024-04-04  0.948247  0.119558
2024-04-05  0.938622  0.144409
2024-04-08  0.929398  0.168225
2024-04-09  0.920557  0.191051
2024-04-10  0.912085  0.212927
2024-04-11  0.903965  0.233893
2024-04-12  0.896182  0.253987
2024-04-15  0.888723  0.273244
2024-04-16  0.881575  0.291701
2024-04-17  0.874724   0.30939
2024-04-18  0.868158  0.326343
2024-04-19  0.861865  0.342591
2024-04-22  0.855834  0.358162
2024-04-23  0.850054  0.373086
2024-04-24  0.844515  0.387389
2024-04-25  0.839205  0.401097
2024-04-26  0.834117  0.414235
2024-04-29  0.829241  0.426826
2024-04-30  0.824567  0.438894
2024-05-01  0.820087  0.450459
2024-05-02  0.815794  0.461543
2024-05-03   0.81168  0.472166
2024-05-06  0.807737  0.482348
2024-05-07  0.803958  0.492105
2024-05-08  0.800336  0.501457
2024-05-09  0.796865   0.51042


In [76]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.991108  0.008892
2024-04-01  0.979701  0.038346
2024-04-02  0.968767  0.066575
2024-04-03  0.958289  0.093629
2024-04-04  0.948247  0.119558
2024-04-05  0.938622  0.144409
2024-04-08  0.929398  0.168225
2024-04-09  0.920557  0.191051
2024-04-10  0.912085  0.212927
2024-04-11  0.903965  0.233893
2024-04-12  0.896182  0.253987
2024-04-15  0.888723  0.273244
2024-04-16  0.881575  0.291701
2024-04-17  0.874724   0.30939
2024-04-18  0.868158  0.326343
2024-04-19  0.861865  0.342591
2024-04-22  0.855834  0.358162
2024-04-23  0.850054  0.373086
2024-04-24  0.844515  0.387389
2024-04-25  0.839205  0.401097
2024-04-26  0.834117  0.414235
2024-04-29  0.829241  0.426826
2024-04-30  0.824567  0.438894
2024-05-01  0.820087  0.450459
2024-05-02  0.815794  0.461543
2024-05-03   0.81168  0.472166
2024-05-06  0.807737  0.482348
2024-05-07  0.803958  0.492105
2024-05-08  0.800336  0.501457
2024-05-09  0.796865   0.51042

Future Pr

In [77]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

print(predicted_df)


Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition probabilities for regime 0 : [0.97055352 0.02944648]
Normalized Transition pro

In [78]:
predicted_df

Unnamed: 0,Predicted Prices,Regime
2024-03-29,7954.75914,0
2024-04-01,7956.918768,0
2024-04-02,7959.078982,0
2024-04-03,7961.239783,0
2024-04-04,7963.40117,0
2024-04-05,7965.563144,0
2024-04-08,7967.725706,0
2024-04-09,7969.888854,0
2024-04-10,7972.052589,0
2024-04-11,7974.216912,0


# Nikkei

In [79]:
data = yf.download('^N225', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,5938.0
Model:,MarkovRegression,Log Likelihood,17311.911
Date:,"Mon, 06 May 2024",AIC,-34615.823
Time:,22:49:57,BIC,-34589.066
Sample:,0,HQIC,-34606.526
,- 5938,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0001,3.77e-06,30.353,0.000,0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0006,3.78e-05,15.283,0.000,0.001,0.001

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9882,0.002,436.580,0.000,0.984,0.993
p[1->0],0.0452,0.009,5.222,0.000,0.028,0.062


In [80]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-05,19003.509766,19003.509766,18221.820312,18542.550781,18542.550781,0,-0.024521
2000-01-06,18574.009766,18582.740234,18168.269531,18168.269531,18168.269531,0,-0.020391
2000-01-07,18194.050781,18285.730469,18068.099609,18193.410156,18193.410156,0,0.001383
2000-01-11,18246.099609,18887.560547,18246.099609,18850.919922,18850.919922,0,0.035502
2000-01-12,18780.169922,18811.869141,18626.919922,18677.419922,18677.419922,0,-0.009246
...,...,...,...,...,...,...,...
2024-03-25,40798.960938,40837.179688,40414.121094,40414.121094,40414.121094,101500000,-0.011668
2024-03-26,40345.039062,40529.531250,40280.851562,40398.031250,40398.031250,101400000,-0.000398
2024-03-27,40517.171875,40979.359375,40452.210938,40762.730469,40762.730469,121300000,0.008987
2024-03-28,40324.421875,40482.019531,40054.058594,40168.070312,40168.070312,135600000,-0.014696


In [81]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 4779	Number of observations for State  1 : 1159	

In [83]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [84]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.98815325]
  [0.04517171]]

 [[0.01184675]
  [0.95482829]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9882
Probability of transitioning from Regime 0 to Regime 1: 0.0452
Probability of transitioning from Regime 1 to Regime 0: 0.0118
Probability of transitioning from Regime 1 to Regime 1: 0.9548


In [85]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-04-01  0.979376  0.020624
2024-04-02  0.968018  0.063933
2024-04-03  0.957307  0.104772
2024-04-04  0.947207  0.143282
2024-04-05  0.937683  0.179597
2024-04-08  0.928703  0.213841
2024-04-09  0.920234  0.246133
2024-04-10  0.912248  0.276583
2024-04-11  0.904717  0.305297
2024-04-12  0.897616  0.332374
2024-04-15   0.89092  0.357907
2024-04-16  0.884605  0.381984
2024-04-17  0.878651  0.404688
2024-04-18  0.873036  0.426098
2024-04-19  0.867741  0.446287
2024-04-22  0.862748  0.465325
2024-04-23   0.85804  0.483277
2024-04-24    0.8536  0.500206
2024-04-25  0.849414  0.516169
2024-04-26  0.845466  0.531222
2024-04-29  0.841743  0.545417
2024-04-30  0.838233  0.558803
2024-05-01  0.834922  0.571425
2024-05-02  0.831801  0.583328
2024-05-03  0.828857  0.594552
2024-05-06  0.826081  0.605136
2024-05-07  0.823464  0.615116
2024-05-08  0.820996  0.624528
2024-05-09  0.818668  0.633402
2024-05-10  0.816473  0.641771


In [86]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-04-01  0.979376  0.020624
2024-04-02  0.968018  0.063933
2024-04-03  0.957307  0.104772
2024-04-04  0.947207  0.143282
2024-04-05  0.937683  0.179597
2024-04-08  0.928703  0.213841
2024-04-09  0.920234  0.246133
2024-04-10  0.912248  0.276583
2024-04-11  0.904717  0.305297
2024-04-12  0.897616  0.332374
2024-04-15   0.89092  0.357907
2024-04-16  0.884605  0.381984
2024-04-17  0.878651  0.404688
2024-04-18  0.873036  0.426098
2024-04-19  0.867741  0.446287
2024-04-22  0.862748  0.465325
2024-04-23   0.85804  0.483277
2024-04-24    0.8536  0.500206
2024-04-25  0.849414  0.516169
2024-04-26  0.845466  0.531222
2024-04-29  0.841743  0.545417
2024-04-30  0.838233  0.558803
2024-05-01  0.834922  0.571425
2024-05-02  0.831801  0.583328
2024-05-03  0.828857  0.594552
2024-05-06  0.826081  0.605136
2024-05-07  0.823464  0.615116
2024-05-08  0.820996  0.624528
2024-05-09  0.818668  0.633402
2024-05-10  0.816473  0.641771

Future Pr

In [87]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

print(predicted_df)


Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 0 : [0.95628509 0.04371491]
Normalized Transition probabilities for regime 1 : [0.01225515 0.98774485]
Normalized Transition probabilities for regime 1 : [0.01225515 0.98774485]
Normalized Transition probabilities for regime 1 : [0.01225515 0.98774485]
Normalized Transition probabilities for regime 1 : [0.01225515 0.98774485]
Normalized Transition pro

In [88]:
predicted_df

Unnamed: 0,Predicted Prices,Regime
2024-04-01,40387.76886,0
2024-04-02,40406.104635,0
2024-04-03,40424.448734,0
2024-04-04,40442.801161,0
2024-04-05,40461.16192,0
2024-04-08,40479.531015,0
2024-04-09,40497.908449,0
2024-04-10,40516.294226,0
2024-04-11,40467.707933,1
2024-04-12,40419.179905,1


# Apple


In [90]:
data = yf.download('AAPL', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,6097.0
Model:,MarkovRegression,Log Likelihood,14826.483
Date:,"Mon, 06 May 2024",AIC,-29644.965
Time:,22:52:13,BIC,-29618.103
Sample:,0,HQIC,-29635.644
,- 6097,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0002,9.66e-06,24.737,0.000,0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0019,0.000,18.486,0.000,0.002,0.002

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9778,0.004,257.177,0.000,0.970,0.985
p[1->0],0.0677,0.011,6.188,0.000,0.046,0.089


In [91]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-04,0.966518,0.987723,0.903460,0.915179,0.774790,512377600,-0.088077
2000-01-05,0.926339,0.987165,0.919643,0.928571,0.786128,778321600,0.014527
2000-01-06,0.947545,0.955357,0.848214,0.848214,0.718098,767972800,-0.090514
2000-01-07,0.861607,0.901786,0.852679,0.888393,0.752113,460734400,0.046281
2000-01-10,0.910714,0.912946,0.845982,0.872768,0.738885,505064000,-0.017744
...,...,...,...,...,...,...,...
2024-03-22,171.759995,173.050003,170.059998,172.279999,172.279999,71106600,0.005296
2024-03-25,170.570007,171.940002,169.449997,170.850006,170.850006,54288300,-0.008335
2024-03-26,170.000000,171.419998,169.580002,169.710007,169.710007,57388400,-0.006695
2024-03-27,170.410004,173.600006,170.110001,173.309998,173.309998,60273300,0.020991


In [92]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 4681	Number of observations for State  1 : 1416	

In [93]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [94]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.97784328]
  [0.06772462]]

 [[0.02215672]
  [0.93227538]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9778
Probability of transitioning from Regime 0 to Regime 1: 0.0677
Probability of transitioning from Regime 1 to Regime 0: 0.0222
Probability of transitioning from Regime 1 to Regime 1: 0.9323


In [95]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.974143  0.025857
2024-04-01  0.953132   0.09008
2024-04-02  0.934009   0.14853
2024-04-03  0.916606  0.201726
2024-04-04  0.900766  0.250141
2024-04-05   0.88635  0.294204
2024-04-08   0.87323  0.334307
2024-04-09   0.86129  0.370805
2024-04-10  0.850422  0.404023
2024-04-11  0.840531  0.434256
2024-04-12   0.83153   0.46177
2024-04-15  0.823337  0.486812
2024-04-16  0.815881  0.509603
2024-04-17  0.809095  0.530346
2024-04-18  0.802918  0.549224
2024-04-19  0.797297  0.566405
2024-04-22  0.792182  0.582042
2024-04-23  0.787526  0.596274
2024-04-24  0.783288  0.609226
2024-04-25  0.779431  0.621015
2024-04-26  0.775921  0.631743
2024-04-29  0.772727  0.641508
2024-04-30   0.76982  0.650395
2024-05-01  0.767174  0.658483
2024-05-02  0.764765  0.665844
2024-05-03  0.762574  0.672543
2024-05-06  0.760579   0.67864
2024-05-07  0.758763   0.68419
2024-05-08  0.757111   0.68924
2024-05-09  0.755607  0.693837


In [96]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.974143  0.025857
2024-04-01  0.953132   0.09008
2024-04-02  0.934009   0.14853
2024-04-03  0.916606  0.201726
2024-04-04  0.900766  0.250141
2024-04-05   0.88635  0.294204
2024-04-08   0.87323  0.334307
2024-04-09   0.86129  0.370805
2024-04-10  0.850422  0.404023
2024-04-11  0.840531  0.434256
2024-04-12   0.83153   0.46177
2024-04-15  0.823337  0.486812
2024-04-16  0.815881  0.509603
2024-04-17  0.809095  0.530346
2024-04-18  0.802918  0.549224
2024-04-19  0.797297  0.566405
2024-04-22  0.792182  0.582042
2024-04-23  0.787526  0.596274
2024-04-24  0.783288  0.609226
2024-04-25  0.779431  0.621015
2024-04-26  0.775921  0.631743
2024-04-29  0.772727  0.641508
2024-04-30   0.76982  0.650395
2024-05-01  0.767174  0.658483
2024-05-02  0.764765  0.665844
2024-05-03  0.762574  0.672543
2024-05-06  0.760579   0.67864
2024-05-07  0.758763   0.68419
2024-05-08  0.757111   0.68924
2024-05-09  0.755607  0.693837

Future Pr

In [97]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

print(predicted_df)


Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 0 : [0.93522695 0.06477305]
Normalized Transition probabilities for regime 1 : [0.02321456 0.97678544]
Normalized Transition probabilities for regime 1 : [0.02321456 0.97678544]
Normalized Transition probabilities for regime 1 : [0.02321456 0.97678544]
Normalized Transition probabilities for regime 1 : [0.02321456 0.97678544]
Normalized Transition pro

In [98]:
predicted_df

Unnamed: 0,Predicted Prices,Regime
2024-03-29,171.72489,0
2024-04-01,171.970135,0
2024-04-02,172.215729,0
2024-04-03,172.461675,0
2024-04-04,172.707971,0
2024-04-05,172.95462,0
2024-04-08,173.20162,0
2024-04-09,173.448974,0
2024-04-10,173.29235,1
2024-04-11,173.135867,1


# Google


In [99]:
data = yf.download('GOOG', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,4935.0
Model:,MarkovRegression,Log Likelihood,13205.446
Date:,"Mon, 06 May 2024",AIC,-26402.892
Time:,22:53:46,BIC,-26376.876
Sample:,0,HQIC,-26393.768
,- 4935,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0002,5.72e-06,26.363,0.000,0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0011,7.34e-05,15.349,0.000,0.001,0.001

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9661,0.005,191.197,0.000,0.956,0.976
p[1->0],0.1158,0.018,6.324,0.000,0.080,0.152


In [100]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2004-08-20,2.515820,2.716817,2.503118,2.697639,2.697639,458857488,0.076433
2004-08-23,2.758411,2.826406,2.716070,2.724787,2.724787,366857939,0.010013
2004-08-24,2.770615,2.779581,2.579581,2.611960,2.611960,306396159,-0.042289
2004-08-25,2.614201,2.689918,2.587302,2.640104,2.640104,184645512,0.010717
2004-08-26,2.613952,2.688672,2.606729,2.687676,2.687676,142572401,0.017859
...,...,...,...,...,...,...,...
2024-03-22,150.240005,152.559998,150.089996,151.770004,151.770004,19226300,0.020166
2024-03-25,150.949997,151.455994,148.800003,151.149994,151.149994,15114700,-0.004094
2024-03-26,151.240005,153.199997,151.029999,151.699997,151.699997,19312700,0.003632
2024-03-27,152.145004,152.690002,150.130005,151.940002,151.940002,16622000,0.001581


In [101]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 3947	Number of observations for State  1 : 988	

In [102]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [103]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.9661485 ]
  [0.11581284]]

 [[0.0338515 ]
  [0.88418716]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9661
Probability of transitioning from Regime 0 to Regime 1: 0.1158
Probability of transitioning from Regime 1 to Regime 0: 0.0339
Probability of transitioning from Regime 1 to Regime 1: 0.8842


In [104]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.977119  0.022881
2024-04-01  0.944816  0.133394
2024-04-02  0.917348  0.227367
2024-04-03  0.893992  0.307276
2024-04-04   0.87413  0.375225
2024-04-05  0.857242  0.433005
2024-04-08  0.842881  0.482137
2024-04-09  0.830669  0.523916
2024-04-10  0.820285  0.559442
2024-04-11  0.811455  0.589651
2024-04-12  0.803947  0.615338
2024-04-15  0.797562  0.637182
2024-04-16  0.792133  0.655756
2024-04-17  0.787516   0.67155
2024-04-18  0.783591   0.68498
2024-04-19  0.780252  0.696401
2024-04-22  0.777414  0.706112
2024-04-23     0.775  0.714369
2024-04-24  0.772948  0.721391
2024-04-25  0.771203  0.727362
2024-04-26  0.769718  0.732439
2024-04-29  0.768457  0.736757
2024-04-30  0.767383  0.740428
2024-05-01  0.766471   0.74355
2024-05-02  0.765695  0.746204
2024-05-03  0.765035  0.748462
2024-05-06  0.764474  0.750381
2024-05-07  0.763997  0.752013
2024-05-08  0.763591  0.753401
2024-05-09  0.763247  0.754581


In [105]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.977119  0.022881
2024-04-01  0.944816  0.133394
2024-04-02  0.917348  0.227367
2024-04-03  0.893992  0.307276
2024-04-04   0.87413  0.375225
2024-04-05  0.857242  0.433005
2024-04-08  0.842881  0.482137
2024-04-09  0.830669  0.523916
2024-04-10  0.820285  0.559442
2024-04-11  0.811455  0.589651
2024-04-12  0.803947  0.615338
2024-04-15  0.797562  0.637182
2024-04-16  0.792133  0.655756
2024-04-17  0.787516   0.67155
2024-04-18  0.783591   0.68498
2024-04-19  0.780252  0.696401
2024-04-22  0.777414  0.706112
2024-04-23     0.775  0.714369
2024-04-24  0.772948  0.721391
2024-04-25  0.771203  0.727362
2024-04-26  0.769718  0.732439
2024-04-29  0.768457  0.736757
2024-04-30  0.767383  0.740428
2024-05-01  0.766471   0.74355
2024-05-02  0.765695  0.746204
2024-05-03  0.765035  0.748462
2024-05-06  0.764474  0.750381
2024-05-07  0.763997  0.752013
2024-05-08  0.763591  0.753401
2024-05-09  0.763247  0.754581

Future Pr

In [106]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

print(predicted_df)


Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 0 : [0.89296028 0.10703972]
Normalized Transition probabilities for regime 1 : [0.03687372 0.96312628]
Normalized Transition probabilities for regime 1 : [0.03687372 0.96312628]
Normalized Transition probabilities for regime 1 : [0.03687372 0.96312628]
Normalized Transition probabilities for regime 1 : [0.03687372 0.96312628]
Normalized Transition pro

In [107]:
predicted_df

Unnamed: 0,Predicted Prices,Regime
2024-03-29,152.389782,0
2024-04-01,152.519681,0
2024-04-02,152.64969,0
2024-04-03,152.77981,0
2024-04-04,152.910041,0
2024-04-05,153.040383,0
2024-04-08,153.170836,0
2024-04-09,153.301401,0
2024-04-10,153.405556,1
2024-04-11,153.509783,1


# Amazon


In [108]:
data = yf.download('AMZN', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,6097.0
Model:,MarkovRegression,Log Likelihood,14036.369
Date:,"Mon, 06 May 2024",AIC,-28064.738
Time:,22:55:06,BIC,-28037.875
Sample:,0,HQIC,-28055.417
,- 6097,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0003,8.47e-06,32.152,0.000,0.000,0.000

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0032,0.000,20.606,0.000,0.003,0.003

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9686,0.004,266.899,0.000,0.962,0.976
p[1->0],0.0999,0.012,8.385,0.000,0.077,0.123


In [109]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-04,4.268750,4.575000,4.087500,4.096875,4.096875,349748000,-0.086884
2000-01-05,3.525000,3.756250,3.400000,3.487500,3.487500,769148000,-0.161039
2000-01-06,3.565625,3.634375,3.200000,3.278125,3.278125,375040000,-0.061914
2000-01-07,3.350000,3.525000,3.309375,3.478125,3.478125,210108000,0.059222
2000-01-10,3.628125,3.631250,3.278125,3.459375,3.459375,295158000,-0.005405
...,...,...,...,...,...,...,...
2024-03-22,177.750000,179.259995,176.750000,178.869995,178.869995,27964100,0.004033
2024-03-25,178.009995,180.990005,177.240005,179.710007,179.710007,29815500,0.004685
2024-03-26,180.149994,180.449997,177.949997,178.300003,178.300003,29659000,-0.007877
2024-03-27,179.880005,180.000000,177.309998,179.830002,179.830002,33272600,0.008544


In [110]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 4735	Number of observations for State  1 : 1362	

In [111]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [112]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.96864195]
  [0.0999494 ]]

 [[0.03135805]
  [0.9000506 ]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9686
Probability of transitioning from Regime 0 to Regime 1: 0.0999
Probability of transitioning from Regime 1 to Regime 0: 0.0314
Probability of transitioning from Regime 1 to Regime 1: 0.9001


In [113]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.986394  0.013606
2024-04-01  0.955889  0.110836
2024-04-02   0.92939  0.195298
2024-04-03   0.90637   0.26867
2024-04-04  0.886373  0.332408
2024-04-05  0.869002  0.387777
2024-04-08  0.853912  0.435875
2024-04-09  0.840803  0.477657
2024-04-10  0.829415  0.513953
2024-04-11  0.819523  0.545484
2024-04-12   0.81093  0.572874
2024-04-15  0.803465  0.596667
2024-04-16   0.79698  0.617337
2024-04-17  0.791347  0.635292
2024-04-18  0.786453  0.650889
2024-04-19  0.782202  0.664439
2024-04-22  0.778509  0.676209
2024-04-23  0.775301  0.686434
2024-04-24  0.772515  0.695316
2024-04-25  0.770094  0.703032
2024-04-26  0.767991  0.709735
2024-04-29  0.766164  0.715558
2024-04-30  0.764577  0.720616
2024-05-01  0.763199   0.72501
2024-05-02  0.762001  0.728827
2024-05-03  0.760961  0.732142
2024-05-06  0.760057  0.735023
2024-05-07  0.759272  0.737525
2024-05-08   0.75859  0.739699
2024-05-09  0.757998  0.741587


In [114]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29  0.986394  0.013606
2024-04-01  0.955889  0.110836
2024-04-02   0.92939  0.195298
2024-04-03   0.90637   0.26867
2024-04-04  0.886373  0.332408
2024-04-05  0.869002  0.387777
2024-04-08  0.853912  0.435875
2024-04-09  0.840803  0.477657
2024-04-10  0.829415  0.513953
2024-04-11  0.819523  0.545484
2024-04-12   0.81093  0.572874
2024-04-15  0.803465  0.596667
2024-04-16   0.79698  0.617337
2024-04-17  0.791347  0.635292
2024-04-18  0.786453  0.650889
2024-04-19  0.782202  0.664439
2024-04-22  0.778509  0.676209
2024-04-23  0.775301  0.686434
2024-04-24  0.772515  0.695316
2024-04-25  0.770094  0.703032
2024-04-26  0.767991  0.709735
2024-04-29  0.766164  0.715558
2024-04-30  0.764577  0.720616
2024-05-01  0.763199   0.72501
2024-05-02  0.762001  0.728827
2024-05-03  0.760961  0.732142
2024-05-06  0.760057  0.735023
2024-05-07  0.759272  0.737525
2024-05-08   0.75859  0.739699
2024-05-09  0.757998  0.741587

Future Pr

In [115]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

print(predicted_df)


Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 0 : [0.90646621 0.09353379]
Normalized Transition probabilities for regime 1 : [0.03366733 0.96633267]
Normalized Transition probabilities for regime 1 : [0.03366733 0.96633267]
Normalized Transition probabilities for regime 1 : [0.03366733 0.96633267]
Normalized Transition probabilities for regime 1 : [0.03366733 0.96633267]
Normalized Transition pro

In [116]:
predicted_df

Unnamed: 0,Predicted Prices,Regime
2024-03-29,180.593605,0
2024-04-01,180.807458,0
2024-04-02,181.021564,0
2024-04-03,181.235924,0
2024-04-04,181.450537,0
2024-04-05,181.665405,0
2024-04-08,181.880527,0
2024-04-09,182.095904,0
2024-04-10,181.853046,1
2024-04-11,181.610512,1


# AMD

In [125]:
data = yf.download('AMD', start="2000-01-01", end="2024-03-30", interval="1d")
data['log_return'] = np.log(data['Adj Close'] / data['Adj Close'].shift(1))
data.dropna(inplace=True)

mod_msm = sm.tsa.MarkovRegression(data['log_return'], k_regimes=2, trend='n', switching_variance=True)
res_msm = mod_msm.fit()

hidden_states = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data = data.loc[hidden_states.index]
res_msm.summary()

[*********************100%%**********************]  1 of 1 completed

A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.



0,1,2,3
Dep. Variable:,log_return,No. Observations:,6097.0
Model:,MarkovRegression,Log Likelihood,11853.16
Date:,"Mon, 06 May 2024",AIC,-23698.32
Time:,22:57:04,BIC,-23671.458
Sample:,0,HQIC,-23688.999
,- 6097,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0006,2.19e-05,25.740,0.000,0.001,0.001

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
sigma2,0.0037,0.000,19.727,0.000,0.003,0.004

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.9491,0.006,152.176,0.000,0.937,0.961
p[1->0],0.1151,0.016,7.137,0.000,0.083,0.147


In [126]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2000-01-04,15.125000,15.500000,14.593750,14.625000,14.625000,6290200,-0.058108
2000-01-05,14.531250,15.062500,14.000000,15.000000,15.000000,8204600,0.025318
2000-01-06,15.500000,16.000000,15.250000,16.000000,16.000000,11489400,0.064539
2000-01-07,15.406250,16.406250,15.375000,16.250000,16.250000,8543400,0.015504
2000-01-10,16.750000,17.750000,16.687500,17.500000,17.500000,12916400,0.074108
...,...,...,...,...,...,...,...
2024-03-22,177.210007,180.759995,175.050003,179.649994,179.649994,57832100,0.005414
2024-03-25,172.899994,182.800003,172.000000,178.630005,178.630005,71935300,-0.005694
2024-03-26,179.539993,182.600006,176.330002,177.869995,177.869995,65322400,-0.004264
2024-03-27,179.960007,181.229996,175.399994,179.589996,179.589996,55685600,0.009624


In [127]:
def plot_hidden_states(hidden_states, prices_df, column_name):
    colors = ['green', 'blue']
    n_components = len(np.unique(hidden_states))
    fig = go.Figure()

    for i in range(n_components):
        mask = hidden_states == i
        print('Number of observations for State ', i, ":", sum(mask),  end='\t')



        fig.add_trace(go.Scatter(x=prices_df.index[mask], y=prices_df[column_name][mask],
                                 mode='markers', name='State ' + str(i),
                                 marker=dict(size=4, color=colors[i])))

    fig.update_layout(title='Hidden States Visualization (MSM)', title_x=0.5,
                      xaxis_title='Date',
                      yaxis_title='Price',
                      height=400, width=900,
                      legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
                      margin=dict(l=20, r=20, t=40, b=20))
    fig.show()

plot_hidden_states(hidden_states, data, 'Adj Close')


Number of observations for State  0 : 4376	Number of observations for State  1 : 1721	

In [128]:
regimes = res_msm.smoothed_marginal_probabilities.idxmax(axis=1)

data['regime'] = regimes.shift(1)  # shift for using the previous day's regime for today's prediction
data.dropna(inplace=True)

In [129]:
transition_matrix = res_msm.regime_transition

print("Transition Matrix:")
print(transition_matrix)

num_regimes = len(transition_matrix)

for i in range(num_regimes):
    for j in range(num_regimes):
        probability = transition_matrix[i][j][0]
        print(f"Probability of transitioning from Regime {i} to Regime {j}: {probability:.4f}")


Transition Matrix:
[[[0.9491336 ]
  [0.11508251]]

 [[0.0508664 ]
  [0.88491749]]]
Probability of transitioning from Regime 0 to Regime 0: 0.9491
Probability of transitioning from Regime 0 to Regime 1: 0.1151
Probability of transitioning from Regime 1 to Regime 0: 0.0509
Probability of transitioning from Regime 1 to Regime 1: 0.8849


In [130]:
import pandas as pd
transition_matrix = np.squeeze(res_msm.regime_transition)
last_date = data.index[-1]
future_dates = pd.date_range(start=last_date, periods=31, freq='B')[1:]
future_probabilities = pd.DataFrame(index=future_dates, columns=['Regime 0', 'Regime 1'])
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values
# Simulating future probabilities
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()

print("Future Regime Probabilities:")
print(future_probabilities)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29   0.96647   0.03353
2024-04-01  0.919014  0.140895
2024-04-02  0.879434  0.230443
2024-04-03  0.846422  0.305131
2024-04-04  0.818889  0.367424
2024-04-05  0.795924   0.41938
2024-04-08  0.776771  0.462713
2024-04-09  0.760796  0.498856
2024-04-10  0.747472  0.529001
2024-04-11  0.736359  0.554143
2024-04-12   0.72709  0.575113
2024-04-15   0.71936  0.592603
2024-04-16  0.712912   0.60719
2024-04-17  0.707535  0.619357
2024-04-18  0.703049  0.629505
2024-04-19  0.699308  0.637968
2024-04-22  0.696188  0.645028
2024-04-23  0.693586  0.650915
2024-04-24  0.691415  0.655826
2024-04-25  0.689605  0.659922
2024-04-26  0.688095  0.663338
2024-04-29  0.686836  0.666187
2024-04-30  0.685785  0.668563
2024-05-01  0.684909  0.670545
2024-05-02  0.684179  0.672198
2024-05-03  0.683569  0.673577
2024-05-06  0.683061  0.674727
2024-05-07  0.682637  0.675686
2024-05-08  0.682284  0.676486
2024-05-09  0.681989  0.677153


In [131]:
future_regimes = pd.Series(index=future_dates, dtype=int)
future_probabilities.iloc[0] = res_msm.smoothed_marginal_probabilities.iloc[-1].values

# Simulate future probabilities and determining regimes
for i in range(1, len(future_probabilities)):
    future_probabilities.iloc[i] = np.dot(future_probabilities.iloc[i-1], transition_matrix).flatten()
    future_regimes[i] = np.argmax(future_probabilities.iloc[i])

print("Future Regime Probabilities:")
print(future_probabilities)
print("\nFuture Predicted Regimes:")
print(future_regimes)

Future Regime Probabilities:
            Regime 0  Regime 1
2024-03-29   0.96647   0.03353
2024-04-01  0.919014  0.140895
2024-04-02  0.879434  0.230443
2024-04-03  0.846422  0.305131
2024-04-04  0.818889  0.367424
2024-04-05  0.795924   0.41938
2024-04-08  0.776771  0.462713
2024-04-09  0.760796  0.498856
2024-04-10  0.747472  0.529001
2024-04-11  0.736359  0.554143
2024-04-12   0.72709  0.575113
2024-04-15   0.71936  0.592603
2024-04-16  0.712912   0.60719
2024-04-17  0.707535  0.619357
2024-04-18  0.703049  0.629505
2024-04-19  0.699308  0.637968
2024-04-22  0.696188  0.645028
2024-04-23  0.693586  0.650915
2024-04-24  0.691415  0.655826
2024-04-25  0.689605  0.659922
2024-04-26  0.688095  0.663338
2024-04-29  0.686836  0.666187
2024-04-30  0.685785  0.668563
2024-05-01  0.684909  0.670545
2024-05-02  0.684179  0.672198
2024-05-03  0.683569  0.673577
2024-05-06  0.683061  0.674727
2024-05-07  0.682637  0.675686
2024-05-08  0.682284  0.676486
2024-05-09  0.681989  0.677153

Future Pr

In [132]:
import numpy as np
import pandas as pd

mean_returns = data.groupby('regime')['log_return'].mean()


np.random.seed(0)

# Last known price and regime
last_price = data['Adj Close'].iloc[-1]
last_regime = int(data['regime'].iloc[-1])

# Number of days to forecast
num_days = 30
predicted_prices = [last_price]
predicted_regimes = [last_regime]

for i in range(num_days):
    #the last regime in the list to determine transitions
    current_regime = predicted_regimes[-1]

    #transition probabilities for the current regime
    transitions = transition_matrix[current_regime, :]

    # Normalize and flatten the transition probabilities to ensure they sum to 1 and are 1-dimensional
    transitions = (transitions / np.sum(transitions)).flatten()
    print("Normalized Transition probabilities for regime", current_regime, ":", transitions)

    # Randomly select the next regime based on transition probabilities
    next_regime = np.random.choice([0, 1], p=transitions)
    predicted_regimes.append(next_regime)

    # Compute the mean return for the next regime
    mean_return = mean_returns[next_regime]

    # Calculate the next day's price
    next_price = predicted_prices[-1] * np.exp(mean_return)
    predicted_prices.append(next_price)

predicted_dates = pd.date_range(start=data.index[-1] + pd.Timedelta(days=1), periods=num_days, freq='B')
predicted_df = pd.DataFrame({
    'Predicted Prices': predicted_prices[1:],
    'Regime': predicted_regimes[1:]
}, index=predicted_dates)

print(predicted_df)


Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 0 : [0.89186171 0.10813829]
Normalized Transition probabilities for regime 1 : [0.054357 0.945643]
Normalized Transition probabilities for regime 1 : [0.054357 0.945643]
Normalized Transition probabilities for regime 1 : [0.054357 0.945643]
Normalized Transition probabilities for regime 1 : [0.054357 0.945643]
Normalized Transition probabilities for r

In [133]:
predicted_df

Unnamed: 0,Predicted Prices,Regime
2024-03-29,180.570589,0
2024-04-01,180.651208,0
2024-04-02,180.731863,0
2024-04-03,180.812555,0
2024-04-04,180.893282,0
2024-04-05,180.974046,0
2024-04-08,181.054845,0
2024-04-09,181.135681,0
2024-04-10,181.194638,1
2024-04-11,181.253615,1
