In [3]:
#!pip install yfinance
#!pip install hmmlearn
import yfinance as yf
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
from sklearn.metrics import mean_squared_error


ModuleNotFoundError: No module named 'hmmlearn'

In [5]:
pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp312-cp312-macosx_10_9_universal2.whl.metadata (3.0 kB)
Collecting scikit-learn!=0.22.0,>=0.16 (from hmmlearn)
  Downloading scikit_learn-1.5.2.tar.gz (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25l|

In [2]:
# Yahoo Finance symbol for the S&P 500 index.
ticker = "^GSPC"

# Fetch bars at a one-month interval for the requested date range and discard
# every row that contains a missing value.
data = yf.download(
    ticker,
    start="1950-01-01",
    end="2016-11-30",
    interval="1mo",
).dropna()

[*********************100%%**********************]  1 of 1 completed


In [3]:
# Summary statistics (min, max, mean, standard deviation) for the four price
# columns; the resulting table has one row per price column.
price_columns = ["Open", "High", "Low", "Close"]
stat_methods = {"Min": "min", "Max": "max", "Mean": "mean", "Std.": "std"}

descriptive_stats = {"Price": price_columns}
descriptive_stats.update(
    (label, [getattr(data[column], method)() for column in price_columns])
    for label, method in stat_methods.items()
)

summary_table = pd.DataFrame(descriptive_stats)

print(summary_table)


   Price         Min          Max         Mean        Std.
0   Open  167.199997  2173.149902   969.192478  543.577386
1   High  180.270004  2214.100098  1000.686034  557.155179
2    Low  163.360001  2147.580078   935.889869  526.896105
3  Close  179.630005  2198.810059   974.394776  545.519109


In [None]:
# Model selection
# hmmlearn estimators take observations shaped (n_samples, n_features), so the
# closing prices are stored as a single-feature 2-D column rather than a flat
# 1-D vector. The reshape is a no-op when the values are already 2-D.
close = np.asarray(data['Close'], dtype=float).reshape(len(data), -1)

def calculate_model_criteria(hmm_model, M, k, observations=None):
    """Compute information criteria for a fitted HMM.

    Parameters
    ----------
    hmm_model : object
        Fitted model exposing ``score(X)``; the value it returns is used as
        the log-likelihood ``L`` in every criterion.
    M : int
        Number of observations.
    k : int
        Number of free model parameters.
    observations : array-like, optional
        Data passed to ``hmm_model.score``. When omitted, the notebook-level
        ``close`` array is used, which keeps existing three-argument calls
        working. A 1-D input is treated as a single feature and reshaped to
        a column.

    Returns
    -------
    tuple
        ``(aic, bic, hqc, caic)``, all computed from ``-2 * L`` plus the
        respective penalty term.
    """
    if observations is None:
        # Backward-compatible fallback to the notebook global.
        observations = close

    X = np.asarray(observations, dtype=float)
    if X.ndim == 1:
        # score() needs (n_samples, n_features); a flat vector is one feature.
        X = X.reshape(-1, 1)

    L = hmm_model.score(X)
    log_m = np.log(M)

    aic = -2 * L + 2 * k
    bic = -2 * L + k * log_m
    hqc = -2 * L + 2 * k * np.log(log_m)
    caic = -2 * L + k * (1 + log_m)
    return aic, bic, hqc, caic


# Candidate models: Gaussian HMMs with 2 to 6 hidden states.
state_range = range(2, 7)
best_model = None
best_criteria = {"AIC": float('inf'), "BIC": float('inf'), "HQC": float('inf'), "CAIC": float('inf')}

# hmmlearn needs observations shaped (n_samples, n_features). Normalising here
# is idempotent (a 2-D array is left unchanged) and guarantees that fit(), the
# scoring helper and the feature count below all see a 2-D array.
close = np.asarray(close, dtype=float).reshape(len(close), -1)
data_length, num_features = close.shape

for num_states in state_range:
    model = GaussianHMM(n_components=num_states, covariance_type='diag', random_state=42)
    model.fit(close)

    # Free parameters of an N-state, d-feature diagonal Gaussian HMM:
    # N(N-1) transition + (N-1) initial + N*d means + N*d variances
    # = N^2 + 2*N*d - 1.
    num_params = num_states ** 2 + 2 * num_states * num_features - 1
    aic, bic, hqc, caic = calculate_model_criteria(model, data_length, num_params)

    # BIC is the primary selection criterion; the others are recorded alongside it.
    if bic < best_criteria["BIC"]:
        best_model = model
        best_criteria = {"AIC": aic, "BIC": bic, "HQC": hqc, "CAIC": caic}

if best_model is None:
    # Happens only when no candidate produced a finite, comparable BIC.
    raise RuntimeError("No HMM candidate produced a usable BIC; cannot select a model.")

print("Best Model Criteria:", best_criteria)

# Step 6: Predict closing price (one step ahead)
# Local 2-D view of the observations: predict() needs (n_samples, n_features).
observations = np.asarray(close, dtype=float).reshape(len(close), -1)

# The HMM is fitted on price levels, so each state's mean is itself a price
# level, not a percentage change. Given the current state s, the expected
# next-period close is sum_j transmat_[s, j] * means_[j].
expected_next_close = best_model.transmat_ @ best_model.means_[:, -1]

# NOTE: best_model's parameters were estimated on the full sample, so this is
# an in-sample exercise; only the state decoding is restricted to past data.
predicted_prices = []
for i in range(len(observations)):
    state = best_model.predict(observations[:i + 1])[-1]  # state decoded from data up to i
    predicted_prices.append(expected_next_close[state])   # forecast for period i + 1

# Benchmark: mean of the previous 10 closes, lagged so row t only uses data before t.
historical_avg_model = data['Close'].rolling(window=10).mean().shift(1)

# Step 7: Evaluate predictions
# predicted_prices[i] targets period i + 1, so shift by one row: row t then
# holds the forecast *for* t made at t - 1, aligned with the lagged benchmark.
data['Predicted_Close'] = pd.Series(predicted_prices, index=data.index).shift(1)
data['HAR_Close'] = historical_avg_model
data.dropna(inplace=True)

hmm_mse = mean_squared_error(data['Close'], data['Predicted_Close'])
har_mse = mean_squared_error(data['Close'], data['HAR_Close'])
print(f"HMM MSE: {hmm_mse}, HAR MSE: {har_mse}")

# Step 8: Trading simulation (buy if predicted rise, sell otherwise)
# The position for period t is decided at the end of t - 1: go long when the
# forecast for t exceeds the last observed close, short otherwise. Comparing
# against the previous close keeps the signal free of same-period look-ahead.
data['Signal'] = np.where(data['Predicted_Close'] > data['Close'].shift(1), 1, -1)
data['Return'] = data['Signal'] * data['Close'].pct_change()

hmm_cumulative_return = (1 + data['Return']).cumprod()
buy_and_hold_return = (1 + data['Close'].pct_change()).cumprod()

# Print results
print(f"HMM Cumulative Return: {hmm_cumulative_return.iloc[-1]:.2f}")
print(f"Buy-and-Hold Cumulative Return: {buy_and_hold_return.iloc[-1]:.2f}")