In [None]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Downloading hmmlearn-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (165 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.9/165.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3


In [None]:
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import yfinance as yf
from hmmlearn import hmm
from plotly.subplots import make_subplots
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate

def load_csv_data(file_path):
    """Read headerless OHLCV market data from a CSV file.

    The file is read with ``header=None``; its six columns are named
    Datetime, Open, High, Low, Close and Volume, and the parsed ``Datetime``
    column becomes the index.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when reading fails or the file
        yields no rows. Failures are reported with ``print``, not raised.
    """
    column_names = ["Datetime", "Open", "High", "Low", "Close", "Volume"]
    try:
        frame = pd.read_csv(
            file_path,
            header=None,
            names=column_names,
            parse_dates=["Datetime"],
            index_col="Datetime",
        )
        # An empty frame is routed through the same reporting path as any
        # other load failure.
        if frame.empty:
            raise ValueError("CSV file is empty or improperly formatted.")
    except Exception as e:
        print(f"Error loading CSV data: {e}")
        return None
    return frame

def calculate_log_returns(data):
    """Return a copy of ``data`` with a ``LogReturns`` column added.

    ``LogReturns`` is the first difference of ``log(Close)``. The first row
    has no previous close, so its return is NaN and the row is removed by the
    final ``dropna()`` together with any other row that contains a missing
    value in any column.

    Args:
        data: DataFrame with a ``Close`` column. It is not modified; the
            calculation is done on a copy so the caller's frame never gains
            a column as a side effect.

    Returns:
        A new DataFrame containing the original columns plus ``LogReturns``,
        without rows that contain NaN.
    """
    result = data.copy()
    # No dropna() before assigning: column assignment aligns on the index, so
    # the leading NaN would be reintroduced anyway.
    result['LogReturns'] = np.log(result['Close']).diff()
    return result.dropna()

# 2. Volatility Estimation (Corrected MedRV)
def calculate_medrv(returns, window=20):
    """Block-wise median realized variance (MedRV), normalised per return.

    ``returns`` is cut into consecutive, non-overlapping blocks of ``window``
    observations. Within a block of N returns the squared returns are passed
    through a rolling median of width 3, which yields N - 2 medians. The
    block value is::

        pi / (6 - 4*sqrt(3) + pi) * mean(medians)

    i.e. the MedRV estimator ``pi / (6 - 4*sqrt(3) + pi) * N / (N - 2) *
    sum(medians)`` divided by N, so that a short final block is on the same
    scale as a full one. Every observation in a block receives that block's
    value.

    Args:
        returns: pandas Series of returns.
        window: Number of observations per block; must be at least 3.

    Returns:
        A Series named ``MedRV`` with the same index and length as
        ``returns``. Positions in a block that cannot produce a median
        (fewer than 3 returns, or no complete triple of non-missing values)
        are NaN.

    Raises:
        ValueError: If ``window`` is smaller than 3.
    """
    if window < 3:
        raise ValueError("window must be at least 3 to form a 3-return median")

    scale = np.pi / (6 - 4 * np.sqrt(3) + np.pi)
    # Pre-fill with NaN so the output always lines up with returns.index,
    # even when a block is too short to be estimated.
    values = np.full(len(returns), np.nan)

    for start in range(0, len(returns), window):
        block = returns.iloc[start:start + window]
        if len(block) < 3:
            continue  # Not enough returns for a single 3-wide median

        medians = (block ** 2).rolling(3, min_periods=3).median().dropna()
        if medians.empty:
            continue

        # Average over the medians actually available; this can neither go
        # negative nor divide by zero for small blocks.
        values[start:start + len(block)] = scale * medians.mean()

    return pd.Series(values, index=returns.index, name='MedRV')

# 3. HMM Implementation with Data Checks
def create_hmm(n_components, features):
    """Standardise ``features`` and fit a Gaussian HMM to them.

    Args:
        n_components: Number of hidden states for the HMM.
        features: Feature matrix with one row per observation.

    Returns:
        A ``(model, scaler)`` pair: the fitted ``hmm.GaussianHMM`` and the
        ``StandardScaler`` that was fitted on ``features``.

    Raises:
        ValueError: If ``features`` has fewer than 10 rows.
    """
    n_rows = len(features)
    if n_rows < 10:  # Refuse to train on a handful of observations
        raise ValueError("Insufficient data for HMM training")

    # The HMM is trained on standardised features; the scaler is returned so
    # callers can apply the same transform before predicting.
    scaler = StandardScaler()
    standardized = scaler.fit_transform(features)

    hmm_settings = dict(
        n_components=n_components,
        covariance_type="diag",
        n_iter=1000,
        random_state=42,
    )
    model = hmm.GaussianHMM(**hmm_settings)
    model.fit(standardized)
    return model, scaler

def ait_sahalia_jacod_test(returns):
    """Label a return series as finite- or infinite-activity jumps.

    Two strided subsamples of ``returns`` are taken, one with a coarse step
    of ``max(2, len(returns) // 10)`` and one with half that step (integer
    division). The label is based on the ratio of their sums of squares.

    NOTE(review): this compares sums of squares of subsampled returns; it
    does not aggregate returns over coarser intervals, so it may not match
    the published Ait-Sahalia-Jacod statistic -- confirm with the author.
    When the coarse step is even, the coarse subsample is a subset of the
    fine one, so the ratio cannot exceed 1.

    Args:
        returns: Array-like supporting strided slicing and element-wise
            ``** 2`` (e.g. a NumPy array or pandas Series).

    Returns:
        ``"Undetermined"`` if the fine-step sum is zero or the ratio is NaN,
        ``"Infinite Activity Jumps"`` if the ratio is below 1, otherwise
        ``"Finite Activity Jumps"``.
    """
    coarse_step = max(2, len(returns) // 10)  # never below 2
    fine_step = coarse_step // 2

    coarse_sum_sq = np.sum(returns[::coarse_step] ** 2)
    fine_sum_sq = np.sum(returns[::fine_step] ** 2)

    # A zero denominator means there is nothing to compare.
    if fine_sum_sq == 0:
        return "Undetermined"

    ratio = coarse_sum_sq / fine_sum_sq
    if np.isnan(ratio):
        return "Undetermined"
    return "Infinite Activity Jumps" if ratio < 1 else "Finite Activity Jumps"

# 4. Main Execution with Error Handling
def main(file_path, n_states=3):
    """Run the jump-detection pipeline on a CSV file of market data.

    Steps: load the CSV, add log returns and the MedRV feature, fit a
    Gaussian HMM on both features, treat the hidden state with the largest
    log-return standard deviation as the jump state, label every row in that
    state with ``ait_sahalia_jacod_test`` applied to all log returns up to
    and including that row, print a summary table and plot the result.

    Args:
        file_path: Path of the CSV file handed to ``load_csv_data``.
        n_states: Number of hidden states for the HMM.

    Returns:
        ``(data, log_table)`` on success, where ``data`` is the enriched
        DataFrame and ``log_table`` holds its ``JumpSignal``, ``JumpType``
        and ``MedRV`` columns. ``None`` if the file could not be loaded or
        fewer than 10 rows remain after feature engineering.
    """
    # Load market data from the CSV file
    data = load_csv_data(file_path)
    if data is None or data.empty:
        print("Failed to fetch valid data")
        return

    # Feature engineering
    data = calculate_log_returns(data)
    data['MedRV'] = calculate_medrv(data['LogReturns'])
    data = data.dropna()

    if len(data) < 10:  # Check sufficient data after processing
        print("Insufficient data after processing")
        return

    # Prepare features; column 0 is LogReturns, column 1 is MedRV
    features = data[['LogReturns', 'MedRV']]

    # HMM training
    model, scaler = create_hmm(n_components=n_states, features=features)
    states = model.predict(scaler.transform(features))

    # Jump detection. hmmlearn exposes covars_ as full
    # (n_states, n_features, n_features) matrices even for
    # covariance_type="diag". Taking cov[0] there yields a whole row, and
    # argmax over a list of rows returns a flattened index, not a state id.
    # Read the LogReturns variance explicitly instead, accepting the 2-D
    # diagonal layout as well.
    covars = np.asarray(model.covars_)
    log_return_variance = covars[:, 0, 0] if covars.ndim == 3 else covars[:, 0]
    state_volatilities = np.sqrt(log_return_variance)
    jump_state = int(np.argmax(state_volatilities))
    data['JumpSignal'] = (states == jump_state).astype(int)

    # Classify only the flagged rows, slicing the return history by position
    # so duplicate timestamps cannot break the lookup.
    log_returns = data['LogReturns'].to_numpy()
    jump_types = ['No Jump'] * len(data)
    for pos in np.flatnonzero(states == jump_state):
        jump_types[pos] = ait_sahalia_jacod_test(log_returns[:pos + 1])
    data['JumpType'] = jump_types

    log_table = data[['JumpSignal', 'JumpType', 'MedRV']]
    print("\nJump Log Table:\n", tabulate(log_table[log_table['JumpSignal'] == 1], headers='keys', tablefmt='pretty'))
    print("Finite = ", len(log_table[log_table['JumpType'] == 'Finite Activity Jumps']))
    print("Infinite = ", len(log_table[log_table['JumpType'] == 'Infinite Activity Jumps']))
    plot_results(data)
    return data, log_table


# 5. Visualization Function
def plot_results(data):
    """Plot price, log returns with detected jumps, and MedRV using Plotly.

    The three series differ by orders of magnitude, so each gets its own
    panel with its own y-axis; the panels are stacked and share the x-axis.
    On a single axis the returns and MedRV would collapse onto a flat line
    next to the price.

    Args:
        data: DataFrame indexed by time with ``Close``, ``LogReturns``,
            ``MedRV`` and ``JumpSignal`` columns. Rows with
            ``JumpSignal == 1`` are drawn as red markers on the returns
            panel.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    fig = make_subplots(
        rows=3,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.06,
        subplot_titles=('Price', 'Log Returns and Detected Jumps', 'MedRV'),
    )

    # Price
    fig.add_trace(
        go.Scatter(x=data.index, y=data['Close'], mode='lines', name='Price', line=dict(color='blue')),
        row=1, col=1,
    )

    # Returns with Jump Signals
    fig.add_trace(
        go.Scatter(x=data.index, y=data['LogReturns'], mode='lines', name='Log Returns', line=dict(color='grey', width=1)),
        row=2, col=1,
    )
    jumps = data[data['JumpSignal'] == 1]
    fig.add_trace(
        go.Scatter(x=jumps.index, y=jumps['LogReturns'], mode='markers', name='Detected Jumps', marker=dict(color='red', symbol='triangle-up', size=8)),
        row=2, col=1,
    )

    # Volatility
    fig.add_trace(
        go.Scatter(x=data.index, y=data['MedRV'], mode='lines', name='MedRV', line=dict(color='purple')),
        row=3, col=1,
    )

    fig.update_yaxes(title_text='Price', row=1, col=1)
    fig.update_yaxes(title_text='Log Returns', row=2, col=1)
    fig.update_yaxes(title_text='MedRV', row=3, col=1)
    fig.update_xaxes(title_text='Datetime', row=3, col=1)
    fig.update_layout(title='Market Price and Jump Detection', template='plotly_dark')
    fig.show()

if __name__ == "__main__":
    # Example run on the CSV file below. main() returns (data, log_table) on
    # success and None on failure, so `data` holds whichever of those came
    # back -- not a bare DataFrame.
    data = main(file_path="/content/NYFANG_1min.txt")

Error loading CSV data: [Errno 2] No such file or directory: '/content/NYFANG_1min.txt'
Failed to fetch valid data
