In [None]:
! pip install -r requirements.txt

In [1]:
import pandas as pd
import pywt
import numpy as np
from scipy.signal import find_peaks
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from hmmlearn import hmm
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pandas_ta as ta
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from plotly.subplots import make_subplots



df = pd.read_csv(r"D:\DataSpell\projects\tech-trade-test\tsc\coin_Ethereum.csv", parse_dates=['Date'], index_col=['Date'])
df = df[["Open", "High", "Low", "Close", "Volume"]]
df = df.rename(columns={ i:i.lower() for i in ["Open", "High", "Low", "Close", "Volume"]})

ModuleNotFoundError: No module named 'pywt'

In [4]:
def ichimoku_cloud(df):
    high_prices = df['high']
    low_prices = df['low']
    close_prices = df['close']
    df['tenkan_sen'] = (high_prices.rolling(window=9).max() + low_prices.rolling(window=9).min()) / 2
    df['kijun_sen'] = (high_prices.rolling(window=26).max() + low_prices.rolling(window=26).min()) / 2
    df['senkou_span_A'] = ((df['tenkan_sen'] + df['kijun_sen']) / 2).shift(26)
    df['senkou_span_B'] = ((high_prices.rolling(window=52).max() + low_prices.rolling(window=52).min()) / 2).shift(26)
    return df

def fibonacci_retracement(df):
    max_price = df['high'].max()
    min_price = df['low'].min()
    difference = max_price - min_price
    df['fib_r1'] = max_price - 0.236 * difference  # 23.6%
    df['fib_r2'] = max_price - 0.382 * difference  # 38.2%
    df['fib_r3'] = max_price - 0.618 * difference  # 61.8%
    return df

def pivot_points(df):
    df['pivot_point'] = (df['high'].shift(1) + df['low'].shift(1) + df['close'].shift(1)) / 3
    df['resistance1'] = 2 * df['pivot_point'] - df['low'].shift(1)
    df['support1'] = 2 * df['pivot_point'] - df['high'].shift(1)
    return df

def chaikin_volatility(df, ema_length=10, roc_length=10):
    high_low_diff = df['high'] - df['low']
    ema_high_low = high_low_diff.ewm(span=ema_length).mean()
    roc = ((ema_high_low - ema_high_low.shift(roc_length)) / ema_high_low.shift(roc_length)) * 100
    df['chaikin_volatility'] = roc
    return df

def volume_oscillator(df, short_span=12, long_span=26, signal_span=9):
    vo = ((df['volume'].rolling(window=short_span).mean() - df['volume'].rolling(window=long_span).mean()) / df['volume'].rolling(window=long_span).mean()) * 100
    vo_signal = vo.rolling(window=signal_span).mean()
    df['volume_oscillator'], df['vo_signal'] = vo, vo_signal
    return df

def keltner_channel(df, atr_length=20, multiplier=2):
    # Середня величина True Range (ATR)
    high_low = df['high'] - df['low']
    high_close = np.abs(df['high'] - df['close'].shift())
    low_close = np.abs(df['low'] - df['close'].shift())
    tr = high_low.combine(high_close, max).combine(low_close, max)
    atr = tr.rolling(atr_length).mean()
    middle_line = df['close'].ewm(span=atr_length).mean()
    upper_line = middle_line + multiplier * atr
    lower_line = middle_line - multiplier * atr
    df['keltner_upper'], df['keltner_middle'], df['keltner_lower'] = upper_line, middle_line, lower_line
    return df

def wavelet_smooth(data, wavelet='db4', level=None):
    """ Enhanced Wavelet Smoothing with adjustable decomposition level and thresholding """

    coeffs = pywt.wavedec(data, wavelet, level=level)
    sigma = np.median(np.abs(coeffs[-level]))/0.6745
    uthresh = sigma*np.sqrt(2*np.log(len(data)))
    coeffs[1:] = [pywt.threshold(i, value=uthresh, mode='soft') for i in coeffs[1:]]
    smoothed = pywt.waverec(coeffs, wavelet)
    smoothed = smoothed[:len(data)]
    return smoothed

def detect_market_modes(df, column, min_peak_distance=30, slope_std_multiplier=2):
    df['rolling_slope'] = df[column].diff().rolling(window=min_peak_distance, center=True).mean()
    rolling_slope_std = df['rolling_slope'].std()
    slope_threshold = rolling_slope_std * slope_std_multiplier
    df['market_mode'] = 'Flat'

    df.loc[df['rolling_slope'] > slope_threshold, 'market_mode'] = 'Bull'
    df.loc[df['rolling_slope'] < -slope_threshold, 'market_mode'] = 'Bear'
    df['market_mode'] = df['market_mode'].ffill().bfill()
    bull_bear_persist_filter = (df['market_mode'].shift() != df['market_mode']) & \
                               (df['market_mode'].shift(-1) != df['market_mode'])
    df.loc[bull_bear_persist_filter, 'market_mode'] = 'Flat'
    return df

def plot_market_modes(df, column, labeler_column):
    # Create traces for the close prices and market modes
    trace_close = go.Scatter(
        x=df.index,
        y=df[column],
        name='Close Price',
        line=dict(color='blue')
    )
    trace_bull = go.Scatter(
        x=df[df[labeler_column] == 'Bull'].index,
        y=df[df[labeler_column] == 'Bull'][column],
        mode='markers',
        name='Bull Market',
        marker=dict(color='green', size=10, symbol='triangle-up')
    )
    trace_bear = go.Scatter(
        x=df[df[labeler_column] == 'Bear'].index,
        y=df[df[labeler_column] == 'Bear'][column],
        mode='markers',
        name='Bear Market',
        marker=dict(color='red', size=10, symbol='triangle-down')
    )
    
    fig = go.Figure(data=[trace_close, trace_bull, trace_bear])#, trace_flat])
    
    # Set the layout of the figure
    fig.update_layout(
        title='Market Modes Over Time',
        xaxis_title='Date',
        yaxis_title='Price',
        hovermode='closest'
    )
    
    # Show the figure
    fig.show()

def add_ti(df):
    df['SMA'] = ta.sma(df['close'], length=14)  # Проста рухома середня
    df['EMA'] = ta.ema(df['close'], length=14)  # Експоненційна рухома середня
    df['RSI_30'] = ta.rsi(df['close'], length=30)  # Індекс відносної сили
    #df['MACD'] = ta.macd(df['close'])#['MACD_12_26_9']  # Лінія MACD
    #df['MACD_signal'] = ta.macd(df['close'])['MACDs_12_26_9']  # Сигнальна лінія MACD
    df['BB'] = ta.bbands(df['close'], length=20, std=2)['BBL_20_2.0']  # Болінджер Бендс
    df['ATR'] = ta.atr(df['high'], df['low'], df['close'])
    df['RSI'] = ta.rsi(df['close'])
    df['STOCH'] = ta.stoch(df['high'], df['low'], df['close'])['STOCHd_14_3_3']
    df['ADX'] = ta.adx(df['high'], df['low'], df['close'])['ADX_14']
    df['PVO'] = ta.pvo(df['volume'])['PVO_12_26_9']
    df = ichimoku_cloud(df)
    df = fibonacci_retracement(df)
    df = pivot_points(df)
    df = chaikin_volatility(df)
    df = volume_oscillator(df)
    df = keltner_channel(df)
    df.drop(['close_wavelet_smoothed', 'rolling_slope'], axis=1, inplace=True)
    df.dropna(inplace=True)
    return df


In [5]:
df['close_wavelet_smoothed'] = wavelet_smooth(df['close'], wavelet='db4', level=5)
df = detect_market_modes(df, 'close_wavelet_smoothed', min_peak_distance=20, slope_std_multiplier=0.25)
plot_market_modes(df, column='close_wavelet_smoothed', labeler_column='market_mode')
df = add_ti(df)
df.tail()

Unnamed: 0_level_0,open,high,low,close,volume,market_mode,SMA,EMA,RSI_30,BB,...,fib_r3,pivot_point,resistance1,support1,chaikin_volatility,volume_oscillator,vo_signal,keltner_upper,keltner_middle,keltner_lower
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-07-02 23:59:59,2109.892677,2155.596496,2021.824808,2150.040364,31796210000.0,Flat,2040.458874,2126.316034,46.653418,1697.965039,...,1666.678021,2156.361333,2231.641708,2038.325148,-24.099164,-1.380344,-7.509554,2564.915494,2176.395799,1787.876104
2021-07-03 23:59:59,2150.835025,2237.567155,2117.590013,2226.114282,17433360000.0,Flat,2043.859956,2139.622467,47.718162,1710.872352,...,1666.678021,2109.153889,2196.482971,2062.711283,-27.536419,-4.638909,-7.158872,2558.175528,2181.130893,1804.086258
2021-07-04 23:59:59,2226.550382,2384.286857,2190.837703,2321.724112,18787110000.0,Flat,2049.242783,2163.902686,49.040535,1729.881898,...,1666.678021,2193.75715,2269.924287,2149.947145,-20.108931,-6.790339,-6.948064,2577.205795,2194.520723,1811.835651
2021-07-05 23:59:59,2321.922836,2321.922836,2163.041394,2198.582464,20103790000.0,Flat,2071.39528,2168.526657,47.441753,1771.050622,...,1666.678021,2298.949557,2407.061411,2213.612258,-21.742373,-7.270452,-6.724946,2581.073154,2194.907555,1808.741957
2021-07-06 23:59:59,2197.919385,2346.294874,2197.919385,2324.679449,20891860000.0,Flat,2103.518811,2189.347029,49.19625,1774.789855,...,1666.678021,2227.848898,2292.656402,2133.77496,-17.586103,-5.920007,-6.148717,2582.611312,2207.266783,1831.922255


In [7]:
! pip3 install pomegranate

Defaulting to user installation because normal site-packages is not writeable
Collecting pomegranate
  Downloading pomegranate-1.0.3-py3-none-any.whl (90 kB)
[K     |████████████████████████████████| 90 kB 1.9 MB/s eta 0:00:01
[?25hCollecting torch>=1.9.0
  Downloading torch-2.2.1-cp39-none-macosx_11_0_arm64.whl (59.7 MB)
[K     |████████████████████████████████| 59.7 MB 20.6 MB/s eta 0:00:01     |████████████▉                   | 23.9 MB 18.0 MB/s eta 0:00:02
Collecting apricot-select>=0.6.1
  Downloading apricot-select-0.6.1.tar.gz (28 kB)
Collecting networkx>=2.8.4
  Downloading networkx-3.2.1-py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 16.6 MB/s eta 0:00:01
Collecting numba>=0.43.0
  Downloading numba-0.59.0-cp39-cp39-macosx_11_0_arm64.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 21.5 MB/s eta 0:00:01
Collecting nose
  Downloading nose-1.3.7-py3-none-any.whl (154 kB)
[K     |████████████████████████████████| 154 kB 19.7 MB/s eta

In [6]:
def train_test_hmm(df, label_column, split_date):
    le = LabelEncoder()
    df[label_column] = le.fit_transform(df[label_column])
    train_df = df[df.index < split_date]
    test_df = df[df.index >= split_date]
    scaler = StandardScaler()
    X_train = scaler.fit_transform(train_df.drop(label_column, axis=1))
    X_test = scaler.transform(test_df.drop(label_column, axis=1))
    y_train = train_df[label_column].values
    y_test = test_df[label_column].values

    model = hmm.GaussianHMM(n_components=len(le.classes_), covariance_type="full", n_iter=100)
    model.fit(X_train)
    
    y_pred_test = model.predict(X_test)
    state_probabilities = model.predict_proba(X_test)
    print(state_probabilities)

    print("TRAIN:")
    print(classification_report(y_train, model.predict(X_train), target_names=le.classes_))

    print("TEST:")
    print(classification_report(y_test, y_pred_test, target_names=le.classes_))
    
    return test_df, y_test, y_pred_test, state_probabilities, le.classes_

# Використання функції
test_df, true_states, predicted_states, state_probabilities, state_names = train_test_hmm(df.copy(), 'market_mode', pd.to_datetime('2021-01-01'))

def plot_hmm_results(test_df, true_states, predicted_states, state_probabilities, state_names):
    # Створення фігури з 3 підграфіками
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                        vertical_spacing=0.02, subplot_titles=('1. Close Price with True State Markers',
                                                              '2. Close Price with Predicted State Markers',
                                                              '3. State Probabilities Over Time'))

    # Кольори для кожного стану
    colors = ['red', 'green', 'blue', 'orange', 'purple']

    # Додавання доріжок до першого підграфіку (реальна розмітка)
    for state, name in enumerate(state_names):
        fig.add_trace(go.Scatter(
            x=test_df.index[true_states == state],
            y=test_df['close'][true_states == state],
            mode='markers',
            name=f'True State: {name}',
            marker=dict(color=colors[state % len(colors)], symbol=state)
        ), row=1, col=1)

    # Додавання ціни закриття до першого підграфіку
    fig.add_trace(go.Scatter(
        x=test_df.index,
        y=test_df['close'],
        mode='lines',
        name='Close Price',
        line=dict(color='grey', width=1),
        showlegend=False
    ), row=1, col=1)

    # Додавання доріжок до другого підграфіку (передбачення моделі)
    for state, name in enumerate(state_names):
        fig.add_trace(go.Scatter(
            x=test_df.index[predicted_states == state],
            y=test_df['close'][predicted_states == state],
            mode='markers',
            name=f'Predicted State: {name}',
            marker=dict(color=colors[state % len(colors)], symbol=state),
            showlegend=False
        ), row=2, col=1)

    # Додавання ціни закриття до другого підграфіку
    fig.add_trace(go.Scatter(
        x=test_df.index,
        y=test_df['close'],
        mode='lines',
        name='Close Price',
        line=dict(color='grey', width=1),
        showlegend=False
    ), row=2, col=1)

    # Додавання доріжок до третього підграфіку (ймовірності станів)
    for state, name in enumerate(state_names):
        fig.add_trace(go.Scatter(
            x=test_df.index,
            y=state_probabilities[:, state],
            mode='lines',
            name=f'State {name} Probability',
            line=dict(color=colors[state % len(colors)], width=2),
            showlegend=False
        ), row=3, col=1)

    # Оновлення макету
    fig.update_layout(height=1200, title_text='HMM States and Probabilities')
    fig.update_yaxes(title_text="Close Price", row=1, col=1)
    fig.update_yaxes(title_text="Close Price", row=2, col=1)
    fig.update_yaxes(title_text="Probability", row=3, col=1)

    # Відображення графіку
    fig.show()
plot_hmm_results(test_df, true_states, predicted_states, state_probabilities, state_names)


Model is not converging.  Current: 132287.78660659117 is not greater than 161896.81917364593. Delta is -29609.03256705476


[[0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+000 1.00000000e+000]
 [0.00000000e+000 0.00000000e+0


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

