In [3]:
import pandas as pd
import numpy as np
from Util.utilities import *
from Util.price_features import *
from sklearn.ensemble import RandomForestClassifier


In [4]:
# Data
df_ohlcv = pd.read_csv('Analysis/data/full_5m_futures/XRPUSDT_futures_5m_202001_202404.csv', index_col=0)
df_ohlcv['barID'] = df_ohlcv.reset_index().index.values
df_ohlcv.set_index('Time', inplace=True)
df_ohlcv.index = pd.to_datetime(df_ohlcv.index, unit='ms')

In [5]:
# Bollinger Sampling
lookback = 100
feat = getBollinger(df_ohlcv.Close, lookback)

In [6]:
events = getCrossingEvents_dynamicExit(feat, df_ohlcv, -2.75, 0, 'downward', 'upward', 0)
ret = get_lrets(events, df_ohlcv, commission = 0.001, betSize = 1)
Y = pd.DataFrame(index =ret.index)
Y['ret'] = ret
Y['bin'] = (ret>0).astype(int)

In [7]:
# Feature Library
# 13개
def add_featureSetA (X, tEvent, df, scale, lookback):
    print(f'Adding Feature Set A with lookback={lookback} and scale {scale}')
    close_path = getPathMatrix(df.Close, tEvent, nbars = lookback, scale = scale)
    lrets_path = np.log(close_path).diff(axis=1)
    
    X[f'sharpe1_{scale}'] = (lrets_path.sum(axis=1)/np.sqrt((lrets_path**2).sum(axis=1)))
    X[f'sharpe2_{scale}'] = (lrets_path.sum(axis=1)/lrets_path.std(axis=1))
    X[f'returns_{scale}'] = lrets_path.sum(axis=1)
    X[f'std_{scale}'] = lrets_path.std(axis=1)
    X[f'vol_{scale}'] = np.sqrt((lrets_path**2).sum(axis=1))

    X[f'blgr_{scale}'] = getBollinger(df.Close, lookback*scale)[tEvent]
    X[f'trend_{scale}'] = getTrend(tEvent, df.Close, lookback, scale = scale, use_log=True)
    X[f'trendblgr_{scale}'] = getTrendBlgr(tEvent, df.Close, lookback, scale = scale, use_log=True)
    X[f'willR_{scale}'] = williamsR(df.Close, lookback*scale, nFrac=10)[tEvent]

    X[f'ar1_{scale}'] = getAR1(tEvent, df.Close, lookback, scale=scale, use_log=True)
    X[f'adf_{scale}'] = getADF(tEvent, df.Close, lookback, scale=scale,  constant ='c', lags = int(lookback*0.1),)                      

    X[f'macd_norm1_{scale}'] = getMACD_norm(df.Close, span_short = scale*lookback//4, span_long = scale*lookback//2, use_ewm = False)[tEvent]
    X[f'macd_norm2_{scale}'] = getMACD_norm(df.Close, span_short = scale*lookback//2, span_long = scale*lookback, use_ewm = False)[tEvent]
    return X

# 총 30개
def add_featureSetB(X, tEvent, df, scale, lookback):
    print(f'Adding Feature Set B with lookback={lookback} and scale = {scale}')
    import ta
    lookback = lookback*scale
    
    # # Volume (2)
    X[f'cmf_{scale}'] = ta.volume.ChaikinMoneyFlowIndicator(high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'], window=lookback).chaikin_money_flow().loc[tEvent]
    X[f'mfi_{scale}'] = ta.volume.MFIIndicator(high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'], window=lookback).money_flow_index().loc[tEvent]

    # Volatility Indicators (6)
    X[f'bbw_{scale}'] = ta.volatility.BollingerBands(close=df['Close'], window=lookback).bollinger_wband().loc[tEvent]
    X[f'bbp_{scale}'] = ta.volatility.BollingerBands(close=df['Close'], window=lookback).bollinger_pband().loc[tEvent]
    X[f'kcw_{scale}'] = ta.volatility.KeltnerChannel(close=df['Close'], high=df['High'], low=df['Low'], window=lookback).keltner_channel_wband().loc[tEvent]
    X[f'dcw_{scale}'] = ta.volatility.DonchianChannel(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).donchian_channel_wband().loc[tEvent]
    X[f'dcp_{scale}'] = ta.volatility.DonchianChannel(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).donchian_channel_pband().loc[tEvent]
    X[f'ui_{scale}'] = ta.volatility.UlcerIndex(close=df['Close'], window=lookback).ulcer_index().loc[tEvent]

    # Trend Indicators (12)
    X[f'vortex_ind_diff_{scale}'] = ta.trend.VortexIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).vortex_indicator_diff().loc[tEvent]
    X[f'trix_{scale}'] = ta.trend.TRIXIndicator(close=df['Close'], window=lookback).trix().loc[tEvent]
    X[f'kst_{scale}'] = ta.trend.KSTIndicator(close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3, window4=lookback*4).kst().loc[tEvent]
    X[f'kst_sig_{scale}'] = ta.trend.KSTIndicator(close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3, window4=lookback*4).kst_sig().loc[tEvent]
    X[f'kst_diff_{scale}'] = ta.trend.KSTIndicator(close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3, window4=lookback*4).kst_diff().loc[tEvent]
    X[f'stc_{scale}'] = ta.trend.STCIndicator(close=df['Close'], window_slow=lookback, window_fast=lookback//2).stc().loc[tEvent]
    X[f'adx_{scale}'] = ta.trend.ADXIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).adx().loc[tEvent]
    X[f'adx_pos_{scale}'] = ta.trend.ADXIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).adx_pos().loc[tEvent]
    X[f'adx_neg_{scale}'] = ta.trend.ADXIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).adx_neg().loc[tEvent]
    X[f'aroon_up_{scale}'] = ta.trend.AroonIndicator(high=df['High'], low=df['Low'], window=lookback).aroon_up().loc[tEvent]
    X[f'aroon_down_{scale}'] = ta.trend.AroonIndicator(high=df['High'], low=df['Low'], window=lookback).aroon_down().loc[tEvent]
    X[f'aroon_ind_{scale}'] = ta.trend.AroonIndicator(high=df['High'], low=df['Low'], window=lookback).aroon_indicator().loc[tEvent]

    # Momentum Indicators (10)
    X[f'stoch_rsi_{scale}'] = ta.momentum.StochRSIIndicator(close=df['Close'], window=lookback).stochrsi().loc[tEvent]
    X[f'stoch_rsi_k_{scale}'] = ta.momentum.StochRSIIndicator(close=df['Close'], window=lookback).stochrsi_k().loc[tEvent]
    X[f'stoch_rsi_d_{scale}'] = ta.momentum.StochRSIIndicator(close=df['Close'], window=lookback).stochrsi_d().loc[tEvent]
    X[f'tsi_{scale}'] = ta.momentum.TSIIndicator(close=df['Close'], window_slow=lookback, window_fast=lookback//2).tsi().loc[tEvent]
    X[f'uo_{scale}'] = ta.momentum.UltimateOscillator(high=df['High'], low=df['Low'], close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3).ultimate_oscillator().loc[tEvent]
    X[f'stoch_{scale}'] = ta.momentum.StochasticOscillator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).stoch().loc[tEvent]
    X[f'stoch_sig_{scale}'] = ta.momentum.StochasticOscillator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).stoch_signal().loc[tEvent]
    X[f'wr_{scale}'] = ta.momentum.WilliamsRIndicator(high=df['High'], low=df['Low'], close=df['Close'], lbp=lookback).williams_r().loc[tEvent]
    X[f'ppo_{scale}'] = ta.momentum.PercentagePriceOscillator(close=df['Close'], window_slow=lookback, window_fast=lookback//2, window_sign = lookback//3).ppo().loc[tEvent]
    X[f'ppo_signal_{scale}'] = ta.momentum.PercentagePriceOscillator(close=df['Close'], window_slow=lookback, window_fast=lookback//2, window_sign = lookback//3).ppo_signal().loc[tEvent]

    return X


In [8]:
# Build Features
X1 = pd.DataFrame(index = Y.index)
X2 = pd.DataFrame(index = Y.index)

df = df_ohlcv
lookback = 100

scale = 1
X1 = add_featureSetA(X1, Y.index, df, scale, lookback)
X2 = add_featureSetB(X2, Y.index, df, scale, lookback)

scale = 4
X1 = add_featureSetA(X1, Y.index, df, scale, lookback)
X2 = add_featureSetB(X2, Y.index, df, scale, lookback)

scale = 10
X1 = add_featureSetA(X1, Y.index, df, scale, lookback)
X2 = add_featureSetB(X2, Y.index, df, scale, lookback)

scale = 40
X1 = add_featureSetA(X1, Y.index, df, scale, lookback)
X2 = add_featureSetB(X2, Y.index, df, scale, lookback)

Adding Feature Set A with lookback=100 and scale 1
Adding Feature Set B with lookback=100 and scale = 1
Adding Feature Set A with lookback=100 and scale 4
Adding Feature Set B with lookback=100 and scale = 4
Adding Feature Set A with lookback=100 and scale 10
Adding Feature Set B with lookback=100 and scale = 10
Adding Feature Set A with lookback=100 and scale 40
Adding Feature Set B with lookback=100 and scale = 40


  X[f'kst_{scale}'] = ta.trend.KSTIndicator(close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3, window4=lookback*4).kst().loc[tEvent]
  X[f'kst_sig_{scale}'] = ta.trend.KSTIndicator(close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3, window4=lookback*4).kst_sig().loc[tEvent]
  X[f'kst_diff_{scale}'] = ta.trend.KSTIndicator(close=df['Close'], window1=lookback, window2=lookback*2, window3=lookback*3, window4=lookback*4).kst_diff().loc[tEvent]
  X[f'stc_{scale}'] = ta.trend.STCIndicator(close=df['Close'], window_slow=lookback, window_fast=lookback//2).stc().loc[tEvent]
  X[f'adx_{scale}'] = ta.trend.ADXIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).adx().loc[tEvent]
  X[f'adx_pos_{scale}'] = ta.trend.ADXIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=lookback).adx_pos().loc[tEvent]
  X[f'adx_neg_{scale}'] = ta.trend.ADXIndicator(high=df['High'], low=df['Low'], close=df['Close'], window=loo

In [9]:
X = pd.concat([X1,X2], axis = 1)
Y['cont'] = Y['ret'] # Return, Slope, Sharpe
Y['bin'] = (Y['cont']>0).astype(int)
X.shape, Y.shape

((1503, 172), (1503, 3))

In [10]:
X_clean = X.replace([np.inf, -np.inf], np.nan)
X_clean = X_clean.dropna()
intersected_indices = X_clean.index.intersection(Y.index)
Y_clean = Y.loc[intersected_indices]
X_clean = X_clean.loc[intersected_indices]
print(X_clean.shape, Y_clean.shape)

(1453, 172) (1453, 3)


In [11]:
# Below for deployed model
# trainStart, trainEnd = pd.to_datetime("2021-01-01"), pd.to_datetime("2024-04-30"); trainDays = (trainEnd - trainStart).days

# This is for calculate returns for portfolio backtesting only
trainStart, trainEnd = pd.to_datetime("2021-01-01"), pd.to_datetime("2022-12-15"); trainDays = (trainEnd - trainStart).days
testStart, testEnd = pd.to_datetime("2023-01-01"), pd.to_datetime("2024-04-30"); testDays = (testEnd - testStart).days

In [12]:
# X_train, X_test, Y_train, Y_test = train_test_split(X_clean, Y_clean, test_size=0.15, shuffle=False)
X_train, X_test, Y_train, Y_test = X_clean.loc[trainStart:trainEnd], X_clean.loc[testStart:testEnd], Y_clean.loc[trainStart:trainEnd], Y_clean.loc[testStart:testEnd]
print('default precision:', Y_test[Y_test['cont']>0]['cont'].sum()/Y_test['cont'].abs().sum())

default precision: 0.5629937597386255


In [13]:
# RF Simple
clf = RandomForestClassifier(n_estimators=500, max_depth = 1, min_samples_leaf = 0.3, max_features=X.shape[1]//3,random_state=2, bootstrap = True, n_jobs=-1,)
clf.fit(X_train, Y_train.bin, sample_weight=Y_train.cont.abs())
y_proba_train = clf.predict_proba(X_train)[:,1]
y_proba_test = clf.predict_proba(X_test)[:,1]

In [14]:
X.shape

(1503, 172)

In [37]:
used_features = set()
for tree in clf.estimators_:
    tree_features = tree.tree_.feature
    used_features.update(tree_features[tree_features >= 0])
if isinstance(X_train, pd.DataFrame):
    used_feature_names = [X_train.columns[i] for i in used_features]
else:
    used_feature_names = list(used_features)
X_train_ = X_train.loc[:,used_feature_names]
X_test_ = X_test.loc[:,used_feature_names]
# RF Simple
clf = RandomForestClassifier(n_estimators= 500, max_depth = 1, min_samples_leaf = 0.3, max_features='sqrt',random_state=1, bootstrap = True, n_jobs=-1,)
clf.fit(X_train_, Y_train.bin, sample_weight=Y_train.cont.abs())
y_proba_train = clf.predict_proba(X_train_)[:,1]
y_proba_test = clf.predict_proba(X_test_)[:,1]

In [38]:
X_train_.shape
# 36 Features

(687, 41)

In [64]:
X_train_.columns

Index(['sharpe1_1', 'sharpe2_1', 'returns_1', 'std_1', 'vol_1', 'blgr_1',
       'trend_1', 'trendblgr_1', 'willR_1', 'ar1_1', 'adf_1', 'macd_norm1_1',
       'macd_norm2_1', 'sharpe1_4', 'sharpe2_4', 'returns_4', 'std_4', 'vol_4',
       'blgr_4', 'trend_4', 'trendblgr_4', 'willR_4', 'ar1_4', 'adf_4',
       'macd_norm1_4', 'macd_norm2_4', 'sharpe1_10', 'sharpe2_10',
       'returns_10', 'std_10', 'vol_10', 'blgr_10', 'trend_10', 'trendblgr_10',
       'willR_10', 'ar1_10'],
      dtype='object')

In [29]:
'sharpe1_1', 'sharpe2_1', 'returns_1', 'std_1', 'vol_1', 'blgr_1',
       'trend_1', 'trendblgr_1', 'willR_1', 'ar1_1', 'adf_1', 'macd_norm1_1',
       'macd_norm2_1', 

'sharpe1_4', 'sharpe2_4', 'returns_4', 'std_4', 'vol_4',
       'blgr_4', 'trend_4', 'trendblgr_4', 'willR_4', 'ar1_4', 'adf_4',
       'macd_norm1_4', 'macd_norm2_4', 

'sharpe1_10', 'sharpe2_10',
       'returns_10', 'std_10', 'vol_10', 'blgr_10', 'trend_10', 'trendblgr_10',
       'willR_10', 'ar1_10'

IndentationError: unexpected indent (3413639109.py, line 2)

In [63]:
# Save Model
import pickle
with open('xrp_model_v240524.pkl', 'wb') as f:
    pickle.dump(clf, f)

In [45]:
# Save return
import pickle
with open('Analysis/[S1] XRP_Reversion/events.pkl', 'wb') as f:
    pickle.dump(events, f)

In [2]:
ret

NameError: name 'ret' is not defined