In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import copy

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

from xgboost import XGBRegressor
from xgboost import XGBClassifier

import statsmodels.api as sm

In [36]:
ITI_amount = pd.read_csv('./data/iti/ITI_amount.csv')
ITI_count = pd.read_csv('./data/iti/ITI_count.csv')
ITI_flag = pd.read_csv('./data/iti/ITI_flag.csv')

In [37]:
ITI_amount = ITI_amount.set_index('date')
ITI_count = ITI_count.set_index('date')
ITI_flag = ITI_flag.set_index('date')

ITI_count

Unnamed: 0_level_0,ITI_count
date,Unnamed: 1_level_1
2018-01-02,-0.000855
2018-01-03,0.009956
2018-01-04,0.017666
2018-01-05,0.011361
2018-01-06,0.021734
...,...
2024-09-26,0.036592
2024-09-27,0.042505
2024-09-28,0.020111
2024-09-29,0.049036


In [38]:
reversion_df = pd.read_csv('./data/reversion/reversion.csv')
reversion_df = reversion_df[['Open datetime', 'Reversion_Signals']]

In [39]:
reversion_df.columns = ['date', 'target']
reversion_df = reversion_df.set_index('date')
reversion_df

Unnamed: 0_level_0,target
date,Unnamed: 1_level_1
2018-01-07,1
2018-01-08,0
2018-01-09,0
2018-01-10,1
2018-01-11,1
...,...
2024-09-26,0
2024-09-27,0
2024-09-28,0
2024-09-29,0


In [40]:
result_df = pd.concat([ITI_amount, ITI_count, ITI_flag, reversion_df], axis=1)
result_df = result_df.dropna()
result_df

Unnamed: 0_level_0,ITI_amount,ITI_count,ITI_flag,target
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-07,0.003948,0.010048,0.992118,1.0
2018-01-08,0.006804,0.013995,1.003961,0.0
2018-01-09,0.003948,0.009246,0.028513,0.0
2018-01-10,0.003948,0.010008,0.011397,1.0
2018-01-11,0.003948,0.007570,0.981418,1.0
...,...,...,...,...
2024-09-26,0.015272,0.036592,0.394437,0.0
2024-09-27,0.022266,0.042505,0.732642,0.0
2024-09-28,0.017516,0.020111,0.690499,0.0
2024-09-29,0.026322,0.049036,0.430052,0.0


In [41]:
y = result_df['target'].values
X = result_df[['ITI_amount', 'ITI_count', 'ITI_flag']]

In [42]:
## train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=0)

In [43]:
X_with_const = sm.add_constant(X_test)

# OLS 회귀 적합
# ---------------------------------------------
lr = sm.OLS(y_test, X_with_const).fit()
# print("=== OLS 기본 요약 ===")
# print(lr.summary())

# 로버스트 공분산(이분산 robust: HC1) 적용
# ---------------------------------------------
lr_hc1 = lr.get_robustcov_results(cov_type='HC1')
print("\n=== 로버스트(HC1) 요약 ===")
print(lr_hc1.summary())



=== 로버스트(HC1) 요약 ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.016
Model:                            OLS   Adj. R-squared:                  0.010
Method:                 Least Squares   F-statistic:                     3.439
Date:                Tue, 04 Feb 2025   Prob (F-statistic):             0.0168
Time:                        18:53:31   Log-Likelihood:                -61.345
No. Observations:                 492   AIC:                             130.7
Df Residuals:                     488   BIC:                             147.5
Df Model:                           3                                         
Covariance Type:                  HC1                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0612      0.0

In [44]:
corr_df = result_df[['ITI_amount', 'ITI_count', 'ITI_flag']]
corr_df.corr()

Unnamed: 0,ITI_amount,ITI_count,ITI_flag
ITI_amount,1.0,0.144328,0.167909
ITI_count,0.144328,1.0,0.267994
ITI_flag,0.167909,0.267994,1.0


In [45]:
test_df = pd.read_csv("./data/test/test_df.csv")
test_df = test_df.set_index('date')

In [46]:
test_df['volatility_5'] = test_df['returns'].rolling(window=5).std()
test_df['volatility_10'] = test_df['returns'].rolling(window=10).std()
test_df['volatility_15'] = test_df['returns'].rolling(window=15).std()
test_df['volatility_30'] = test_df['returns'].rolling(window=30).std()
test_df['volatility_60'] = test_df['returns'].rolling(window=60).std()

pd.set_option('display.max_columns', None)

# nan값 많은 열
# 'cost-per-transaction-percent'
# 'estimated-transaction-volume-usd'
# 'estimated-transaction-volume'

test_df = test_df.drop(['cost-per-transaction-percent', 'estimated-transaction-volume-usd', 'estimated-transaction-volume'], axis=1)

test_df

Unnamed: 0_level_0,transaction_count,transaction_amount,transaction_flag,returns,close,abs_returns,volume,open,high,low,high_low,daily_avg_trades,daily_sum_trades,daily_sum_taker_buy_base_asset_volume,daily_sum_taker_buy_quote_asset_volume,daily_avg_spread,daily_quote_asset_volume,daily_avg_quote_asset_volume,daily_low,daily_high,daily_avg_price,daily_vwap,daily_avg_volume,daily_morning_returns,daily_afternoon_returns,daily_night_returns,daily_dawn_returns,daily_avg_morning_returns,daily_avg_afternoon_returns,daily_avg_night_returns,daily_avg_dawn_returns,daily_avg_morning_volume,daily_avg_afternoon_volume,daily_avg_night_volume,daily_avg_dawn_volume,daily_morning_volatility,daily_afternoon_volatility,daily_night_volatility,daily_dawn_volatility,daily_volatility,avg-block-size,blocks-size,cost-per-transaction,difficulty,fees-usd-per-transaction,hash-rate,market-price,median-confirmation-time,miners-revenue,n-transactions-excluding-popular,n-transactions-per-block,n-transactions-total,n-transactions,n-unique-addresses,output-volume,trade-volume,transaction-fees-usd,transaction-fees,n-payments-per-block,n-payments,avg-confirmation-time,volatility_5,volatility_10,volatility_15,volatility_30,volatility_60
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1
2017-01-01,0,0.000000e+00,0,,4285.08,,795.150377,4261.48,4485.39,4200.74,284.65,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2017-01-02,0,0.000000e+00,0,-0.041238,4108.37,0.041238,1199.888264,4285.08,4371.52,3938.77,432.75,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.944878,97293.875547,7.350606,3.176884e+11,0.394819,2.526780e+06,997.36,9.816667,2.138666e+06,287576.0,1829.880503,1.839618e+08,290951.0,529709.0,1.573982e+06,6.822471e+06,114872.988609,112.884147,,,,,,,,
2017-01-03,0,0.000000e+00,0,0.007694,4139.98,0.007694,381.309763,4108.37,4184.69,3850.00,334.69,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.906574,97444.110856,7.415466,3.176884e+11,0.398843,2.589950e+06,1013.42,12.350000,2.245225e+06,297536.0,1839.414634,1.842534e+08,301664.0,515024.0,1.950525e+06,8.324526e+06,120759.971352,117.508697,,,,,,,,
2017-01-04,0,0.000000e+00,0,-0.012969,4086.29,0.012969,467.083022,4139.98,4211.08,4032.62,178.46,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.986063,97592.788943,7.000531,3.176884e+11,0.644994,2.432026e+06,1037.83,10.700000,2.332472e+06,325192.0,2147.986928,1.845561e+08,328642.0,545609.0,2.482038e+06,9.340584e+06,214902.266038,193.337436,,,,,,,,
2017-01-05,1,3.936639e+13,1,-0.017201,4016.00,0.017201,691.743060,4086.29,4119.62,3911.79,207.83,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.984197,97743.656720,6.913398,3.176884e+11,0.419516,2.210933e+06,1109.16,10.916667,2.009801e+06,283920.0,2060.721429,1.848845e+08,288501.0,495910.0,2.998215e+06,2.387011e+07,121958.044759,114.991943,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-26,0,0.000000e+00,0,0.032018,65173.99,0.032018,28373.305930,63152.01,65839.00,62670.00,3169.00,50.478391,4361333.0,15041.98860,9.711762e+08,0.000005,1.831205e+09,21194.497638,62670.00,65839.00,64278.054721,64540.884675,0.328395,0.012580,0.011469,0.000636,0.006999,5.796342e-07,5.319940e-07,3.103639e-08,3.237245e-07,0.251817,0.666416,0.210727,0.184619,0.000042,0.000090,0.000056,0.000041,0.000061,1.597736,603443.849853,56.629444,8.840440e+13,1.169620,5.712989e+08,,9.550000,2.750464e+07,474557.0,3652.046154,1.086041e+09,474766.0,515965.0,8.415049e+05,,568078.359074,8.530836,6348.207692,825267.0,190.793990,0.018662,0.016709,0.018991,0.021195,0.030288
2024-09-27,1,1.076363e+12,1,0.009144,65769.95,0.009144,22048.804870,65173.99,66498.00,64819.90,1678.10,40.492234,3498529.0,11092.85716,7.291561e+08,0.000004,1.448852e+09,16769.122805,64819.90,66498.00,65612.974239,65711.654429,0.255195,0.001266,0.007309,-0.002210,0.002771,5.970956e-08,3.396637e-07,-1.019043e-07,1.289537e-07,0.290192,0.446167,0.118066,0.166353,0.000047,0.000071,0.000032,0.000041,0.000050,1.736523,603651.398879,63.985187,8.840440e+13,1.604248,6.020611e+08,,9.683333,2.929920e+07,451528.0,3297.248175,1.086516e+09,451723.0,549045.0,8.076291e+05,,734594.752719,11.071403,6157.496350,843577.0,61.365461,0.018662,0.014302,0.018939,0.021140,0.030183
2024-09-28,0,0.000000e+00,0,0.001339,65858.00,0.001339,9127.233160,65769.95,66260.00,65422.23,837.77,15.528970,1341703.0,4501.22534,2.960267e+08,0.000002,6.001185e+08,6945.815777,65422.23,66260.00,65755.198738,65750.063804,0.105639,-0.002362,-0.002505,0.003425,0.002795,-1.092550e-07,-1.158085e-07,1.585881e-07,1.295088e-07,0.109949,0.106194,0.103977,0.102437,0.000022,0.000024,0.000024,0.000023,0.000023,1.775522,603889.347914,53.782789,8.840440e+13,1.298335,5.669043e+08,,10.116667,2.765172e+07,507512.0,3935.713178,1.086966e+09,507707.0,477266.0,4.624474e+05,,667521.876248,10.057609,6670.612403,860509.0,75.537497,0.018062,0.013394,0.016750,0.021142,0.030160
2024-09-29,0,0.000000e+00,0,-0.003887,65602.01,0.003887,8337.741110,65858.00,66076.12,65432.00,644.12,16.359363,1413449.0,4132.93780,2.716223e+08,0.000002,5.479800e+08,6342.361270,65432.00,66076.12,65720.242186,65722.977549,0.096502,0.001493,0.001919,-0.003282,-0.004008,6.920803e-08,8.909649e-08,-1.517046e-07,-1.856774e-07,0.083138,0.122141,0.101764,0.078964,0.000018,0.000025,0.000031,0.000023,0.000025,1.641797,604118.606257,47.124922,8.840440e+13,0.721808,6.811640e+08,,10.950000,3.239494e+07,687244.0,4435.012903,1.087474e+09,687427.0,462111.0,4.892956e+05,,496190.298458,7.532710,6508.270968,1008782.0,127.982783,0.018258,0.012944,0.016518,0.021141,0.030002


In [47]:
ITI_amount = pd.read_csv("./data/iti/ITI_amount_241226.csv")
ITI_count = pd.read_csv("./data/iti/ITI_count_241226.csv")
ITI_flag = pd.read_csv("./data/iti/ITI_flag_241226.csv")

ITI_amount = ITI_amount.set_index('date')
ITI_count = ITI_count.set_index('date')
ITI_flag = ITI_flag.set_index('date')


In [48]:
# test_target_df = test_df[['volatility_5', 'volatility_10', 'volatility_15', 'volatility_30', 'volatility_60']]
test_target_df = pd.concat([ITI_amount, ITI_count, ITI_flag, test_df], axis=1)
test_target_df = test_target_df.dropna()
test_target_df['volatility_5'] = test_target_df['volatility_5'].shift(-5)
test_target_df['volatility_10'] = test_target_df['volatility_10'].shift(-10)
test_target_df['volatility_15'] = test_target_df['volatility_15'].shift(-15)
test_target_df['volatility_30'] = test_target_df['volatility_30'].shift(-30)
test_target_df['volatility_60'] = test_target_df['volatility_60'].shift(-60)

# test_target_df[['transaction_count','transaction_amount','transaction_flag']] = test_target_df[['transaction_count','transaction_amount','transaction_flag']].shift(-1)
test_target_df = test_target_df.dropna()
test_target_df

Unnamed: 0_level_0,ITI_amount,ITI_count,ITI_flag,transaction_count,transaction_amount,transaction_flag,returns,close,abs_returns,volume,open,high,low,high_low,daily_avg_trades,daily_sum_trades,daily_sum_taker_buy_base_asset_volume,daily_sum_taker_buy_quote_asset_volume,daily_avg_spread,daily_quote_asset_volume,daily_avg_quote_asset_volume,daily_low,daily_high,daily_avg_price,daily_vwap,daily_avg_volume,daily_morning_returns,daily_afternoon_returns,daily_night_returns,daily_dawn_returns,daily_avg_morning_returns,daily_avg_afternoon_returns,daily_avg_night_returns,daily_avg_dawn_returns,daily_avg_morning_volume,daily_avg_afternoon_volume,daily_avg_night_volume,daily_avg_dawn_volume,daily_morning_volatility,daily_afternoon_volatility,daily_night_volatility,daily_dawn_volatility,daily_volatility,avg-block-size,blocks-size,cost-per-transaction,difficulty,fees-usd-per-transaction,hash-rate,market-price,median-confirmation-time,miners-revenue,n-transactions-excluding-popular,n-transactions-per-block,n-transactions-total,n-transactions,n-unique-addresses,output-volume,trade-volume,transaction-fees-usd,transaction-fees,n-payments-per-block,n-payments,avg-confirmation-time,volatility_5,volatility_10,volatility_15,volatility_30,volatility_60
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1
2021-01-02,0.003682,0.005105,0.939970,4,3.744000e+14,1,0.097050,32178.33,0.097050,129993.873362,29331.70,33300.00,28946.53,4353.47,25.994468,2245922.0,67446.305246,2.110335e+09,0.000065,4.073842e+09,47150.950968,28946.53,33300.00,30694.541181,31339.198481,1.504559,0.005577,0.104250,-0.020654,0.008806,2.629675e-07,4.620890e-06,-9.149101e-07,4.097032e-07,0.709484,2.586270,2.205785,0.516695,0.000105,0.000244,0.000320,0.000087,0.000213,1.395637,319315.580633,107.041738,1.859959e+13,8.400989,1.396132e+08,29393.75,11.625000,3.190850e+07,293409.0,1967.622517,601835959.0,297111.0,711719.0,2.164453e+06,4.912580e+08,2.504284e+06,79.699067,4414.298013,666559.0,87.547595,0.046184,0.054750,0.054983,0.053360,0.054240
2021-01-03,0.000717,0.003121,0.982070,1,2.040000e+12,1,0.025536,33000.05,0.025536,120957.566750,32176.45,34778.11,31962.99,2815.12,27.427060,2369698.0,59750.332871,2.004428e+09,0.000079,4.057598e+09,46962.944739,31962.99,34778.11,33449.583730,33545.837360,1.399972,0.019398,-0.043711,0.002758,0.049112,9.142759e-07,-2.040671e-06,1.486381e-07,2.236047e-06,1.627154,1.790353,1.039721,1.142659,0.000223,0.000239,0.000205,0.000181,0.000213,1.288694,319526.405445,100.029960,1.859959e+13,8.057991,1.460853e+08,32195.46,15.083333,3.592236e+07,355123.0,2272.886076,602133929.0,359116.0,812749.0,2.211805e+06,1.393907e+09,2.893754e+06,86.419994,4742.778481,749359.0,111.085992,0.045881,0.061740,0.054920,0.054156,0.054547
2021-01-04,0.003189,0.006457,0.020893,0,0.000000e+00,0,-0.030647,31988.71,0.030647,140899.885690,33000.05,33600.00,28130.00,5470.00,30.583426,2642408.0,69088.469230,2.173435e+09,0.000121,4.429010e+09,51261.693862,28130.00,33600.00,31826.481466,31431.117838,1.630786,-0.063083,0.009144,0.031090,-0.005668,-2.759788e-06,4.608122e-07,1.437108e-06,-2.483983e-07,2.816325,1.839324,0.863119,1.004376,0.000717,0.000281,0.000198,0.000172,0.000407,1.318806,319729.832857,103.580664,1.859959e+13,9.162334,1.599542e+08,33000.78,12.883333,3.975281e+07,368932.0,2160.312139,602493027.0,373734.0,851667.0,1.922522e+06,9.758314e+08,3.516375e+06,108.705036,4622.219653,799644.0,85.474696,0.038638,0.060359,0.054454,0.054716,0.054313
2021-01-05,0.007217,0.001479,0.003064,0,0.000000e+00,0,0.061297,33949.53,0.061297,116049.997038,31989.75,34360.00,29900.00,4460.00,29.245961,2526851.0,59691.754755,1.927195e+09,0.000073,3.743617e+09,43328.902137,29900.00,34360.00,32221.813669,32258.733179,1.343171,0.044040,0.030402,0.036857,-0.048531,2.021824e-06,1.406496e-06,1.702368e-06,-2.263027e-06,1.049916,1.146717,1.583017,1.593034,0.000230,0.000200,0.000231,0.000283,0.000238,1.292713,319957.982033,99.733618,1.859959e+13,10.386146,1.451608e+08,32035.03,13.650000,3.531478e+07,349895.0,2255.356688,602865747.0,354091.0,845343.0,2.209914e+06,1.432013e+09,3.677641e+06,113.365867,5039.382166,791183.0,69.086170,0.055678,0.063853,0.052689,0.053908,0.053872
2021-01-06,0.002363,0.002785,0.979905,1,5.665082e+12,1,0.083059,36769.36,0.083059,127139.201310,33949.53,36939.21,33288.00,3651.21,29.997488,2591783.0,63052.914652,2.199632e+09,0.000071,4.431954e+09,51295.766765,33288.00,36939.21,34800.556971,34859.916813,1.471519,-0.001726,-0.000573,0.053608,0.030321,-5.582466e-08,-7.763899e-09,2.448041e-06,1.414631e-06,1.337260,1.305444,1.664178,1.579192,0.000220,0.000194,0.000247,0.000252,0.000229,1.293668,320160.890674,108.219166,1.859959e+13,11.266305,1.636526e+08,34046.67,11.775000,4.300457e+07,393193.0,2245.107345,603220075.0,397384.0,924851.0,2.131443e+06,1.026924e+09,4.477049e+06,128.518985,4849.694915,858396.0,79.048271,0.058247,0.058838,0.058357,0.052206,0.053160
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-28,0.019562,0.008105,0.698105,1,3.825959e+12,1,0.005576,34081.00,0.005576,16880.131440,33892.01,34493.33,33860.00,633.33,7.030301,607411.0,8313.280190,2.836557e+08,0.000006,5.759348e+08,6665.989044,33860.00,34493.33,34085.192405,34119.576642,0.195374,0.002629,-0.001222,-0.001555,0.005730,1.229046e-07,-5.572865e-08,-7.185187e-08,2.651291e-07,0.271870,0.240864,0.099921,0.168838,0.000052,0.000042,0.000021,0.000035,0.000039,1.669112,521791.784757,61.611301,6.103068e+13,1.174527,4.854163e+08,33911.08,8.258333,3.477237e+07,564220.0,3527.393750,910068620.0,564383.0,819540.0,6.240775e+05,1.810689e+08,6.628833e+05,19.435606,7893.693750,1262991.0,40.296982,0.013779,0.010460,0.011637,0.022210,0.022571
2023-10-29,0.015575,0.011624,0.630790,0,0.000000e+00,0,0.013054,34525.89,0.013054,20685.521760,34081.01,34750.11,33930.00,820.11,8.303491,717405.0,10053.913730,3.459321e+08,0.000006,7.115990e+08,8236.290373,33930.00,34750.11,34302.630336,34403.360240,0.239403,0.006507,0.009009,-0.001057,-0.001428,3.006828e-07,4.167965e-07,-4.786439e-08,-6.582518e-08,0.165307,0.392756,0.273698,0.125840,0.000028,0.000056,0.000046,0.000026,0.000041,1.657909,522058.985852,72.198087,6.103068e+13,1.125114,4.156377e+08,34090.68,9.433333,3.026652e+07,410116.0,2994.817518,910634065.0,410290.0,672955.0,6.320227e+05,7.156759e+07,4.716646e+05,13.593507,6823.160584,934773.0,64.459558,0.013714,0.010011,0.012716,0.022251,0.022755
2023-10-30,0.041262,0.025233,0.674546,0,0.000000e+00,0,-0.001482,34474.73,0.001482,33657.959760,34525.88,34856.00,34062.84,793.16,12.936655,1117727.0,15237.567810,5.251622e+08,0.000006,1.159772e+09,13423.285968,34062.84,34856.00,34441.100417,34457.402424,0.389560,0.010438,-0.008689,0.004866,-0.007962,4.820640e-07,-4.015109e-07,2.253512e-07,-3.690331e-07,0.395377,0.664876,0.234043,0.263943,0.000051,0.000071,0.000035,0.000046,0.000053,1.692528,522285.911705,73.863529,6.222467e+13,1.328776,4.825400e+08,34532.39,7.912500,3.508990e+07,462953.0,2969.243590,911043105.0,463202.0,779588.0,9.713817e+05,9.269944e+07,6.312536e+05,18.121008,6788.673077,1059033.0,38.213126,0.014106,0.012955,0.014699,0.022238,0.022833
2023-10-31,0.010006,0.009492,0.702082,0,0.000000e+00,0,0.004787,34639.77,0.004787,32737.898220,34474.74,34720.49,34025.00,695.49,13.646447,1179053.0,15843.795000,5.447229e+08,0.000005,1.125442e+09,13025.954219,34025.00,34720.49,34386.780249,34377.553303,0.378911,0.006011,-0.003109,0.007526,-0.005584,2.785578e-07,-1.427482e-07,3.481355e-07,-2.586188e-07,0.392795,0.420627,0.363601,0.338621,0.000047,0.000054,0.000045,0.000036,0.000046,1.679830,522549.768139,78.345673,6.246347e+13,1.447594,4.812868e+08,34501.00,9.216667,3.398251e+07,433494.0,2798.393548,911506299.0,433751.0,782062.0,7.943055e+05,1.673754e+08,6.278953e+05,18.239112,6476.464516,1003852.0,58.969025,0.014234,0.013311,0.021898,0.022270,0.022834


In [49]:
test_target_df.columns

Index(['ITI_amount', 'ITI_count', 'ITI_flag', 'transaction_count',
       'transaction_amount', 'transaction_flag', 'returns', 'close',
       'abs_returns', 'volume', 'open', 'high', 'low', 'high_low',
       'daily_avg_trades', 'daily_sum_trades',
       'daily_sum_taker_buy_base_asset_volume',
       'daily_sum_taker_buy_quote_asset_volume', 'daily_avg_spread',
       'daily_quote_asset_volume', 'daily_avg_quote_asset_volume', 'daily_low',
       'daily_high', 'daily_avg_price', 'daily_vwap', 'daily_avg_volume',
       'daily_morning_returns', 'daily_afternoon_returns',
       'daily_night_returns', 'daily_dawn_returns',
       'daily_avg_morning_returns', 'daily_avg_afternoon_returns',
       'daily_avg_night_returns', 'daily_avg_dawn_returns',
       'daily_avg_morning_volume', 'daily_avg_afternoon_volume',
       'daily_avg_night_volume', 'daily_avg_dawn_volume',
       'daily_morning_volatility', 'daily_afternoon_volatility',
       'daily_night_volatility', 'daily_dawn_volatili

In [50]:
y = test_target_df['volatility_5'].values
X = test_target_df[['ITI_amount', 'ITI_count', 'ITI_flag']]

## train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

X_with_const = sm.add_constant(X_test)

# OLS 회귀 적합
# ---------------------------------------------
lr = sm.OLS(y_test, X_with_const).fit()
print("=== OLS 기본 요약 ===")
print(lr.summary())

=== OLS 기본 요약 ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                 -0.008
Method:                 Least Squares   F-statistic:                    0.4843
Date:                Tue, 04 Feb 2025   Prob (F-statistic):              0.694
Time:                        18:53:31   Log-Likelihood:                 670.27
No. Observations:                 207   AIC:                            -1333.
Df Residuals:                     203   BIC:                            -1319.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0194      0.002  

In [51]:
import time
from IPython.display import clear_output
for t in test_target_df.columns:
    # 출력 지우기 (덮어쓰기 효과)
    clear_output(wait=True)
    y = test_target_df['volatility_5'].values
    X = test_target_df[['ITI_amount', 'ITI_count', 'ITI_flag', t]]

    ## train, test 데이터 분리
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

    X_with_const = sm.add_constant(X_test)

    # OLS 회귀 적합
    # ---------------------------------------------
    lr = sm.OLS(y_test, X_with_const).fit()
    print("=== OLS 기본 요약 ===")
    print(lr.summary())

    # # 로버스트 공분산(이분산 robust: HC1) 적용
    # # ---------------------------------------------
    # lr_hc1 = lr.get_robustcov_results(cov_type='HC1')
    # print("\n=== 로버스트(HC1) 요약 ===")
    # print(lr_hc1.summary())


    time.sleep(3)


=== OLS 기본 요약 ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.166
Model:                            OLS   Adj. R-squared:                  0.155
Method:                 Least Squares   F-statistic:                     15.17
Date:                Tue, 04 Feb 2025   Prob (F-statistic):           2.51e-11
Time:                        18:53:58   Log-Likelihood:                 978.94
No. Observations:                 310   AIC:                            -1948.
Df Residuals:                     305   BIC:                            -1929.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0172      0.002  

KeyboardInterrupt: 

In [396]:
reversion_df = pd.read_csv('./data/reversion/reversion_241226.csv')
reversion_df = reversion_df[['Open datetime', 'Reversion_Signals']]

reversion_df.columns = ['date', 'target']
reversion_df = reversion_df.set_index('date')

result_df = pd.concat([ITI_amount, ITI_count, ITI_flag, reversion_df], axis=1)
result_df = result_df.dropna()

y = result_df['target'].values
X = result_df[['ITI_amount', 'ITI_count', 'ITI_flag']]

## train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

X_with_const = sm.add_constant(X_test)

# OLS 회귀 적합
# ---------------------------------------------
lr = sm.OLS(y_test, X_with_const).fit()
# print("=== OLS 기본 요약 ===")
# print(lr.summary())

# 로버스트 공분산(이분산 robust: HC1) 적용
# ---------------------------------------------
lr_hc1 = lr.get_robustcov_results(cov_type='HC1')
print("\n=== 로버스트(HC1) 요약 ===")
print(lr_hc1.summary())



=== 로버스트(HC1) 요약 ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.032
Model:                            OLS   Adj. R-squared:                  0.028
Method:                 Least Squares   F-statistic:                     5.719
Date:                Fri, 27 Dec 2024   Prob (F-statistic):           0.000716
Time:                        09:35:12   Log-Likelihood:                -142.79
No. Observations:                 738   AIC:                             293.6
Df Residuals:                     734   BIC:                             312.0
Df Model:                           3                                         
Covariance Type:                  HC1                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0577      0.0

In [400]:
y = test_target_df['transaction_count'].shift(-1).fillna(test_target_df['transaction_count'].mean()).values
# y = test_target_df['transaction_amount']
X = test_target_df[['ITI_amount', 'ITI_count', 'ITI_flag']]

## train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

X_with_const = sm.add_constant(X_test)

# OLS 회귀 적합
# ---------------------------------------------
lr = sm.OLS(y_test, X_with_const).fit()
print("=== OLS 기본 요약 ===")
print(lr.summary())

=== OLS 기본 요약 ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.000
Method:                 Least Squares   F-statistic:                     1.017
Date:                Fri, 27 Dec 2024   Prob (F-statistic):              0.385
Time:                        09:36:13   Log-Likelihood:                -416.61
No. Observations:                 310   AIC:                             841.2
Df Residuals:                     306   BIC:                             856.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1217      0.148  

In [404]:
y = test_target_df['volatility_5'].values
X = test_target_df[['transaction_amount', 'transaction_count', 'transaction_flag']]

## train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

X_with_const = sm.add_constant(X_test)

# OLS 회귀 적합
# ---------------------------------------------
lr = sm.OLS(y_test, X_with_const).fit()
print("=== OLS 기본 요약 ===")
print(lr.summary())

=== OLS 기본 요약 ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.006
Model:                            OLS   Adj. R-squared:                 -0.004
Method:                 Least Squares   F-statistic:                    0.6375
Date:                Fri, 27 Dec 2024   Prob (F-statistic):              0.591
Time:                        10:47:24   Log-Likelihood:                 951.77
No. Observations:                 310   AIC:                            -1896.
Df Residuals:                     306   BIC:                            -1881.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const             