In [17]:
import torch 
import numpy as np 
import pandas as pd
from pytorch_tabnet.tab_model import TabNetClassifier 
import pandas_ta as ta 
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score 
import json
import torch.nn as nn 
import ccxt
from tqdm import tqdm 
import matplotlib.pyplot as plt 
import seaborn as sns

In [43]:
with open('BTC_USDT-4h-8.json') as f:
    d = json.load(f)
    
chart_df = pd.DataFrame(d)
chart_df = chart_df.rename(columns={0:"timestamp",
                                    1:"open",
                                    2:"high",
                                    3:"low",
                                    4:"close",
                                    5:"volume"})


In [44]:
def process(df): 
    binance = ccxt.binance() 
    dates = df['timestamp'].values 
    timestamp = [] 
    for i in range(len(dates)): 
        date_string = binance.iso8601(int(dates[i])) 
        date_string = date_string[:10] + " " + date_string[11:-5] 
        timestamp.append(date_string) 
    df['datetime'] = timestamp 
    df = df.drop(columns={'timestamp'})
    return df

In [45]:
chart_df = process(chart_df)
chart_df

Unnamed: 0,open,high,low,close,volume,datetime
0,4261.48,4349.99,4261.32,4349.99,82.088865,2017-08-17 04:00:00
1,4333.32,4485.39,4333.32,4427.30,63.619882,2017-08-17 08:00:00
2,4436.06,4485.39,4333.42,4352.34,174.562001,2017-08-17 12:00:00
3,4352.33,4354.84,4200.74,4325.23,225.109716,2017-08-17 16:00:00
4,4307.56,4369.69,4258.56,4285.08,249.769913,2017-08-17 20:00:00
...,...,...,...,...,...,...
10792,22216.90,22332.00,21934.57,22272.50,21809.064630,2022-07-23 16:00:00
10793,22272.50,22559.00,22221.54,22451.07,16460.756740,2022-07-23 20:00:00
10794,22448.58,22677.37,22288.00,22331.86,14961.070670,2022-07-24 00:00:00
10795,22331.86,22839.99,22257.15,22701.60,20882.527650,2022-07-24 04:00:00


In [46]:
hours = []
days = [] 
months = [] 
years = [] 
for dt in tqdm(chart_df['datetime']):
    hour = pd.to_datetime(dt).hour 
    day = pd.to_datetime(dt).day 
    month = pd.to_datetime(dt).month 
    year = pd.to_datetime(dt).year 
    hours.append(hour) 
    days.append(day) 
    months.append(month)
    years.append(year) 

chart_df['hour'] = hours
chart_df['day'] = days 
chart_df['month'] = months 
chart_df['year'] = years 

chart_df.head()

100%|██████████| 10797/10797 [00:03<00:00, 3324.91it/s]


Unnamed: 0,open,high,low,close,volume,datetime,hour,day,month,year
0,4261.48,4349.99,4261.32,4349.99,82.088865,2017-08-17 04:00:00,4,17,8,2017
1,4333.32,4485.39,4333.32,4427.3,63.619882,2017-08-17 08:00:00,8,17,8,2017
2,4436.06,4485.39,4333.42,4352.34,174.562001,2017-08-17 12:00:00,12,17,8,2017
3,4352.33,4354.84,4200.74,4325.23,225.109716,2017-08-17 16:00:00,16,17,8,2017
4,4307.56,4369.69,4258.56,4285.08,249.769913,2017-08-17 20:00:00,20,17,8,2017


In [47]:
targets = [] 
close = chart_df['open'].values 
high = chart_df['high'].values 
low = chart_df['low'].values 

threshold = 0.0075

for i in range(close.shape[0]-1):
    high_volatility = (high[i+1]-close[i]) / close[i] 
    low_volatility = (low[i+1]-close[i]) / close[i] 
    if high_volatility >= threshold:
        targets.append(0) # long position 
    elif low_volatility <= -threshold:
        targets.append(1) # short position 
    else:
        targets.append(2) # deemed low volatility, do not trade 

targets.append(None) 

chart_df['Targets'] = targets 

chart_df.head(3)


Unnamed: 0,open,high,low,close,volume,datetime,hour,day,month,year,Targets
0,4261.48,4349.99,4261.32,4349.99,82.088865,2017-08-17 04:00:00,4,17,8,2017,0.0
1,4333.32,4485.39,4333.32,4427.3,63.619882,2017-08-17 08:00:00,8,17,8,2017,0.0
2,4436.06,4485.39,4333.42,4352.34,174.562001,2017-08-17 12:00:00,12,17,8,2017,1.0


In [48]:
# data preprocessing using pandas ta 
chart_df.set_index(pd.DatetimeIndex(chart_df["datetime"]), inplace=True)

chart_df['bop'] = chart_df.ta.bop(lookahead=False)
chart_df['ebsw'] = chart_df.ta.ebsw(lookahead=False)
chart_df['cmf'] = chart_df.ta.cmf(lookahead=False)
chart_df['vwap'] = chart_df.ta.vwap(lookahead=False)  

chart_df['rsi/100'] = chart_df.ta.rsi(lookahead=False) / 100 
chart_df['rsx/100'] = chart_df.ta.rsx(lookahead=False) / 100 

chart_df['high/low'] = chart_df['high'] / chart_df['low'] 
chart_df['close/open'] = chart_df['close'] / chart_df['open']
chart_df['high/open'] = chart_df['high'] / chart_df['open'] 
chart_df['low/open'] = chart_df['low'] / chart_df['open']

chart_df['hwma'] = chart_df.ta.hwma(lookahead=False)
chart_df['linreg'] = chart_df.ta.linreg(lookahead=False)
chart_df['hwma/close'] = chart_df['hwma'] / chart_df['close'] 
chart_df['linreg/close'] = chart_df['linreg'] / chart_df['close']


# differencing 
for l in range(1,12): 
    for col in ['open','high','low','close','volume', 'vwap']: 
        val = chart_df[col].values 
        val_ret = [None for _ in range(l)] 
        for i in range(l, len(val)):
            if val[i-l] == 0:
                ret = 1
            else:
                ret = val[i] / val[i-l] 
            val_ret.append(ret) 
        chart_df['{}_change_{}'.format(col, l)] = val_ret   



In [49]:
chart_df = chart_df.dropna() 
chart_df.head(10)

Unnamed: 0_level_0,open,high,low,close,volume,datetime,hour,day,month,year,...,low_change_10,close_change_10,volume_change_10,vwap_change_10,open_change_11,high_change_11,low_change_11,close_change_11,volume_change_11,vwap_change_11
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-23 16:00:00,4226.94,4259.58,4103.51,4117.07,174.637585,2017-08-23 16:00:00,16,23,8,2017,...,1.112373,1.065938,1.522754,1.075381,1.049637,1.046454,1.038863,1.025167,1.725297,1.025898
2017-08-23 20:00:00,4136.48,4178.65,4069.8,4114.01,152.616402,2017-08-23 20:00:00,20,23,8,2017,...,1.197,1.083182,0.91266,1.102681,1.03,1.0405,1.103235,1.065146,1.33074,1.074368
2017-08-24 00:00:00,4147.0,4204.55,4085.01,4113.58,123.52015,2017-08-24 00:00:00,0,24,8,2017,...,1.089313,1.063908,0.557071,1.089171,1.073518,1.086849,1.201474,1.083068,0.738662,1.100511
2017-08-24 04:00:00,4113.58,4177.64,4090.39,4153.32,123.17946,2017-08-24 04:00:00,4,24,8,2017,...,1.080168,1.076483,0.686778,1.083785,1.083068,1.06564,1.090747,1.074186,0.555535,1.08997
2017-08-24 08:00:00,4153.8,4250.94,4150.0,4202.0,171.486488,2017-08-24 08:00:00,8,24,8,2017,...,1.076951,1.054655,1.07491,1.081851,1.07431,1.065366,1.095909,1.0891,0.956111,1.090613
2017-08-24 12:00:00,4211.78,4250.9,4136.33,4222.0,146.997342,2017-08-24 12:00:00,12,24,8,2017,...,1.048757,1.04505,1.184005,1.078527,1.091635,1.035587,1.073404,1.059675,0.921407,1.084527
2017-08-24 16:00:00,4223.06,4250.77,4178.22,4216.07,111.579358,2017-08-24 16:00:00,16,24,8,2017,...,1.04094,1.028207,0.698863,1.02446,1.059872,1.046591,1.059378,1.043582,0.898727,1.080284
2017-08-24 20:00:00,4216.1,4371.68,4193.17,4316.01,110.655955,2017-08-24 20:00:00,20,24,8,2017,...,1.039117,1.059175,0.780149,1.028994,1.043589,1.059034,1.044665,1.05258,0.69308,1.028353
2017-08-25 00:00:00,4316.01,4370.0,4295.75,4312.69,141.104526,2017-08-25 00:00:00,0,25,8,2017,...,1.058849,1.017626,1.045033,1.052738,1.05667,1.061744,1.064538,1.05836,0.994818,1.060793
2017-08-25 04:00:00,4312.69,4362.0,4267.5,4329.0,101.509543,2017-08-25 04:00:00,4,25,8,2017,...,1.028152,1.031439,0.42766,1.043557,1.05661,1.028211,1.051886,1.021475,0.751789,1.052061


In [50]:
chart_df = chart_df.drop(columns={'datetime', 
                                  'open', 
                                  'high', 
                                  'low', 
                                  'close', 
                                  'volume', 
                                  'vwap'}) 

In [52]:
train_size = int(chart_df.shape[0] * 0.96) 
train_df = chart_df.iloc[:train_size,:] 

val_size = int(chart_df.shape[0] * 0.01) 
val_df = chart_df.iloc[train_size:train_size+val_size,:] 

test_df = chart_df.iloc[train_size+val_size:,:] 

train_df.shape, val_df.shape, test_df.shape 


((10326, 84), (107, 84), (324, 84))

In [54]:
input_columns = [] 
for col in train_df.columns:
    if col != 'Targets' and col != 'year':
        input_columns.append(col)         


In [55]:
X_train = train_df[input_columns].values  
Y_train = train_df['Targets'].values

X_val = val_df[input_columns].values 
Y_val = val_df['Targets'].values 

X_test = test_df[input_columns].values
Y_test = test_df['Targets'].values



X_train.shape, Y_train.shape, X_val.shape, Y_val.shape, X_test.shape, Y_test.shape  


((10326, 82), (10326,), (107, 82), (107,), (324, 82), (324,))

In [56]:
class_weights = compute_class_weight(class_weight = "balanced", 
                                     classes = np.unique(Y_train), 
                                     y = Y_train)

d = {0:class_weights[0], 1:class_weights[1], 2:class_weights[2]}

print(d)

{0: 0.6485773506689279, 1: 0.9312770562770563, 2: 2.601662887377173}


In [57]:
clf = TabNetClassifier() 

clf.fit(X_train, 
       Y_train, 
       eval_set=[(X_val, Y_val)], 
       eval_metric=['logloss', 'balanced_accuracy'],
       weights = d, 
       max_epochs=150,
       patience=150)   


Device used : cuda
epoch 0  | loss: 1.26732 | val_0_logloss: 1.06934 | val_0_balanced_accuracy: 0.45975 |  0:00:00s
epoch 1  | loss: 1.05147 | val_0_logloss: 1.08142 | val_0_balanced_accuracy: 0.40169 |  0:00:00s
epoch 2  | loss: 0.96674 | val_0_logloss: 1.10541 | val_0_balanced_accuracy: 0.4682  |  0:00:01s
epoch 3  | loss: 0.90493 | val_0_logloss: 0.96557 | val_0_balanced_accuracy: 0.51797 |  0:00:01s
epoch 4  | loss: 0.82152 | val_0_logloss: 0.74792 | val_0_balanced_accuracy: 0.53241 |  0:00:02s
epoch 5  | loss: 0.78944 | val_0_logloss: 0.81055 | val_0_balanced_accuracy: 0.63825 |  0:00:02s
epoch 6  | loss: 0.71534 | val_0_logloss: 0.69884 | val_0_balanced_accuracy: 0.61167 |  0:00:03s
epoch 7  | loss: 0.68072 | val_0_logloss: 0.6365  | val_0_balanced_accuracy: 0.52012 |  0:00:03s
epoch 8  | loss: 0.67113 | val_0_logloss: 0.66135 | val_0_balanced_accuracy: 0.45591 |  0:00:04s
epoch 9  | loss: 0.63796 | val_0_logloss: 0.63547 | val_0_balanced_accuracy: 0.52842 |  0:00:04s
epoch 10 | 

In [58]:
cnt = 0
pred = clf.predict(X_test)
for i in range(len(pred)):
    if Y_test[i] == float(pred[i]):
        cnt += 1 
        
print("accuracy : {}".format(cnt / len(pred) * 100)) 


accuracy : 77.77777777777779


In [59]:
print(f1_score(Y_test, pred, average='macro'))
print(f1_score(Y_test, pred, average='micro'))
print(f1_score(Y_test, pred, average='weighted')) 


0.6082829021979425
0.7777777777777778
0.784099913070445


In [60]:
clf.save_model("tabnet_clf_chart_only")

Successfully saved model at tabnet_clf_chart_only.zip


'tabnet_clf_chart_only.zip'

# Another Version

In [108]:
with open('BTC_USDT-4h-8.json') as f:
    d = json.load(f)
    
chart_df = pd.DataFrame(d)
chart_df = chart_df.rename(columns={0:"timestamp",
                                    1:"open",
                                    2:"high",
                                    3:"low",
                                    4:"close",
                                    5:"volume"})

In [109]:
chart_df = process(chart_df)


hours = []
days = [] 
months = [] 
years = [] 
for dt in tqdm(chart_df['datetime']):
    hour = pd.to_datetime(dt).hour 
    day = pd.to_datetime(dt).day 
    month = pd.to_datetime(dt).month 
    year = pd.to_datetime(dt).year 
    hours.append(hour) 
    days.append(day) 
    months.append(month)
    years.append(year) 

chart_df['hour'] = hours
chart_df['day'] = days 
chart_df['month'] = months 
chart_df['year'] = years 

chart_df.head()



100%|██████████| 10797/10797 [00:03<00:00, 3273.78it/s]


Unnamed: 0,open,high,low,close,volume,datetime,hour,day,month,year
0,4261.48,4349.99,4261.32,4349.99,82.088865,2017-08-17 04:00:00,4,17,8,2017
1,4333.32,4485.39,4333.32,4427.3,63.619882,2017-08-17 08:00:00,8,17,8,2017
2,4436.06,4485.39,4333.42,4352.34,174.562001,2017-08-17 12:00:00,12,17,8,2017
3,4352.33,4354.84,4200.74,4325.23,225.109716,2017-08-17 16:00:00,16,17,8,2017
4,4307.56,4369.69,4258.56,4285.08,249.769913,2017-08-17 20:00:00,20,17,8,2017


In [110]:
targets = [] 
close = chart_df['open'].values 
high = chart_df['high'].values 
low = chart_df['low'].values 

threshold = 0.0075

for i in range(close.shape[0]-1):
    high_volatility = (high[i+1]-close[i]) / close[i] 
    low_volatility = (low[i+1]-close[i]) / close[i] 
    if high_volatility >= threshold:
        targets.append(0) # long position 
    elif low_volatility <= -threshold:
        targets.append(1) # short position 
    else:
        targets.append(2) # deemed low volatility, do not trade 

targets.append(None) 

chart_df['Targets'] = targets 

chart_df.head(3)


Unnamed: 0,open,high,low,close,volume,datetime,hour,day,month,year,Targets
0,4261.48,4349.99,4261.32,4349.99,82.088865,2017-08-17 04:00:00,4,17,8,2017,0.0
1,4333.32,4485.39,4333.32,4427.3,63.619882,2017-08-17 08:00:00,8,17,8,2017,0.0
2,4436.06,4485.39,4333.42,4352.34,174.562001,2017-08-17 12:00:00,12,17,8,2017,1.0


In [111]:
# data preprocessing using pandas ta 
chart_df.set_index(pd.DatetimeIndex(chart_df["datetime"]), inplace=True)

chart_df['bop'] = chart_df.ta.bop(lookahead=False)
chart_df['ebsw'] = chart_df.ta.ebsw(lookahead=False)
chart_df['cmf'] = chart_df.ta.cmf(lookahead=False)
chart_df['vwap'] = chart_df.ta.vwap(lookahead=False)  

chart_df['rsi/100'] = chart_df.ta.rsi(lookahead=False) / 100 
chart_df['rsx/100'] = chart_df.ta.rsx(lookahead=False) / 100 

chart_df['high/low'] = chart_df['high'] / chart_df['low'] 
chart_df['close/open'] = chart_df['close'] / chart_df['open']
chart_df['high/open'] = chart_df['high'] / chart_df['open'] 
chart_df['low/open'] = chart_df['low'] / chart_df['open']

chart_df['hwma'] = chart_df.ta.hwma(lookahead=False)
chart_df['linreg'] = chart_df.ta.linreg(lookahead=False)
chart_df['hwma/close'] = chart_df['hwma'] / chart_df['close'] 
chart_df['linreg/close'] = chart_df['linreg'] / chart_df['close']

chart_df['ema_10'] = chart_df.ta.ema(length=10, lookahead=False) 
chart_df['ema_60'] = chart_df.ta.ema(length=60, lookahead=False) 
chart_df['ema_120'] = chart_df.ta.ema(length=120, lookahead=False) 


# differencing 
for l in range(1,12): 
    for col in ['open','high','low','close','volume', 'vwap', 'ema_10', 'ema_60', 'ema_120']: 
        val = chart_df[col].values 
        val_ret = [None for _ in range(l)] 
        for i in range(l, len(val)):
            if val[i-l] == 0:
                ret = 1
            else:
                ret = val[i] / val[i-l] 
            val_ret.append(ret) 
        chart_df['{}_change_{}'.format(col, l)] = val_ret   





In [112]:
chart_df = chart_df.dropna() 

chart_df = chart_df.drop(columns={'datetime', 
                                  'open', 
                                  'high', 
                                  'low', 
                                  'close', 
                                  'volume', 
                                  'vwap',
                                  'ema_10',
                                  'ema_60',
                                  'ema_120'})  



In [113]:
train_size = int(chart_df.shape[0] * 0.96) 
train_df = chart_df.iloc[:train_size,:] 

val_size = int(chart_df.shape[0] * 0.01) 
val_df = chart_df.iloc[train_size:train_size+val_size,:] 

test_df = chart_df.iloc[train_size+val_size:,:] 

train_df.shape, val_df.shape, test_df.shape 


((10239, 117), (106, 117), (321, 117))

In [114]:
input_columns = [] 
for col in train_df.columns:
    if col != 'Targets' and col != 'year':
        input_columns.append(col)         


X_train = train_df[input_columns].values  
Y_train = train_df['Targets'].values

X_val = val_df[input_columns].values 
Y_val = val_df['Targets'].values 

X_test = test_df[input_columns].values
Y_test = test_df['Targets'].values



X_train.shape, Y_train.shape, X_val.shape, Y_val.shape, X_test.shape, Y_test.shape  


((10239, 115), (10239,), (106, 115), (106,), (321, 115), (321,))

In [115]:
class_weights = compute_class_weight(class_weight = "balanced", 
                                     classes = np.unique(Y_train), 
                                     y = Y_train)

d = {0:class_weights[0], 1:class_weights[1], 2:class_weights[2]}

print(d)

{0: 0.6513358778625954, 1: 0.9284548422198041, 2: 2.579743008314437}


In [116]:
clf = TabNetClassifier() 

clf.fit(X_train, 
       Y_train, 
       eval_set=[(X_val, Y_val)], 
       eval_metric=['logloss', 'balanced_accuracy'],
       weights = d, 
       max_epochs=150,
       patience=150)   


Device used : cuda
epoch 0  | loss: 1.12747 | val_0_logloss: 1.09501 | val_0_balanced_accuracy: 0.2671  |  0:00:00s
epoch 1  | loss: 1.02403 | val_0_logloss: 1.0788  | val_0_balanced_accuracy: 0.3847  |  0:00:00s
epoch 2  | loss: 0.925   | val_0_logloss: 1.13721 | val_0_balanced_accuracy: 0.47619 |  0:00:01s
epoch 3  | loss: 0.81803 | val_0_logloss: 1.02919 | val_0_balanced_accuracy: 0.53362 |  0:00:01s
epoch 4  | loss: 0.76772 | val_0_logloss: 0.77394 | val_0_balanced_accuracy: 0.57201 |  0:00:02s
epoch 5  | loss: 0.73423 | val_0_logloss: 0.78094 | val_0_balanced_accuracy: 0.51544 |  0:00:02s
epoch 6  | loss: 0.70015 | val_0_logloss: 1.11125 | val_0_balanced_accuracy: 0.35354 |  0:00:03s
epoch 7  | loss: 0.66899 | val_0_logloss: 1.3733  | val_0_balanced_accuracy: 0.33333 |  0:00:03s
epoch 8  | loss: 0.6591  | val_0_logloss: 1.12689 | val_0_balanced_accuracy: 0.33333 |  0:00:04s
epoch 9  | loss: 0.64171 | val_0_logloss: 1.07512 | val_0_balanced_accuracy: 0.33333 |  0:00:04s
epoch 10 | 

In [117]:
cnt = 0
pred = clf.predict(X_test)
for i in range(len(pred)):
    if Y_test[i] == float(pred[i]):
        cnt += 1 
        
print("accuracy : {}".format(cnt / len(pred) * 100)) 


accuracy : 77.25856697819314


In [118]:
print(f1_score(Y_test, pred, average='macro'))
print(f1_score(Y_test, pred, average='micro'))
print(f1_score(Y_test, pred, average='weighted')) 


0.6495516781776324
0.7725856697819314
0.7890243253373738


In [119]:
clf.save_model("tabnet_clf_chart_only_v2")

Successfully saved model at tabnet_clf_chart_only_v2.zip


'tabnet_clf_chart_only_v2.zip'