In [1]:
import os
opj = os.path.join

import ccxt 
import matplotlib.pyplot as plt 
import pandas as pd 
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor 
from sklearn.metrics import f1_score, mean_squared_error, mean_absolute_error
import torch

In [2]:
df_files = [
    "crypto-gt.csv",
    "deberta.csv",
    "default.csv",
    "roberta.csv",
    "bert.csv"
]

results = {}
saveas = "experiments.csv"
checkpoint_save_dir = "ckpts"
exp_times = 10

In [3]:
def experiment(df_file, N):
    exp_name = df_file.split(".")[0]
    # load data
    chart_df = pd.read_csv(df_file)
    i1 = 0
    i2 = len(chart_df)-1
    st = "2018-02-14 14:00:00"
    en = "2022-04-15 23:00:00"
    filt = []
    for index, row in chart_df.iterrows():
        if chart_df.iloc[i1]['datetime'] < row.datetime and row.datetime < st:
            i1 = index
        if en < row.datetime and row.datetime < chart_df.iloc[i2]['datetime']:
            i2 = index
    chart_df = chart_df[i1+1:i2]
    chart_df.index = range(len(chart_df))
    
    # Train High Model
    ## train/validation/test split 
    train_size = int(chart_df.shape[0] * 0.8) 
    train_df = chart_df.iloc[:train_size,:] 

    val_size = int(chart_df.shape[0] * 0.1) 
    val_df = chart_df.iloc[train_size:train_size+val_size,:]  

    test_df = chart_df.iloc[train_size+val_size:, :] 
    
    categorical_columns = ["months", "days", "hours"]
    features = train_df.columns

    cat_idxs = [0, 1, 2] 
    cat_dims = [13, 32, 25] 

    tabnet_params = {"cat_idxs":cat_idxs, 
                     "cat_dims":cat_dims, 
                     "cat_emb_dim":1, 
                     "optimizer_fn":torch.optim.Adam} 
    
    input_columns = [] 
    for col in train_df.columns:
        if col != 'low_delta' and col != 'years' and col != 'datetime': 
            input_columns.append(col) 

    X_train = train_df[input_columns].values 
    Y_train = train_df['high_delta'].values 
    Y_train = Y_train.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df['high_delta'].values 
    Y_val = Y_val.reshape((-1,1))

    X_test = test_df[input_columns].values 
    Y_test = test_df['high_delta'].values  
    Y_test = Y_test.reshape((-1,1))
    
    reg_high = TabNetRegressor(**tabnet_params) 

    reg_high.fit(X_train, Y_train, 
                 eval_set=[(X_val, Y_val)], 
                 max_epochs=100, 
                 patience=100)  
    
    Y_pred = reg_high.predict(X_test).flatten() 
    
    exp = f"{exp_name}-high"
    if exp not in results:
        results[exp] = []
        
    results[exp].append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )
    
    saveas = opj(checkpoint_save_dir, f"{exp}-N")
    reg_high.save_model(saveas)
    
    # Train Low Model
    input_columns = [] 
    for col in train_df.columns:
        if col != 'high_delta' and col != 'years' and col != 'datetime': 
            input_columns.append(col) 

    X_train = train_df[input_columns].values 
    Y_train = train_df['low_delta'].values 
    Y_train = Y_train.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df['low_delta'].values 
    Y_val = Y_val.reshape((-1,1))

    X_test = test_df[input_columns].values 
    Y_test = test_df['low_delta'].values  
    Y_test = Y_test.reshape((-1,1))
    
    reg_low = TabNetRegressor(**tabnet_params) 

    reg_low.fit(X_train, Y_train, 
                eval_set=[(X_val, Y_val)], 
                max_epochs=200, 
                patience=200)  
    
    Y_pred = reg_low.predict(X_test).flatten() 
    exp = f"{exp_name}-low"
    if exp not in results:
        results[exp] = []
        
    results[exp].append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )
    
    saveas = opj(checkpoint_save_dir, f"{exp}-{N}")
    reg_high.save_model(saveas)

In [4]:
for df_file in df_files:
    for i in range(exp_times):
        experiment(df_file, i)

Device used : cuda
epoch 0  | loss: 0.30603 | val_0_mse: 0.00768 |  0:00:00s
epoch 1  | loss: 0.05294 | val_0_mse: 0.00075 |  0:00:01s
epoch 2  | loss: 0.013   | val_0_mse: 0.00038 |  0:00:02s
epoch 3  | loss: 0.00455 | val_0_mse: 0.00042 |  0:00:02s
epoch 4  | loss: 0.00227 | val_0_mse: 0.00023 |  0:00:03s
epoch 5  | loss: 0.00152 | val_0_mse: 0.00018 |  0:00:03s
epoch 6  | loss: 0.00132 | val_0_mse: 0.00016 |  0:00:04s
epoch 7  | loss: 0.00129 | val_0_mse: 0.00017 |  0:00:05s
epoch 8  | loss: 0.00121 | val_0_mse: 0.00016 |  0:00:05s
epoch 9  | loss: 0.00119 | val_0_mse: 0.00017 |  0:00:06s
epoch 10 | loss: 0.00116 | val_0_mse: 0.00017 |  0:00:07s
epoch 11 | loss: 0.00114 | val_0_mse: 0.00015 |  0:00:08s
epoch 12 | loss: 0.00113 | val_0_mse: 0.00016 |  0:00:09s
epoch 13 | loss: 0.00114 | val_0_mse: 0.00015 |  0:00:09s
epoch 14 | loss: 0.0011  | val_0_mse: 0.00017 |  0:00:10s
epoch 15 | loss: 0.00109 | val_0_mse: 0.00015 |  0:00:11s
epoch 16 | loss: 0.00111 | val_0_mse: 0.00015 |  0:00

epoch 37 | loss: 0.00078 | val_0_mse: 0.00013 |  0:00:30s
epoch 38 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:31s
epoch 39 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:31s
epoch 40 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:32s
epoch 41 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:33s
epoch 42 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:34s
epoch 43 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:35s
epoch 44 | loss: 0.00061 | val_0_mse: 0.00013 |  0:00:36s
epoch 45 | loss: 0.00058 | val_0_mse: 0.00015 |  0:00:37s
epoch 46 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:38s
epoch 47 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:39s
epoch 48 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:40s
epoch 49 | loss: 0.00054 | val_0_mse: 0.00013 |  0:00:41s
epoch 50 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:42s
epoch 51 | loss: 0.00054 | val_0_mse: 0.00013 |  0:00:43s
epoch 52 | loss: 0.00051 | val_0_mse: 0.00013 |  0:00:43s
epoch 53 | loss: 0.0005  | val_0_mse: 0.00013 |  0:00:44s
epoch 54 | los

KeyboardInterrupt: 

In [5]:
if not os.path.exists(checkpoint_save_dir):
    os.makedirs(checkpoint_save_dir, exist_ok=True)
with open(saveas, 'w') as f:
    for key in results.keys():
        f.write("%s, %s\n" % (key, results[key]))