In [1]:
import os
opj = os.path.join

import ccxt 
import matplotlib.pyplot as plt 
import pandas as pd 
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor 
from sklearn.metrics import f1_score, mean_squared_error, mean_absolute_error
import torch

In [2]:
# Experiment configuration.
# Each df_sets entry is [data directory, CSV files in that directory, experiment name];
# one entry per time window (4, 8 and 12 hours), all sharing the same three CSV names.
_sentiment_csvs = ("deberta.csv", "roberta.csv", "bert.csv")

df_sets = [
    [f"data/{hours}hours-v2", list(_sentiment_csvs), f"sent-{hours}hours-v3"]
    for hours in (4, 8, 12)
]

# Number of repeated runs per CSV; the run index is passed to experiment() as N.
exp_times = 10

In [3]:
def experiment(df_dir, df_file, N):
    """Train and evaluate TabNet regressors for `high_delta` and `low_delta` on one CSV.

    The CSV is trimmed to a fixed datetime window, split chronologically into
    80% train / 10% validation / remaining test, and one TabNetRegressor is
    fitted per target column. For each target the test MSE/MAE are appended to
    the module-level `results` dict under "<csv stem>-high" / "<csv stem>-low",
    and the fitted model is saved under the module-level `checkpoint_save_dir`
    as "<csv stem>-<high|low>-<N>".

    Parameters
    ----------
    df_dir : str
        Directory containing the CSV.
    df_file : str
        CSV file name; the text before the first "." is used as the experiment name.
    N : int
        Run index; used as the TabNet `seed` and as the checkpoint name suffix.

    Notes
    -----
    Relies on the globals `results` (dict) and `checkpoint_save_dir` (str)
    being defined by the caller before this function runs.
    """
    exp_name = df_file.split(".")[0]

    # load data
    data_dir = opj(df_dir, df_file)
    chart_df = pd.read_csv(data_dir)

    # Trim to the window bounded by `st` and `en`.
    # `datetime` values are compared as plain strings; presumably they are
    # formatted "YYYY-MM-DD HH:MM:SS" so string order matches time order -- TODO confirm.
    # i1 advances to rows that are earlier than `st` but later than the current
    # i1 row; i2 moves to rows that are later than `en` but earlier than the
    # current i2 row. The slice below then excludes both boundary rows, so the
    # first/last row is dropped even when nothing lies outside the window.
    # NOTE(review): `index` from iterrows() is used positionally with iloc and
    # in the slice, which assumes the default 0..n-1 index from read_csv.
    i1 = 0
    i2 = len(chart_df)-1
    st = "2018-02-14 14:00:00"
    en = "2022-04-15 23:00:00"
    for index, row in chart_df.iterrows():
        if chart_df.iloc[i1]['datetime'] < row.datetime and row.datetime < st:
            i1 = index
        if en < row.datetime and row.datetime < chart_df.iloc[i2]['datetime']:
            i2 = index
    chart_df = chart_df[i1+1:i2]
    chart_df.index = range(len(chart_df))

    # Chronological train/validation/test split (no shuffling).
    train_size = int(chart_df.shape[0] * 0.8)
    val_size = int(chart_df.shape[0] * 0.1)
    train_df = chart_df.iloc[:train_size, :]
    val_df = chart_df.iloc[train_size:train_size+val_size, :]
    test_df = chart_df.iloc[train_size+val_size:, :]

    # NOTE(review): cat_idxs/cat_dims assume the first three input columns are
    # the categorical months/days/hours features -- verify against the CSV layout.
    cat_idxs = [0, 1, 2]
    cat_dims = [13, 32, 25]

    tabnet_params = {"cat_idxs":cat_idxs,
                     "cat_dims":cat_dims,
                     "cat_emb_dim":1,
                     "optimizer_fn":torch.optim.Adam,
                     "seed": N,
                     "verbose": 0,
                    }

    # Both targets and the non-feature columns are excluded from the inputs.
    skips = ['high_delta', 'low_delta', 'years', 'datetime']
    input_columns = [col for col in train_df.columns if col not in skips]

    # Same pipeline for both targets; each call builds, fits and saves its own
    # regressor, so the "-low" checkpoint holds the low model rather than a
    # second copy of the high model.
    for target, suffix in (("high_delta", "high"), ("low_delta", "low")):
        _train_target_model(
            train_df, val_df, test_df, input_columns, target,
            tabnet_params, f"{exp_name}-{suffix}", N,
            results, checkpoint_save_dir,
        )


def _train_target_model(train_df, val_df, test_df, input_columns, target,
                        tabnet_params, exp, run_id, results_store, ckpt_dir):
    """Fit one TabNetRegressor on `target`, record test MSE/MAE under `exp`, save the model."""
    X_train = train_df[input_columns].values
    Y_train = train_df[target].values.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df[target].values.reshape((-1,1))

    X_test = test_df[input_columns].values
    Y_test = test_df[target].values.reshape((-1,1))

    reg = TabNetRegressor(**tabnet_params)

    # patience equals max_epochs, so training always runs the full 200 epochs.
    reg.fit(X_train, Y_train,
            eval_set=[(X_val, Y_val)],
            max_epochs=200,
            patience=200)

    Y_pred = reg.predict(X_test).flatten()

    results_store.setdefault(exp, []).append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )

    saveas = opj(ckpt_dir, f"{exp}-{run_id}")
    reg.save_model(saveas)

In [4]:
# Run every (time window, CSV, run index) combination and write one results
# file per time window. `results` and `checkpoint_save_dir` are read as
# globals by experiment(), so they must be (re)bound before it is called.
for df_dir, df_files, experiment_name in df_sets:
    print(df_dir, experiment_name)
    results = {}
    saveas = opj("results", f"{experiment_name}.csv")
    checkpoint_save_dir = opj("ckpts", experiment_name)

    # Create both output locations before training starts: a missing
    # "results" directory would otherwise only surface at open() below,
    # after all runs for this set have already finished.
    os.makedirs(checkpoint_save_dir, exist_ok=True)
    os.makedirs(os.path.dirname(saveas), exist_ok=True)

    for df_file in df_files:
        print(df_file)
        for i in range(exp_times):
            experiment(df_dir, df_file, i)

    # One line per experiment key: "<key>, <list of per-run metric dicts>".
    with open(saveas, 'w') as f:
        for key, runs in results.items():
            f.write("%s, %s\n" % (key, runs))

data/4hours-v2 sent-4hours-v3
deberta.csv
Stop training because you reached max_epochs = 200 with best_epoch = 100 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/deberta-high-0.zip
Stop training because you reached max_epochs = 200 with best_epoch = 190 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/deberta-low-0.zip
Stop training because you reached max_epochs = 200 with best_epoch = 75 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/deberta-high-1.zip
Stop training because you reached max_epochs = 200 with best_epoch = 72 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/deberta-low-1.zip
Stop training because you reached max_epochs = 200 with best_epoch = 1

Successfully saved model at ckpts/sent-4hours-v3/roberta-high-8.zip
Stop training because you reached max_epochs = 200 with best_epoch = 87 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/roberta-low-8.zip
Stop training because you reached max_epochs = 200 with best_epoch = 89 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/roberta-high-9.zip
Stop training because you reached max_epochs = 200 with best_epoch = 170 and best_val_0_mse = 0.00014
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/roberta-low-9.zip
bert.csv
Stop training because you reached max_epochs = 200 with best_epoch = 93 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-4hours-v3/bert-high-0.zip
Stop training because you reached max_e

Successfully saved model at ckpts/sent-8hours-v3/deberta-low-6.zip
Stop training because you reached max_epochs = 200 with best_epoch = 171 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/deberta-high-7.zip
Stop training because you reached max_epochs = 200 with best_epoch = 170 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/deberta-low-7.zip
Stop training because you reached max_epochs = 200 with best_epoch = 192 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/deberta-high-8.zip
Stop training because you reached max_epochs = 200 with best_epoch = 95 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/deberta-low-8.zip
Stop training because you reached max_epochs

Successfully saved model at ckpts/sent-8hours-v3/bert-high-5.zip
Stop training because you reached max_epochs = 200 with best_epoch = 70 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/bert-low-5.zip
Stop training because you reached max_epochs = 200 with best_epoch = 86 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/bert-high-6.zip
Stop training because you reached max_epochs = 200 with best_epoch = 92 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/bert-low-6.zip
Stop training because you reached max_epochs = 200 with best_epoch = 182 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-8hours-v3/bert-high-7.zip
Stop training because you reached max_epochs = 200 with best

Successfully saved model at ckpts/sent-12hours-v3/roberta-low-3.zip
Stop training because you reached max_epochs = 200 with best_epoch = 194 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-12hours-v3/roberta-high-4.zip
Stop training because you reached max_epochs = 200 with best_epoch = 115 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-12hours-v3/roberta-low-4.zip
Stop training because you reached max_epochs = 200 with best_epoch = 39 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-12hours-v3/roberta-high-5.zip
Stop training because you reached max_epochs = 200 with best_epoch = 76 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sent-12hours-v3/roberta-low-5.zip
Stop training because you reached max_ep