In [1]:
import os
opj = os.path.join

import ccxt 
import matplotlib.pyplot as plt 
import pandas as pd 
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor 
from sklearn.metrics import f1_score, mean_squared_error, mean_absolute_error
import torch

In [2]:
df_dir = "data/8hours"
df_files = [
    "crypto-gt.csv",
    "deberta.csv",
    "roberta.csv",
    "bert.csv"
]

experiment_name = "no_neutral_8hours"
saveas = opj("results", f"{experiment_name}.csv")
checkpoint_save_dir = opj("ckpts", experiment_name)
exp_times = 10
results = {}

In [3]:
def experiment(df_dir, df_file, N):
    exp_name = df_file.split(".")[0]
    # load data
    data_dir = opj(df_dir, df_file)
    chart_df = pd.read_csv(data_dir)
    i1 = 0
    i2 = len(chart_df)-1
    st = "2018-02-14 14:00:00"
    en = "2022-04-15 23:00:00"
    filt = []
    for index, row in chart_df.iterrows():
        if chart_df.iloc[i1]['datetime'] < row.datetime and row.datetime < st:
            i1 = index
        if en < row.datetime and row.datetime < chart_df.iloc[i2]['datetime']:
            i2 = index
    chart_df = chart_df[i1+1:i2]
    chart_df.index = range(len(chart_df))
    
    # Train High Model
    ## train/validation/test split 
    train_size = int(chart_df.shape[0] * 0.8) 
    train_df = chart_df.iloc[:train_size,:] 

    val_size = int(chart_df.shape[0] * 0.1) 
    val_df = chart_df.iloc[train_size:train_size+val_size,:]  

    test_df = chart_df.iloc[train_size+val_size:, :] 
    
    categorical_columns = ["months", "days", "hours"]
    features = train_df.columns

    cat_idxs = [0, 1, 2] 
    cat_dims = [13, 32, 25] 

    tabnet_params = {"cat_idxs":cat_idxs, 
                     "cat_dims":cat_dims, 
                     "cat_emb_dim":1, 
                     "optimizer_fn":torch.optim.Adam,
                     "seed": N
                    } 
    
    input_columns = [] 
    skips = ['low_delta', 'years', 'datetime', 'sent_2']
    for col in train_df.columns:
        if col in skips: continue
        input_columns.append(col) 

    X_train = train_df[input_columns].values 
    Y_train = train_df['high_delta'].values 
    Y_train = Y_train.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df['high_delta'].values 
    Y_val = Y_val.reshape((-1,1))

    X_test = test_df[input_columns].values 
    Y_test = test_df['high_delta'].values  
    Y_test = Y_test.reshape((-1,1))
    
    reg_high = TabNetRegressor(**tabnet_params) 

    reg_high.fit(X_train, Y_train, 
                 eval_set=[(X_val, Y_val)], 
                 max_epochs=100, 
                 patience=100)  
    
    Y_pred = reg_high.predict(X_test).flatten() 
    
    exp = f"{exp_name}-high"
    if exp not in results:
        results[exp] = []
        
    results[exp].append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )
    
    saveas = opj(checkpoint_save_dir, f"{exp}-{N}")
    reg_high.save_model(saveas)
    
    # Train Low Model
    input_columns = []             
    skips = ['high_delta', 'years', 'datetime', 'sent_2']
    for col in train_df.columns:
        if col in skips: continue
        input_columns.append(col) 

    X_train = train_df[input_columns].values 
    Y_train = train_df['low_delta'].values 
    Y_train = Y_train.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df['low_delta'].values 
    Y_val = Y_val.reshape((-1,1))

    X_test = test_df[input_columns].values 
    Y_test = test_df['low_delta'].values  
    Y_test = Y_test.reshape((-1,1))
    
    reg_low = TabNetRegressor(**tabnet_params) 

    reg_low.fit(X_train, Y_train, 
                eval_set=[(X_val, Y_val)], 
                max_epochs=200, 
                patience=200)  
    
    Y_pred = reg_low.predict(X_test).flatten() 
    exp = f"{exp_name}-low"
    if exp not in results:
        results[exp] = []
        
    results[exp].append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )
    
    saveas = opj(checkpoint_save_dir, f"{exp}-{N}")
    reg_high.save_model(saveas)

In [None]:
for df_file in df_files:
    for i in range(exp_times):
        experiment(df_dir, df_file, i)

Device used : cuda
epoch 0  | loss: 1.15256 | val_0_mse: 0.02786 |  0:00:00s
epoch 1  | loss: 0.14336 | val_0_mse: 0.00461 |  0:00:01s
epoch 2  | loss: 0.05778 | val_0_mse: 0.00263 |  0:00:02s
epoch 3  | loss: 0.0229  | val_0_mse: 0.00235 |  0:00:03s
epoch 4  | loss: 0.01241 | val_0_mse: 0.00145 |  0:00:04s
epoch 5  | loss: 0.00709 | val_0_mse: 0.00068 |  0:00:04s
epoch 6  | loss: 0.00366 | val_0_mse: 0.00051 |  0:00:05s
epoch 7  | loss: 0.00274 | val_0_mse: 0.00037 |  0:00:06s
epoch 8  | loss: 0.00236 | val_0_mse: 0.00033 |  0:00:07s
epoch 9  | loss: 0.00223 | val_0_mse: 0.00026 |  0:00:07s
epoch 10 | loss: 0.00184 | val_0_mse: 0.00022 |  0:00:08s
epoch 11 | loss: 0.00167 | val_0_mse: 0.00021 |  0:00:09s
epoch 12 | loss: 0.00156 | val_0_mse: 0.00021 |  0:00:10s
epoch 13 | loss: 0.00152 | val_0_mse: 0.00022 |  0:00:10s
epoch 14 | loss: 0.00156 | val_0_mse: 0.00018 |  0:00:11s
epoch 15 | loss: 0.0014  | val_0_mse: 0.00019 |  0:00:12s
epoch 16 | loss: 0.0014  | val_0_mse: 0.00019 |  0:00

epoch 37 | loss: 0.00074 | val_0_mse: 0.00013 |  0:00:29s
epoch 38 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:30s
epoch 39 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:30s
epoch 40 | loss: 0.00071 | val_0_mse: 0.00012 |  0:00:31s
epoch 41 | loss: 0.00069 | val_0_mse: 0.00013 |  0:00:32s
epoch 42 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:33s
epoch 43 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:34s
epoch 44 | loss: 0.00066 | val_0_mse: 0.00013 |  0:00:34s
epoch 45 | loss: 0.00065 | val_0_mse: 0.00012 |  0:00:35s
epoch 46 | loss: 0.00065 | val_0_mse: 0.00012 |  0:00:36s
epoch 47 | loss: 0.00064 | val_0_mse: 0.00013 |  0:00:36s
epoch 48 | loss: 0.00064 | val_0_mse: 0.00013 |  0:00:37s
epoch 49 | loss: 0.00062 | val_0_mse: 0.00012 |  0:00:38s
epoch 50 | loss: 0.00062 | val_0_mse: 0.00012 |  0:00:39s
epoch 51 | loss: 0.00062 | val_0_mse: 0.00012 |  0:00:39s
epoch 52 | loss: 0.0006  | val_0_mse: 0.00013 |  0:00:40s
epoch 53 | loss: 0.0006  | val_0_mse: 0.00013 |  0:00:41s
epoch 54 | los

epoch 179| loss: 0.0003  | val_0_mse: 9e-05   |  0:02:19s
epoch 180| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:20s
epoch 181| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:21s
epoch 182| loss: 0.0003  | val_0_mse: 0.0001  |  0:02:22s
epoch 183| loss: 0.00032 | val_0_mse: 0.0001  |  0:02:22s
epoch 184| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:23s
epoch 185| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:24s
epoch 186| loss: 0.0003  | val_0_mse: 0.0001  |  0:02:25s
epoch 187| loss: 0.0003  | val_0_mse: 9e-05   |  0:02:25s
epoch 188| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:26s
epoch 189| loss: 0.00029 | val_0_mse: 0.00011 |  0:02:27s
epoch 190| loss: 0.00029 | val_0_mse: 0.00011 |  0:02:28s
epoch 191| loss: 0.00029 | val_0_mse: 0.00011 |  0:02:28s
epoch 192| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:29s
epoch 193| loss: 0.00028 | val_0_mse: 0.0001  |  0:02:30s
epoch 194| loss: 0.00029 | val_0_mse: 0.00011 |  0:02:30s
epoch 195| loss: 0.00028 | val_0_mse: 0.0001  |  0:02:31s
epoch 196| los

epoch 12 | loss: 0.00117 | val_0_mse: 0.00017 |  0:00:10s
epoch 13 | loss: 0.00114 | val_0_mse: 0.00017 |  0:00:10s
epoch 14 | loss: 0.00113 | val_0_mse: 0.00017 |  0:00:11s
epoch 15 | loss: 0.00114 | val_0_mse: 0.00017 |  0:00:12s
epoch 16 | loss: 0.00111 | val_0_mse: 0.00016 |  0:00:12s
epoch 17 | loss: 0.00111 | val_0_mse: 0.00017 |  0:00:13s
epoch 18 | loss: 0.00109 | val_0_mse: 0.00016 |  0:00:14s
epoch 19 | loss: 0.00109 | val_0_mse: 0.00016 |  0:00:15s
epoch 20 | loss: 0.00107 | val_0_mse: 0.00017 |  0:00:16s
epoch 21 | loss: 0.00107 | val_0_mse: 0.00017 |  0:00:16s
epoch 22 | loss: 0.00105 | val_0_mse: 0.00017 |  0:00:17s
epoch 23 | loss: 0.00105 | val_0_mse: 0.00017 |  0:00:18s
epoch 24 | loss: 0.00104 | val_0_mse: 0.00018 |  0:00:19s
epoch 25 | loss: 0.00102 | val_0_mse: 0.00017 |  0:00:19s
epoch 26 | loss: 0.00103 | val_0_mse: 0.00017 |  0:00:20s
epoch 27 | loss: 0.00101 | val_0_mse: 0.00017 |  0:00:21s
epoch 28 | loss: 0.00102 | val_0_mse: 0.00018 |  0:00:22s
epoch 29 | los

epoch 154| loss: 0.00029 | val_0_mse: 0.00017 |  0:02:02s
epoch 155| loss: 0.00029 | val_0_mse: 0.00015 |  0:02:03s
epoch 156| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:04s
epoch 157| loss: 0.00028 | val_0_mse: 0.00013 |  0:02:05s
epoch 158| loss: 0.00028 | val_0_mse: 0.00013 |  0:02:05s
epoch 159| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:06s
epoch 160| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:07s
epoch 161| loss: 0.00028 | val_0_mse: 0.00013 |  0:02:07s
epoch 162| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:08s
epoch 163| loss: 0.00028 | val_0_mse: 0.00013 |  0:02:09s
epoch 164| loss: 0.00028 | val_0_mse: 0.00013 |  0:02:10s
epoch 165| loss: 0.00028 | val_0_mse: 0.00014 |  0:02:10s
epoch 166| loss: 0.00028 | val_0_mse: 0.00015 |  0:02:11s
epoch 167| loss: 0.00028 | val_0_mse: 0.00013 |  0:02:12s
epoch 168| loss: 0.00028 | val_0_mse: 0.00012 |  0:02:13s
epoch 169| loss: 0.00027 | val_0_mse: 0.00012 |  0:02:14s
epoch 170| loss: 0.00028 | val_0_mse: 0.00012 |  0:02:14s
epoch 171| los

epoch 92 | loss: 0.00051 | val_0_mse: 0.00014 |  0:01:12s
epoch 93 | loss: 0.00051 | val_0_mse: 0.00014 |  0:01:13s
epoch 94 | loss: 0.00051 | val_0_mse: 0.00014 |  0:01:14s
epoch 95 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:15s
epoch 96 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:15s
epoch 97 | loss: 0.00051 | val_0_mse: 0.00015 |  0:01:16s
epoch 98 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:17s
epoch 99 | loss: 0.0005  | val_0_mse: 0.00015 |  0:01:18s
Stop training because you reached max_epochs = 100 with best_epoch = 96 and best_val_0_mse = 0.00014
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/no_neutral_8hours/crypto-gt-high-2.zip
Device used : cuda
epoch 0  | loss: 0.70648 | val_0_mse: 0.01183 |  0:00:00s
epoch 1  | loss: 0.13658 | val_0_mse: 0.02627 |  0:00:01s
epoch 2  | loss: 0.03488 | val_0_mse: 0.00418 |  0:00:02s
epoch 3  | loss: 0.01099 | val_0_mse: 0.0011  |  0:00:03s
epoch 4  | loss: 0.00434 | val_0_mse: 0.00052 |  0:00:03s


epoch 130| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:43s
epoch 131| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:44s
epoch 132| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:44s
epoch 133| loss: 0.00037 | val_0_mse: 0.00014 |  0:01:45s
epoch 134| loss: 0.00038 | val_0_mse: 0.00013 |  0:01:46s
epoch 135| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:47s
epoch 136| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:47s
epoch 137| loss: 0.00037 | val_0_mse: 0.00014 |  0:01:48s
epoch 138| loss: 0.00036 | val_0_mse: 0.00014 |  0:01:49s
epoch 139| loss: 0.00037 | val_0_mse: 0.00014 |  0:01:50s
epoch 140| loss: 0.00037 | val_0_mse: 0.00014 |  0:01:50s
epoch 141| loss: 0.00036 | val_0_mse: 0.00014 |  0:01:51s
epoch 142| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:52s
epoch 143| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:52s
epoch 144| loss: 0.00035 | val_0_mse: 0.00013 |  0:01:53s
epoch 145| loss: 0.00035 | val_0_mse: 0.00013 |  0:01:54s
epoch 146| loss: 0.00035 | val_0_mse: 0.00013 |  0:01:55s
epoch 147| los

epoch 68 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:04s
epoch 69 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:04s
epoch 70 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:05s
epoch 71 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:07s
epoch 72 | loss: 0.00058 | val_0_mse: 0.00015 |  0:01:08s
epoch 73 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:08s
epoch 74 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:09s
epoch 75 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:10s
epoch 76 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:12s
epoch 77 | loss: 0.00056 | val_0_mse: 0.00015 |  0:01:13s
epoch 78 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:14s
epoch 79 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:15s
epoch 80 | loss: 0.00054 | val_0_mse: 0.00015 |  0:01:16s
epoch 81 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:17s
epoch 82 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:18s
epoch 83 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:19s
epoch 84 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:20s
epoch 85 | los

epoch 106| loss: 0.00049 | val_0_mse: 0.00013 |  0:01:50s
epoch 107| loss: 0.00049 | val_0_mse: 0.00013 |  0:01:51s
epoch 108| loss: 0.00049 | val_0_mse: 0.00012 |  0:01:52s
epoch 109| loss: 0.00048 | val_0_mse: 0.00013 |  0:01:53s
epoch 110| loss: 0.00048 | val_0_mse: 0.00012 |  0:01:54s
epoch 111| loss: 0.00049 | val_0_mse: 0.00012 |  0:01:55s
epoch 112| loss: 0.00049 | val_0_mse: 0.00013 |  0:01:56s
epoch 113| loss: 0.00049 | val_0_mse: 0.00012 |  0:01:57s
epoch 114| loss: 0.00047 | val_0_mse: 0.00012 |  0:01:58s
epoch 115| loss: 0.00048 | val_0_mse: 0.00012 |  0:01:59s
epoch 116| loss: 0.00046 | val_0_mse: 0.00013 |  0:02:01s
epoch 117| loss: 0.00046 | val_0_mse: 0.00013 |  0:02:02s
epoch 118| loss: 0.00047 | val_0_mse: 0.00012 |  0:02:03s
epoch 119| loss: 0.00048 | val_0_mse: 0.00013 |  0:02:04s
epoch 120| loss: 0.00047 | val_0_mse: 0.00013 |  0:02:05s
epoch 121| loss: 0.00046 | val_0_mse: 0.00013 |  0:02:06s
epoch 122| loss: 0.00046 | val_0_mse: 0.00013 |  0:02:07s
epoch 123| los

epoch 44 | loss: 0.00063 | val_0_mse: 0.00013 |  0:00:43s
epoch 45 | loss: 0.00062 | val_0_mse: 0.00014 |  0:00:44s
epoch 46 | loss: 0.00062 | val_0_mse: 0.00014 |  0:00:45s
epoch 47 | loss: 0.00064 | val_0_mse: 0.00013 |  0:00:46s
epoch 48 | loss: 0.00062 | val_0_mse: 0.00013 |  0:00:47s
epoch 49 | loss: 0.00063 | val_0_mse: 0.00013 |  0:00:48s
epoch 50 | loss: 0.0006  | val_0_mse: 0.00013 |  0:00:49s
epoch 51 | loss: 0.0006  | val_0_mse: 0.00013 |  0:00:50s
epoch 52 | loss: 0.00059 | val_0_mse: 0.00012 |  0:00:51s
epoch 53 | loss: 0.00059 | val_0_mse: 0.00014 |  0:00:52s
epoch 54 | loss: 0.00058 | val_0_mse: 0.00013 |  0:00:53s
epoch 55 | loss: 0.00058 | val_0_mse: 0.00014 |  0:00:54s
epoch 56 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:55s
epoch 57 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:56s
epoch 58 | loss: 0.00059 | val_0_mse: 0.00014 |  0:00:57s
epoch 59 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:58s
epoch 60 | loss: 0.00055 | val_0_mse: 0.00014 |  0:00:59s
epoch 61 | los

epoch 82 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:22s
epoch 83 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:23s
epoch 84 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:24s
epoch 85 | loss: 0.00057 | val_0_mse: 0.00012 |  0:01:26s
epoch 86 | loss: 0.00055 | val_0_mse: 0.00012 |  0:01:27s
epoch 87 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:28s
epoch 88 | loss: 0.00054 | val_0_mse: 0.00012 |  0:01:29s
epoch 89 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:30s
epoch 90 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:31s
epoch 91 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:32s
epoch 92 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:33s
epoch 93 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:34s
epoch 94 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:35s
epoch 95 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:36s
epoch 96 | loss: 0.00052 | val_0_mse: 0.00015 |  0:01:38s
epoch 97 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:39s
epoch 98 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:40s
epoch 99 | los

epoch 20 | loss: 0.00093 | val_0_mse: 0.00016 |  0:00:21s
epoch 21 | loss: 0.0009  | val_0_mse: 0.00016 |  0:00:22s
epoch 22 | loss: 0.00088 | val_0_mse: 0.00015 |  0:00:23s
epoch 23 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:24s
epoch 24 | loss: 0.00084 | val_0_mse: 0.00016 |  0:00:25s
epoch 25 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:26s
epoch 26 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:27s
epoch 27 | loss: 0.00081 | val_0_mse: 0.00015 |  0:00:28s
epoch 28 | loss: 0.00078 | val_0_mse: 0.00015 |  0:00:29s
epoch 29 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:30s
epoch 30 | loss: 0.00075 | val_0_mse: 0.00014 |  0:00:31s
epoch 31 | loss: 0.00075 | val_0_mse: 0.00014 |  0:00:32s
epoch 32 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:33s
epoch 33 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:34s
epoch 34 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:35s
epoch 35 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:36s
epoch 36 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:37s
epoch 37 | los

epoch 58 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:01s
epoch 59 | loss: 0.00057 | val_0_mse: 0.00012 |  0:01:02s
epoch 60 | loss: 0.00057 | val_0_mse: 0.00011 |  0:01:03s
epoch 61 | loss: 0.00057 | val_0_mse: 0.0001  |  0:01:04s
epoch 62 | loss: 0.00056 | val_0_mse: 0.00011 |  0:01:05s
epoch 63 | loss: 0.00055 | val_0_mse: 0.00012 |  0:01:06s
epoch 64 | loss: 0.00055 | val_0_mse: 0.00012 |  0:01:07s
epoch 65 | loss: 0.00054 | val_0_mse: 0.0001  |  0:01:08s
epoch 66 | loss: 0.00054 | val_0_mse: 0.00011 |  0:01:09s
epoch 67 | loss: 0.00054 | val_0_mse: 0.00011 |  0:01:10s
epoch 68 | loss: 0.00053 | val_0_mse: 0.0001  |  0:01:11s
epoch 69 | loss: 0.00052 | val_0_mse: 9e-05   |  0:01:12s
epoch 70 | loss: 0.00052 | val_0_mse: 9e-05   |  0:01:13s
epoch 71 | loss: 0.00052 | val_0_mse: 0.0001  |  0:01:14s
epoch 72 | loss: 0.00051 | val_0_mse: 8e-05   |  0:01:15s
epoch 73 | loss: 0.00051 | val_0_mse: 8e-05   |  0:01:16s
epoch 74 | loss: 0.00055 | val_0_mse: 9e-05   |  0:01:17s
epoch 75 | los

Successfully saved model at ckpts/no_neutral_8hours/crypto-gt-low-5.zip
Device used : cuda
epoch 0  | loss: 0.79179 | val_0_mse: 0.01409 |  0:00:01s
epoch 1  | loss: 0.13758 | val_0_mse: 0.00878 |  0:00:02s
epoch 2  | loss: 0.04525 | val_0_mse: 0.00471 |  0:00:03s
epoch 3  | loss: 0.01576 | val_0_mse: 0.00164 |  0:00:04s
epoch 4  | loss: 0.00685 | val_0_mse: 0.00046 |  0:00:05s
epoch 5  | loss: 0.00448 | val_0_mse: 0.00038 |  0:00:06s
epoch 6  | loss: 0.00279 | val_0_mse: 0.00017 |  0:00:07s
epoch 7  | loss: 0.00183 | val_0_mse: 0.00017 |  0:00:08s
epoch 8  | loss: 0.00158 | val_0_mse: 0.00016 |  0:00:09s
epoch 9  | loss: 0.00154 | val_0_mse: 0.00019 |  0:00:10s
epoch 10 | loss: 0.00132 | val_0_mse: 0.00018 |  0:00:11s
epoch 11 | loss: 0.00129 | val_0_mse: 0.00018 |  0:00:12s
epoch 12 | loss: 0.00121 | val_0_mse: 0.00016 |  0:00:13s
epoch 13 | loss: 0.00116 | val_0_mse: 0.00017 |  0:00:14s
epoch 14 | loss: 0.00109 | val_0_mse: 0.00016 |  0:00:15s
epoch 15 | loss: 0.00107 | val_0_mse: 0

epoch 36 | loss: 0.00079 | val_0_mse: 0.00013 |  0:00:38s
epoch 37 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:40s
epoch 38 | loss: 0.0008  | val_0_mse: 0.00013 |  0:00:41s
epoch 39 | loss: 0.00077 | val_0_mse: 0.00014 |  0:00:42s
epoch 40 | loss: 0.00078 | val_0_mse: 0.00013 |  0:00:43s
epoch 41 | loss: 0.00076 | val_0_mse: 0.00014 |  0:00:43s
epoch 42 | loss: 0.00076 | val_0_mse: 0.00014 |  0:00:44s
epoch 43 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:45s
epoch 44 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:46s
epoch 45 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:46s
epoch 46 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:47s
epoch 47 | loss: 0.00076 | val_0_mse: 0.00015 |  0:00:48s
epoch 48 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:49s
epoch 49 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:50s
epoch 50 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:51s
epoch 51 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:52s
epoch 52 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:52s
epoch 53 | los

epoch 178| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:34s
epoch 179| loss: 0.00034 | val_0_mse: 0.00012 |  0:02:35s
epoch 180| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:35s
epoch 181| loss: 0.00035 | val_0_mse: 0.00012 |  0:02:36s
epoch 182| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:37s
epoch 183| loss: 0.00033 | val_0_mse: 0.00012 |  0:02:38s
epoch 184| loss: 0.00033 | val_0_mse: 0.00012 |  0:02:38s
epoch 185| loss: 0.00031 | val_0_mse: 0.00012 |  0:02:39s
epoch 186| loss: 0.00031 | val_0_mse: 0.00012 |  0:02:40s
epoch 187| loss: 0.00036 | val_0_mse: 0.00017 |  0:02:41s
epoch 188| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:42s
epoch 189| loss: 0.00037 | val_0_mse: 0.00014 |  0:02:43s
epoch 190| loss: 0.00038 | val_0_mse: 0.00012 |  0:02:43s
epoch 191| loss: 0.00036 | val_0_mse: 0.00012 |  0:02:44s
epoch 192| loss: 0.00038 | val_0_mse: 0.00012 |  0:02:45s
epoch 193| loss: 0.00034 | val_0_mse: 0.00012 |  0:02:45s
epoch 194| loss: 0.00032 | val_0_mse: 0.00014 |  0:02:46s
epoch 195| los

epoch 11 | loss: 0.00117 | val_0_mse: 0.00019 |  0:00:09s
epoch 12 | loss: 0.00116 | val_0_mse: 0.00018 |  0:00:09s
epoch 13 | loss: 0.00113 | val_0_mse: 0.00018 |  0:00:10s
epoch 14 | loss: 0.00109 | val_0_mse: 0.00018 |  0:00:11s
epoch 15 | loss: 0.00104 | val_0_mse: 0.00018 |  0:00:12s
epoch 16 | loss: 0.00104 | val_0_mse: 0.00018 |  0:00:13s
epoch 17 | loss: 0.001   | val_0_mse: 0.00018 |  0:00:13s
epoch 18 | loss: 0.00099 | val_0_mse: 0.00018 |  0:00:14s
epoch 19 | loss: 0.00098 | val_0_mse: 0.00018 |  0:00:15s
epoch 20 | loss: 0.00096 | val_0_mse: 0.00017 |  0:00:16s
epoch 21 | loss: 0.00093 | val_0_mse: 0.00018 |  0:00:16s
epoch 22 | loss: 0.00094 | val_0_mse: 0.00018 |  0:00:17s
epoch 23 | loss: 0.00091 | val_0_mse: 0.00017 |  0:00:18s
epoch 24 | loss: 0.00091 | val_0_mse: 0.00018 |  0:00:18s
epoch 25 | loss: 0.0009  | val_0_mse: 0.00018 |  0:00:19s
epoch 26 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:20s
epoch 27 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:21s
epoch 28 | los

epoch 153| loss: 0.00036 | val_0_mse: 0.0001  |  0:02:01s
epoch 154| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:02s
epoch 155| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:02s
epoch 156| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:03s
epoch 157| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:04s
epoch 158| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:05s
epoch 159| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:06s
epoch 160| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:06s
epoch 161| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:07s
epoch 162| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:08s
epoch 163| loss: 0.00034 | val_0_mse: 9e-05   |  0:02:09s
epoch 164| loss: 0.00034 | val_0_mse: 9e-05   |  0:02:10s
epoch 165| loss: 0.00034 | val_0_mse: 0.0001  |  0:02:10s
epoch 166| loss: 0.00034 | val_0_mse: 9e-05   |  0:02:11s
epoch 167| loss: 0.00034 | val_0_mse: 9e-05   |  0:02:12s
epoch 168| loss: 0.00034 | val_0_mse: 0.0001  |  0:02:13s
epoch 169| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:13s
epoch 170| los

epoch 91 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:10s
epoch 92 | loss: 0.00038 | val_0_mse: 0.00013 |  0:01:11s
epoch 93 | loss: 0.00037 | val_0_mse: 0.00013 |  0:01:12s
epoch 94 | loss: 0.00038 | val_0_mse: 0.00013 |  0:01:13s
epoch 95 | loss: 0.00038 | val_0_mse: 0.00012 |  0:01:13s
epoch 96 | loss: 0.00038 | val_0_mse: 0.00012 |  0:01:14s
epoch 97 | loss: 0.00037 | val_0_mse: 0.00012 |  0:01:15s
epoch 98 | loss: 0.00036 | val_0_mse: 0.00012 |  0:01:16s
epoch 99 | loss: 0.00036 | val_0_mse: 0.00013 |  0:01:17s
Stop training because you reached max_epochs = 100 with best_epoch = 72 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/no_neutral_8hours/crypto-gt-high-8.zip
Device used : cuda
epoch 0  | loss: 0.98596 | val_0_mse: 0.03775 |  0:00:00s
epoch 1  | loss: 0.15904 | val_0_mse: 0.00497 |  0:00:01s
epoch 2  | loss: 0.047   | val_0_mse: 0.00514 |  0:00:02s
epoch 3  | loss: 0.01875 | val_0_mse: 0.00187 |  0:00:03s


epoch 129| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:52s
epoch 130| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:53s
epoch 131| loss: 0.00038 | val_0_mse: 0.00013 |  0:01:54s
epoch 132| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:55s
epoch 133| loss: 0.00038 | val_0_mse: 0.00013 |  0:01:55s
epoch 134| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:56s
epoch 135| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:57s
epoch 136| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:58s
epoch 137| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:58s
epoch 138| loss: 0.00036 | val_0_mse: 0.00012 |  0:01:59s
epoch 139| loss: 0.00036 | val_0_mse: 0.00022 |  0:02:00s
epoch 140| loss: 0.00046 | val_0_mse: 0.00018 |  0:02:01s
epoch 141| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:02s
epoch 142| loss: 0.00037 | val_0_mse: 0.00012 |  0:02:03s
epoch 143| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:03s
epoch 144| loss: 0.00035 | val_0_mse: 0.00012 |  0:02:04s
epoch 145| loss: 0.00034 | val_0_mse: 0.00012 |  0:02:05s
epoch 146| los

epoch 67 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:02s
epoch 68 | loss: 0.00049 | val_0_mse: 0.00014 |  0:01:03s
epoch 69 | loss: 0.00048 | val_0_mse: 0.00011 |  0:01:03s
epoch 70 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:04s
epoch 71 | loss: 0.00048 | val_0_mse: 0.00011 |  0:01:05s
epoch 72 | loss: 0.00047 | val_0_mse: 0.00012 |  0:01:05s
epoch 73 | loss: 0.00046 | val_0_mse: 0.00012 |  0:01:06s
epoch 74 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:07s
epoch 75 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:08s
epoch 76 | loss: 0.00044 | val_0_mse: 0.00012 |  0:01:08s
epoch 77 | loss: 0.00044 | val_0_mse: 0.00013 |  0:01:09s
epoch 78 | loss: 0.00043 | val_0_mse: 0.00011 |  0:01:10s
epoch 79 | loss: 0.00043 | val_0_mse: 0.00011 |  0:01:11s
epoch 80 | loss: 0.00042 | val_0_mse: 0.00012 |  0:01:12s
epoch 81 | loss: 0.00041 | val_0_mse: 0.00011 |  0:01:12s
epoch 82 | loss: 0.00041 | val_0_mse: 0.00012 |  0:01:13s
epoch 83 | loss: 0.0004  | val_0_mse: 0.00012 |  0:01:14s
epoch 84 | los

epoch 105| loss: 0.00038 | val_0_mse: 0.00013 |  0:01:24s
epoch 106| loss: 0.00039 | val_0_mse: 0.0001  |  0:01:25s
epoch 107| loss: 0.00043 | val_0_mse: 0.00016 |  0:01:26s
epoch 108| loss: 0.00045 | val_0_mse: 0.00017 |  0:01:27s
epoch 109| loss: 0.00045 | val_0_mse: 0.00011 |  0:01:28s
epoch 110| loss: 0.00039 | val_0_mse: 0.00011 |  0:01:29s
epoch 111| loss: 0.00039 | val_0_mse: 0.00011 |  0:01:30s
epoch 112| loss: 0.00036 | val_0_mse: 0.0001  |  0:01:31s
epoch 113| loss: 0.00036 | val_0_mse: 0.0001  |  0:01:32s
epoch 114| loss: 0.00036 | val_0_mse: 0.0001  |  0:01:32s
epoch 115| loss: 0.00035 | val_0_mse: 0.0001  |  0:01:33s
epoch 116| loss: 0.00036 | val_0_mse: 0.0001  |  0:01:34s
epoch 117| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:35s
epoch 118| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:36s
epoch 119| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:36s
epoch 120| loss: 0.00036 | val_0_mse: 0.0001  |  0:01:37s
epoch 121| loss: 0.00034 | val_0_mse: 0.0001  |  0:01:38s
epoch 122| los

epoch 43 | loss: 0.00085 | val_0_mse: 0.00014 |  0:00:35s
epoch 44 | loss: 0.00084 | val_0_mse: 0.00015 |  0:00:36s
epoch 45 | loss: 0.00084 | val_0_mse: 0.00015 |  0:00:37s
epoch 46 | loss: 0.00081 | val_0_mse: 0.00015 |  0:00:38s
epoch 47 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:38s
epoch 48 | loss: 0.00079 | val_0_mse: 0.00014 |  0:00:39s
epoch 49 | loss: 0.00078 | val_0_mse: 0.00015 |  0:00:40s
epoch 50 | loss: 0.00078 | val_0_mse: 0.00014 |  0:00:41s
epoch 51 | loss: 0.00077 | val_0_mse: 0.00014 |  0:00:42s
epoch 52 | loss: 0.00077 | val_0_mse: 0.00014 |  0:00:43s
epoch 53 | loss: 0.00076 | val_0_mse: 0.00014 |  0:00:43s
epoch 54 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:44s
epoch 55 | loss: 0.00075 | val_0_mse: 0.00014 |  0:00:45s
epoch 56 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:46s
epoch 57 | loss: 0.00073 | val_0_mse: 0.00013 |  0:00:47s
epoch 58 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:47s
epoch 59 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:48s
epoch 60 | los

epoch 81 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:17s
epoch 82 | loss: 0.00056 | val_0_mse: 0.00013 |  0:01:18s
epoch 83 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:19s
epoch 84 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:20s
epoch 85 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:21s
epoch 86 | loss: 0.00056 | val_0_mse: 0.00013 |  0:01:22s
epoch 87 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:23s
epoch 88 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:24s
epoch 89 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:25s
epoch 90 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:26s
epoch 91 | loss: 0.00052 | val_0_mse: 0.00014 |  0:01:27s
epoch 92 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:28s
epoch 93 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:29s
epoch 94 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:30s
epoch 95 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:31s
epoch 96 | loss: 0.00051 | val_0_mse: 0.00012 |  0:01:32s
epoch 97 | loss: 0.00051 | val_0_mse: 0.00012 |  0:01:33s
epoch 98 | los

epoch 19 | loss: 0.00108 | val_0_mse: 0.00018 |  0:00:30s
epoch 20 | loss: 0.00108 | val_0_mse: 0.00018 |  0:00:31s
epoch 21 | loss: 0.00107 | val_0_mse: 0.00018 |  0:00:33s
epoch 22 | loss: 0.00105 | val_0_mse: 0.00018 |  0:00:35s
epoch 23 | loss: 0.00104 | val_0_mse: 0.00019 |  0:00:36s
epoch 24 | loss: 0.00102 | val_0_mse: 0.00019 |  0:00:38s
epoch 25 | loss: 0.001   | val_0_mse: 0.00019 |  0:00:39s
epoch 26 | loss: 0.00101 | val_0_mse: 0.00018 |  0:00:41s
epoch 27 | loss: 0.00098 | val_0_mse: 0.00018 |  0:00:42s
epoch 28 | loss: 0.00096 | val_0_mse: 0.00018 |  0:00:44s
epoch 29 | loss: 0.00096 | val_0_mse: 0.00018 |  0:00:45s
epoch 30 | loss: 0.00094 | val_0_mse: 0.00017 |  0:00:47s
epoch 31 | loss: 0.00091 | val_0_mse: 0.00017 |  0:00:48s
epoch 32 | loss: 0.00089 | val_0_mse: 0.00016 |  0:00:50s
epoch 33 | loss: 0.00088 | val_0_mse: 0.00017 |  0:00:51s
epoch 34 | loss: 0.00086 | val_0_mse: 0.00017 |  0:00:53s
epoch 35 | loss: 0.00084 | val_0_mse: 0.00017 |  0:00:54s
epoch 36 | los

epoch 57 | loss: 0.00074 | val_0_mse: 0.00014 |  0:01:23s
epoch 58 | loss: 0.00074 | val_0_mse: 0.00016 |  0:01:25s
epoch 59 | loss: 0.00074 | val_0_mse: 0.00015 |  0:01:27s
epoch 60 | loss: 0.00074 | val_0_mse: 0.00018 |  0:01:28s
epoch 61 | loss: 0.00073 | val_0_mse: 0.00016 |  0:01:30s
epoch 62 | loss: 0.00072 | val_0_mse: 0.00016 |  0:01:31s
epoch 63 | loss: 0.00072 | val_0_mse: 0.00015 |  0:01:33s
epoch 64 | loss: 0.00072 | val_0_mse: 0.00014 |  0:01:34s
epoch 65 | loss: 0.00072 | val_0_mse: 0.00014 |  0:01:36s
epoch 66 | loss: 0.0007  | val_0_mse: 0.00014 |  0:01:37s
epoch 67 | loss: 0.0007  | val_0_mse: 0.00015 |  0:01:39s
epoch 68 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:40s
epoch 69 | loss: 0.00069 | val_0_mse: 0.00015 |  0:01:42s
epoch 70 | loss: 0.00069 | val_0_mse: 0.00017 |  0:01:43s
epoch 71 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:44s
epoch 72 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:46s
epoch 73 | loss: 0.00067 | val_0_mse: 0.00014 |  0:01:47s
epoch 74 | los

epoch 199| loss: 0.00032 | val_0_mse: 0.00014 |  0:04:55s
Stop training because you reached max_epochs = 200 with best_epoch = 164 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/no_neutral_8hours/deberta-low-1.zip
Device used : cuda
epoch 0  | loss: 0.60646 | val_0_mse: 0.00719 |  0:00:01s
epoch 1  | loss: 0.11416 | val_0_mse: 0.00524 |  0:00:02s
epoch 2  | loss: 0.04135 | val_0_mse: 0.0052  |  0:00:04s
epoch 3  | loss: 0.0124  | val_0_mse: 0.00077 |  0:00:05s
epoch 4  | loss: 0.00491 | val_0_mse: 0.0002  |  0:00:07s
epoch 5  | loss: 0.00251 | val_0_mse: 0.00023 |  0:00:09s
epoch 6  | loss: 0.00187 | val_0_mse: 0.00015 |  0:00:10s
epoch 7  | loss: 0.00158 | val_0_mse: 0.00014 |  0:00:12s
epoch 8  | loss: 0.00135 | val_0_mse: 0.00014 |  0:00:13s
epoch 9  | loss: 0.00127 | val_0_mse: 0.00015 |  0:00:15s
epoch 10 | loss: 0.00124 | val_0_mse: 0.00015 |  0:00:16s
epoch 11 | loss: 0.00118 | val_0_mse: 0.00015 |  0:00:18s
ep

epoch 32 | loss: 0.00079 | val_0_mse: 0.00017 |  0:00:51s
epoch 33 | loss: 0.00078 | val_0_mse: 0.00017 |  0:00:52s
epoch 34 | loss: 0.00077 | val_0_mse: 0.00017 |  0:00:54s
epoch 35 | loss: 0.00076 | val_0_mse: 0.00016 |  0:00:55s
epoch 36 | loss: 0.00076 | val_0_mse: 0.00017 |  0:00:57s
epoch 37 | loss: 0.00074 | val_0_mse: 0.00017 |  0:00:58s
epoch 38 | loss: 0.00073 | val_0_mse: 0.00016 |  0:00:59s
epoch 39 | loss: 0.00072 | val_0_mse: 0.00017 |  0:01:01s
epoch 40 | loss: 0.00072 | val_0_mse: 0.00016 |  0:01:02s
epoch 41 | loss: 0.00072 | val_0_mse: 0.00016 |  0:01:04s
epoch 42 | loss: 0.00071 | val_0_mse: 0.00016 |  0:01:06s
epoch 43 | loss: 0.00071 | val_0_mse: 0.00016 |  0:01:07s
epoch 44 | loss: 0.00069 | val_0_mse: 0.00015 |  0:01:09s
epoch 45 | loss: 0.00069 | val_0_mse: 0.00016 |  0:01:10s
epoch 46 | loss: 0.00068 | val_0_mse: 0.00015 |  0:01:12s
epoch 47 | loss: 0.00066 | val_0_mse: 0.00015 |  0:01:13s
epoch 48 | loss: 0.00067 | val_0_mse: 0.00015 |  0:01:15s
epoch 49 | los

epoch 174| loss: 0.00038 | val_0_mse: 0.00014 |  0:04:26s
epoch 175| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:28s
epoch 176| loss: 0.00038 | val_0_mse: 0.00014 |  0:04:29s
epoch 177| loss: 0.00037 | val_0_mse: 0.00013 |  0:04:31s
epoch 178| loss: 0.00037 | val_0_mse: 0.00014 |  0:04:32s
epoch 179| loss: 0.00042 | val_0_mse: 0.00018 |  0:04:34s
epoch 180| loss: 0.0004  | val_0_mse: 0.00015 |  0:04:36s
epoch 181| loss: 0.00039 | val_0_mse: 0.00015 |  0:04:37s
epoch 182| loss: 0.00038 | val_0_mse: 0.00014 |  0:04:39s
epoch 183| loss: 0.00037 | val_0_mse: 0.00013 |  0:04:40s
epoch 184| loss: 0.00037 | val_0_mse: 0.00014 |  0:04:42s
epoch 185| loss: 0.00037 | val_0_mse: 0.00014 |  0:04:43s
epoch 186| loss: 0.00036 | val_0_mse: 0.00013 |  0:04:45s
epoch 187| loss: 0.00036 | val_0_mse: 0.00013 |  0:04:46s
epoch 188| loss: 0.00036 | val_0_mse: 0.00013 |  0:04:48s
epoch 189| loss: 0.00036 | val_0_mse: 0.00013 |  0:04:49s
epoch 190| loss: 0.00036 | val_0_mse: 0.00013 |  0:04:51s
epoch 191| los

epoch 7  | loss: 0.00228 | val_0_mse: 0.0003  |  0:00:12s
epoch 8  | loss: 0.00256 | val_0_mse: 0.00028 |  0:00:13s
epoch 9  | loss: 0.00193 | val_0_mse: 0.00025 |  0:00:15s
epoch 10 | loss: 0.00167 | val_0_mse: 0.00023 |  0:00:17s
epoch 11 | loss: 0.0016  | val_0_mse: 0.00022 |  0:00:18s
epoch 12 | loss: 0.00142 | val_0_mse: 0.00022 |  0:00:20s
epoch 13 | loss: 0.00133 | val_0_mse: 0.00021 |  0:00:21s
epoch 14 | loss: 0.00138 | val_0_mse: 0.00019 |  0:00:23s
epoch 15 | loss: 0.00127 | val_0_mse: 0.00018 |  0:00:24s
epoch 16 | loss: 0.00122 | val_0_mse: 0.00018 |  0:00:26s
epoch 17 | loss: 0.00119 | val_0_mse: 0.00018 |  0:00:27s
epoch 18 | loss: 0.00124 | val_0_mse: 0.0002  |  0:00:28s
epoch 19 | loss: 0.00118 | val_0_mse: 0.00018 |  0:00:30s
epoch 20 | loss: 0.00114 | val_0_mse: 0.00018 |  0:00:31s
epoch 21 | loss: 0.00131 | val_0_mse: 0.00018 |  0:00:33s
epoch 22 | loss: 0.00134 | val_0_mse: 0.00017 |  0:00:34s
epoch 23 | loss: 0.00114 | val_0_mse: 0.00016 |  0:00:36s
epoch 24 | los

epoch 149| loss: 0.00035 | val_0_mse: 0.00014 |  0:03:16s
epoch 150| loss: 0.00036 | val_0_mse: 0.00013 |  0:03:17s
epoch 151| loss: 0.00035 | val_0_mse: 0.00015 |  0:03:18s
epoch 152| loss: 0.00038 | val_0_mse: 0.0002  |  0:03:19s
epoch 153| loss: 0.00041 | val_0_mse: 0.00018 |  0:03:20s
epoch 154| loss: 0.00037 | val_0_mse: 0.00014 |  0:03:22s
epoch 155| loss: 0.00036 | val_0_mse: 0.00013 |  0:03:23s
epoch 156| loss: 0.00035 | val_0_mse: 0.00013 |  0:03:24s
epoch 157| loss: 0.00035 | val_0_mse: 0.00014 |  0:03:25s
epoch 158| loss: 0.00035 | val_0_mse: 0.00013 |  0:03:26s
epoch 159| loss: 0.00035 | val_0_mse: 0.00013 |  0:03:27s
epoch 160| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:28s
epoch 161| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:29s
epoch 162| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:30s
epoch 163| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:31s
epoch 164| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:32s
epoch 165| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:33s
epoch 166| los

epoch 87 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:32s
epoch 88 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:33s
epoch 89 | loss: 0.00048 | val_0_mse: 0.00012 |  0:01:34s
epoch 90 | loss: 0.00047 | val_0_mse: 0.00011 |  0:01:35s
epoch 91 | loss: 0.00047 | val_0_mse: 0.00012 |  0:01:36s
epoch 92 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:37s
epoch 93 | loss: 0.00046 | val_0_mse: 0.00011 |  0:01:38s
epoch 94 | loss: 0.00047 | val_0_mse: 0.00011 |  0:01:39s
epoch 95 | loss: 0.00046 | val_0_mse: 0.00013 |  0:01:40s
epoch 96 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:42s
epoch 97 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:43s
epoch 98 | loss: 0.00044 | val_0_mse: 0.00011 |  0:01:44s
epoch 99 | loss: 0.00044 | val_0_mse: 0.00013 |  0:01:45s
Stop training because you reached max_epochs = 100 with best_epoch = 97 and best_val_0_mse = 0.00011
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/no_neutral_8hours/deberta-high-4.zip
Device used : cuda
ep

epoch 125| loss: 0.00042 | val_0_mse: 0.00014 |  0:02:10s
epoch 126| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:11s
epoch 127| loss: 0.00043 | val_0_mse: 0.00015 |  0:02:12s
epoch 128| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:13s
epoch 129| loss: 0.00041 | val_0_mse: 0.00013 |  0:02:14s
epoch 130| loss: 0.00041 | val_0_mse: 0.00014 |  0:02:15s
epoch 131| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:16s
epoch 132| loss: 0.00041 | val_0_mse: 0.00013 |  0:02:17s
epoch 133| loss: 0.00041 | val_0_mse: 0.00014 |  0:02:19s
epoch 134| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:19s
epoch 135| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:20s
epoch 136| loss: 0.00041 | val_0_mse: 0.00014 |  0:02:21s
epoch 137| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:22s
epoch 138| loss: 0.00041 | val_0_mse: 0.00013 |  0:02:24s
epoch 139| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:25s
epoch 140| loss: 0.00043 | val_0_mse: 0.00014 |  0:02:26s
epoch 141| loss: 0.00044 | val_0_mse: 0.00013 |  0:02:27s
epoch 142| los

epoch 63 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:22s
epoch 64 | loss: 0.0005  | val_0_mse: 0.00012 |  0:01:23s
epoch 65 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:25s
epoch 66 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:26s
epoch 67 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:27s
epoch 68 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:28s
epoch 69 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:29s
epoch 70 | loss: 0.00047 | val_0_mse: 0.00012 |  0:01:30s
epoch 71 | loss: 0.00047 | val_0_mse: 0.00012 |  0:01:32s
epoch 72 | loss: 0.00046 | val_0_mse: 0.00013 |  0:01:33s
epoch 73 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:34s
epoch 74 | loss: 0.00046 | val_0_mse: 0.00012 |  0:01:35s
epoch 75 | loss: 0.00045 | val_0_mse: 0.00014 |  0:01:36s
epoch 76 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:38s
epoch 77 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:39s
epoch 78 | loss: 0.00044 | val_0_mse: 0.00012 |  0:01:40s
epoch 79 | loss: 0.00044 | val_0_mse: 0.00012 |  0:01:41s
epoch 80 | los

epoch 101| loss: 0.0004  | val_0_mse: 9e-05   |  0:02:06s
epoch 102| loss: 0.00039 | val_0_mse: 0.00011 |  0:02:07s
epoch 103| loss: 0.00038 | val_0_mse: 9e-05   |  0:02:08s
epoch 104| loss: 0.00043 | val_0_mse: 0.0001  |  0:02:10s
epoch 105| loss: 0.00039 | val_0_mse: 0.0001  |  0:02:11s
epoch 106| loss: 0.00037 | val_0_mse: 9e-05   |  0:02:12s
epoch 107| loss: 0.00037 | val_0_mse: 0.0001  |  0:02:14s
epoch 108| loss: 0.00037 | val_0_mse: 9e-05   |  0:02:15s
epoch 109| loss: 0.00037 | val_0_mse: 9e-05   |  0:02:16s
epoch 110| loss: 0.00036 | val_0_mse: 8e-05   |  0:02:17s
epoch 111| loss: 0.00036 | val_0_mse: 8e-05   |  0:02:19s
epoch 112| loss: 0.00035 | val_0_mse: 8e-05   |  0:02:20s
epoch 113| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:21s
epoch 114| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:22s
epoch 115| loss: 0.00035 | val_0_mse: 8e-05   |  0:02:24s
epoch 116| loss: 0.00035 | val_0_mse: 8e-05   |  0:02:25s
epoch 117| loss: 0.00035 | val_0_mse: 9e-05   |  0:02:26s
epoch 118| los

epoch 39 | loss: 0.00072 | val_0_mse: 0.00015 |  0:00:50s
epoch 40 | loss: 0.00071 | val_0_mse: 0.00017 |  0:00:51s
epoch 41 | loss: 0.00069 | val_0_mse: 0.00015 |  0:00:52s
epoch 42 | loss: 0.00069 | val_0_mse: 0.00015 |  0:00:54s
epoch 43 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:55s
epoch 44 | loss: 0.00068 | val_0_mse: 0.00016 |  0:00:56s
epoch 45 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:57s
epoch 46 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:58s
epoch 47 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:00s
epoch 48 | loss: 0.00066 | val_0_mse: 0.00015 |  0:01:01s
epoch 49 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:02s
epoch 50 | loss: 0.00066 | val_0_mse: 0.00015 |  0:01:03s
epoch 51 | loss: 0.00065 | val_0_mse: 0.00015 |  0:01:05s
epoch 52 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:06s
epoch 53 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:07s
epoch 54 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:09s
epoch 55 | loss: 0.00062 | val_0_mse: 0.00014 |  0:01:10s
epoch 56 | los

epoch 77 | loss: 0.0007  | val_0_mse: 0.00013 |  0:01:35s
epoch 78 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:36s
epoch 79 | loss: 0.00068 | val_0_mse: 0.00015 |  0:01:37s
epoch 80 | loss: 0.00068 | val_0_mse: 0.00014 |  0:01:38s
epoch 81 | loss: 0.00068 | val_0_mse: 0.00013 |  0:01:39s
epoch 82 | loss: 0.00067 | val_0_mse: 0.00014 |  0:01:41s
epoch 83 | loss: 0.00068 | val_0_mse: 0.00014 |  0:01:42s
epoch 84 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:43s
epoch 85 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:44s
epoch 86 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:46s
epoch 87 | loss: 0.00066 | val_0_mse: 0.00013 |  0:01:47s
epoch 88 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:48s
epoch 89 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:49s
epoch 90 | loss: 0.00064 | val_0_mse: 0.00013 |  0:01:50s
epoch 91 | loss: 0.00062 | val_0_mse: 0.00014 |  0:01:51s
epoch 92 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:52s
epoch 93 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:53s
epoch 94 | los

epoch 15 | loss: 0.00103 | val_0_mse: 0.00018 |  0:00:16s
epoch 16 | loss: 0.00102 | val_0_mse: 0.00017 |  0:00:18s
epoch 17 | loss: 0.00103 | val_0_mse: 0.00018 |  0:00:19s
epoch 18 | loss: 0.00098 | val_0_mse: 0.00018 |  0:00:20s
epoch 19 | loss: 0.00096 | val_0_mse: 0.00018 |  0:00:21s
epoch 20 | loss: 0.00093 | val_0_mse: 0.00018 |  0:00:22s
epoch 21 | loss: 0.00092 | val_0_mse: 0.00018 |  0:00:23s
epoch 22 | loss: 0.0009  | val_0_mse: 0.00019 |  0:00:24s
epoch 23 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:25s
epoch 24 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:26s
epoch 25 | loss: 0.00087 | val_0_mse: 0.00019 |  0:00:27s
epoch 26 | loss: 0.00091 | val_0_mse: 0.00019 |  0:00:28s
epoch 27 | loss: 0.00088 | val_0_mse: 0.00019 |  0:00:29s
epoch 28 | loss: 0.00091 | val_0_mse: 0.00018 |  0:00:30s
epoch 29 | loss: 0.00084 | val_0_mse: 0.00019 |  0:00:31s
epoch 30 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:32s
epoch 31 | loss: 0.00083 | val_0_mse: 0.00018 |  0:00:33s
epoch 32 | los

epoch 53 | loss: 0.00041 | val_0_mse: 0.00017 |  0:00:57s
epoch 54 | loss: 0.00039 | val_0_mse: 0.00015 |  0:00:58s
epoch 55 | loss: 0.00038 | val_0_mse: 0.00016 |  0:01:00s
epoch 56 | loss: 0.00038 | val_0_mse: 0.00016 |  0:01:01s
epoch 57 | loss: 0.00038 | val_0_mse: 0.00016 |  0:01:01s
epoch 58 | loss: 0.00037 | val_0_mse: 0.00015 |  0:01:03s
epoch 59 | loss: 0.00037 | val_0_mse: 0.00015 |  0:01:04s
epoch 60 | loss: 0.00035 | val_0_mse: 0.00015 |  0:01:05s
epoch 61 | loss: 0.00036 | val_0_mse: 0.00015 |  0:01:06s
epoch 62 | loss: 0.00036 | val_0_mse: 0.00015 |  0:01:07s
epoch 63 | loss: 0.00035 | val_0_mse: 0.00015 |  0:01:08s
epoch 64 | loss: 0.00036 | val_0_mse: 0.00015 |  0:01:09s
epoch 65 | loss: 0.00035 | val_0_mse: 0.00014 |  0:01:10s
epoch 66 | loss: 0.00034 | val_0_mse: 0.00014 |  0:01:11s
epoch 67 | loss: 0.00034 | val_0_mse: 0.00014 |  0:01:12s
epoch 68 | loss: 0.00033 | val_0_mse: 0.00013 |  0:01:13s
epoch 69 | loss: 0.00034 | val_0_mse: 0.00016 |  0:01:14s
epoch 70 | los

epoch 195| loss: 0.00019 | val_0_mse: 9e-05   |  0:03:35s
epoch 196| loss: 0.00018 | val_0_mse: 9e-05   |  0:03:36s
epoch 197| loss: 0.00019 | val_0_mse: 9e-05   |  0:03:37s
epoch 198| loss: 0.00019 | val_0_mse: 9e-05   |  0:03:39s
epoch 199| loss: 0.00018 | val_0_mse: 9e-05   |  0:03:40s
Stop training because you reached max_epochs = 200 with best_epoch = 154 and best_val_0_mse = 8e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/no_neutral_8hours/deberta-low-7.zip
Device used : cuda
epoch 0  | loss: 1.11553 | val_0_mse: 0.03805 |  0:00:01s
epoch 1  | loss: 0.19317 | val_0_mse: 0.00861 |  0:00:02s
epoch 2  | loss: 0.07731 | val_0_mse: 0.00395 |  0:00:03s
epoch 3  | loss: 0.03091 | val_0_mse: 0.00161 |  0:00:04s
epoch 4  | loss: 0.01552 | val_0_mse: 0.00062 |  0:00:05s
epoch 5  | loss: 0.00855 | val_0_mse: 0.00093 |  0:00:07s
epoch 6  | loss: 0.00616 | val_0_mse: 0.00029 |  0:00:08s
epoch 7  | loss: 0.00407 | val_0_mse: 0.00038 |  0:00:09s
epoc

epoch 28 | loss: 0.00091 | val_0_mse: 0.00017 |  0:00:36s
epoch 29 | loss: 0.00098 | val_0_mse: 0.00018 |  0:00:37s
epoch 30 | loss: 0.00089 | val_0_mse: 0.00017 |  0:00:39s
epoch 31 | loss: 0.00086 | val_0_mse: 0.00018 |  0:00:40s
epoch 32 | loss: 0.00086 | val_0_mse: 0.00017 |  0:00:41s
epoch 33 | loss: 0.00084 | val_0_mse: 0.00017 |  0:00:42s
epoch 34 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:44s
epoch 35 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:45s
epoch 36 | loss: 0.00084 | val_0_mse: 0.00016 |  0:00:46s
epoch 37 | loss: 0.00079 | val_0_mse: 0.00015 |  0:00:48s
epoch 38 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:49s
epoch 39 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:50s
epoch 40 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:52s
epoch 41 | loss: 0.00074 | val_0_mse: 0.00016 |  0:00:53s
epoch 42 | loss: 0.00075 | val_0_mse: 0.00016 |  0:00:54s
epoch 43 | loss: 0.00071 | val_0_mse: 0.00015 |  0:00:55s
epoch 44 | loss: 0.00072 | val_0_mse: 0.00015 |  0:00:57s
epoch 45 | los

epoch 170| loss: 0.00035 | val_0_mse: 0.00012 |  0:03:35s
epoch 171| loss: 0.00036 | val_0_mse: 0.00013 |  0:03:36s
epoch 172| loss: 0.00035 | val_0_mse: 0.00012 |  0:03:38s
epoch 173| loss: 0.00035 | val_0_mse: 0.00012 |  0:03:39s
epoch 174| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:40s
epoch 175| loss: 0.00035 | val_0_mse: 0.00012 |  0:03:42s
epoch 176| loss: 0.00035 | val_0_mse: 0.00014 |  0:03:43s
epoch 177| loss: 0.00037 | val_0_mse: 0.00014 |  0:03:44s
epoch 178| loss: 0.00036 | val_0_mse: 0.00014 |  0:03:45s
epoch 179| loss: 0.00035 | val_0_mse: 0.00013 |  0:03:47s
epoch 180| loss: 0.00035 | val_0_mse: 0.00013 |  0:03:48s
epoch 181| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:49s
epoch 182| loss: 0.00035 | val_0_mse: 0.00019 |  0:03:50s
epoch 183| loss: 0.00039 | val_0_mse: 0.00012 |  0:03:51s
epoch 184| loss: 0.00036 | val_0_mse: 0.00016 |  0:03:53s
epoch 185| loss: 0.00036 | val_0_mse: 0.00012 |  0:03:54s
epoch 186| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:55s
epoch 187| los

epoch 3  | loss: 0.00461 | val_0_mse: 0.00022 |  0:00:04s
epoch 4  | loss: 0.0025  | val_0_mse: 0.00029 |  0:00:05s
epoch 5  | loss: 0.00191 | val_0_mse: 0.00025 |  0:00:07s
epoch 6  | loss: 0.00164 | val_0_mse: 0.00021 |  0:00:08s
epoch 7  | loss: 0.00145 | val_0_mse: 0.00019 |  0:00:09s
epoch 8  | loss: 0.00139 | val_0_mse: 0.00021 |  0:00:11s
epoch 9  | loss: 0.00126 | val_0_mse: 0.0002  |  0:00:12s
epoch 10 | loss: 0.00133 | val_0_mse: 0.00017 |  0:00:13s
epoch 11 | loss: 0.00116 | val_0_mse: 0.00017 |  0:00:14s
epoch 12 | loss: 0.00114 | val_0_mse: 0.00018 |  0:00:16s
epoch 13 | loss: 0.00118 | val_0_mse: 0.00018 |  0:00:17s
epoch 14 | loss: 0.00109 | val_0_mse: 0.00018 |  0:00:18s
epoch 15 | loss: 0.00103 | val_0_mse: 0.00017 |  0:00:19s
epoch 16 | loss: 0.00104 | val_0_mse: 0.00015 |  0:00:21s
epoch 17 | loss: 0.001   | val_0_mse: 0.00017 |  0:00:22s
epoch 18 | loss: 0.00101 | val_0_mse: 0.00016 |  0:00:23s
epoch 19 | loss: 0.00096 | val_0_mse: 0.00016 |  0:00:24s
epoch 20 | los

epoch 145| loss: 0.00022 | val_0_mse: 5e-05   |  0:02:56s
epoch 146| loss: 0.00024 | val_0_mse: 3e-05   |  0:02:57s
epoch 147| loss: 0.00023 | val_0_mse: 3e-05   |  0:02:58s
epoch 148| loss: 0.00022 | val_0_mse: 4e-05   |  0:02:59s
epoch 149| loss: 0.00023 | val_0_mse: 3e-05   |  0:03:00s
epoch 150| loss: 0.00022 | val_0_mse: 3e-05   |  0:03:01s
epoch 151| loss: 0.00022 | val_0_mse: 3e-05   |  0:03:02s
epoch 152| loss: 0.00023 | val_0_mse: 3e-05   |  0:03:03s
epoch 153| loss: 0.00022 | val_0_mse: 3e-05   |  0:03:04s
epoch 154| loss: 0.00022 | val_0_mse: 3e-05   |  0:03:05s
epoch 155| loss: 0.00023 | val_0_mse: 3e-05   |  0:03:06s
epoch 156| loss: 0.00021 | val_0_mse: 3e-05   |  0:03:07s
epoch 157| loss: 0.00021 | val_0_mse: 3e-05   |  0:03:08s
epoch 158| loss: 0.00021 | val_0_mse: 3e-05   |  0:03:09s
epoch 159| loss: 0.00021 | val_0_mse: 3e-05   |  0:03:10s
epoch 160| loss: 0.0002  | val_0_mse: 3e-05   |  0:03:11s
epoch 161| loss: 0.00021 | val_0_mse: 4e-05   |  0:03:13s
epoch 162| los

epoch 83 | loss: 0.00042 | val_0_mse: 0.00013 |  0:01:28s
epoch 84 | loss: 0.00041 | val_0_mse: 0.00013 |  0:01:29s
epoch 85 | loss: 0.00042 | val_0_mse: 0.00013 |  0:01:30s
epoch 86 | loss: 0.00041 | val_0_mse: 0.00013 |  0:01:31s
epoch 87 | loss: 0.00041 | val_0_mse: 0.00013 |  0:01:32s
epoch 88 | loss: 0.00041 | val_0_mse: 0.00013 |  0:01:33s
epoch 89 | loss: 0.0004  | val_0_mse: 0.00013 |  0:01:34s
epoch 90 | loss: 0.0004  | val_0_mse: 0.00013 |  0:01:35s
epoch 91 | loss: 0.0004  | val_0_mse: 0.00013 |  0:01:36s
epoch 92 | loss: 0.0004  | val_0_mse: 0.00013 |  0:01:37s
epoch 93 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:38s
epoch 94 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:39s
epoch 95 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:40s
epoch 96 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:42s
epoch 97 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:42s
epoch 98 | loss: 0.00039 | val_0_mse: 0.00013 |  0:01:43s
epoch 99 | loss: 0.00038 | val_0_mse: 0.00013 |  0:01:44s
Stop training 

epoch 121| loss: 0.00032 | val_0_mse: 4e-05   |  0:02:06s
epoch 122| loss: 0.0003  | val_0_mse: 4e-05   |  0:02:08s
epoch 123| loss: 0.00031 | val_0_mse: 4e-05   |  0:02:09s
epoch 124| loss: 0.00031 | val_0_mse: 4e-05   |  0:02:09s
epoch 125| loss: 0.00031 | val_0_mse: 4e-05   |  0:02:11s
epoch 126| loss: 0.0003  | val_0_mse: 5e-05   |  0:02:12s
epoch 127| loss: 0.0003  | val_0_mse: 4e-05   |  0:02:13s
epoch 128| loss: 0.00029 | val_0_mse: 4e-05   |  0:02:14s
epoch 129| loss: 0.00031 | val_0_mse: 4e-05   |  0:02:15s
epoch 130| loss: 0.00029 | val_0_mse: 4e-05   |  0:02:16s
epoch 131| loss: 0.00029 | val_0_mse: 4e-05   |  0:02:17s
epoch 132| loss: 0.0003  | val_0_mse: 4e-05   |  0:02:18s
epoch 133| loss: 0.0003  | val_0_mse: 5e-05   |  0:02:19s
epoch 134| loss: 0.0003  | val_0_mse: 5e-05   |  0:02:20s
epoch 135| loss: 0.0003  | val_0_mse: 5e-05   |  0:02:21s
epoch 136| loss: 0.0003  | val_0_mse: 4e-05   |  0:02:22s
epoch 137| loss: 0.00028 | val_0_mse: 4e-05   |  0:02:23s
epoch 138| los

epoch 59 | loss: 0.00067 | val_0_mse: 0.00013 |  0:01:00s
epoch 60 | loss: 0.00066 | val_0_mse: 0.00013 |  0:01:01s
epoch 61 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:02s
epoch 62 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:03s
epoch 63 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:04s
epoch 64 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:04s
epoch 65 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:05s
epoch 66 | loss: 0.00063 | val_0_mse: 0.00013 |  0:01:06s
epoch 67 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:07s
epoch 68 | loss: 0.00061 | val_0_mse: 0.00014 |  0:01:08s
epoch 69 | loss: 0.00061 | val_0_mse: 0.00015 |  0:01:09s
epoch 70 | loss: 0.00061 | val_0_mse: 0.00014 |  0:01:10s
epoch 71 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:11s
epoch 72 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:12s
epoch 73 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:13s
epoch 74 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:14s
epoch 75 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:15s
epoch 76 | los

epoch 97 | loss: 0.00046 | val_0_mse: 0.00014 |  0:01:37s
epoch 98 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:38s
epoch 99 | loss: 0.00044 | val_0_mse: 0.00012 |  0:01:39s
epoch 100| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:40s
epoch 101| loss: 0.00043 | val_0_mse: 0.00012 |  0:01:41s
epoch 102| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:42s
epoch 103| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:43s
epoch 104| loss: 0.00046 | val_0_mse: 0.00015 |  0:01:44s
epoch 105| loss: 0.00048 | val_0_mse: 0.00013 |  0:01:45s
epoch 106| loss: 0.00043 | val_0_mse: 0.00012 |  0:01:46s
epoch 107| loss: 0.00041 | val_0_mse: 0.00012 |  0:01:48s
epoch 108| loss: 0.00042 | val_0_mse: 0.00012 |  0:01:49s
epoch 109| loss: 0.0004  | val_0_mse: 0.00011 |  0:01:50s
epoch 110| loss: 0.0004  | val_0_mse: 0.00012 |  0:01:51s
epoch 111| loss: 0.0004  | val_0_mse: 0.00011 |  0:01:52s
epoch 112| loss: 0.00038 | val_0_mse: 0.00011 |  0:01:53s
epoch 113| loss: 0.0004  | val_0_mse: 0.00011 |  0:01:54s
epoch 114| los

epoch 35 | loss: 0.00077 | val_0_mse: 0.00013 |  0:00:33s
epoch 36 | loss: 0.00076 | val_0_mse: 0.00013 |  0:00:34s
epoch 37 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:35s
epoch 38 | loss: 0.00074 | val_0_mse: 0.00013 |  0:00:35s
epoch 39 | loss: 0.00073 | val_0_mse: 0.00013 |  0:00:36s
epoch 40 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:37s
epoch 41 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:38s
epoch 42 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:39s
epoch 43 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:39s
epoch 44 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:40s
epoch 45 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:41s
epoch 46 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:42s
epoch 47 | loss: 0.00067 | val_0_mse: 0.00013 |  0:00:43s
epoch 48 | loss: 0.00066 | val_0_mse: 0.00013 |  0:00:44s
epoch 49 | loss: 0.00065 | val_0_mse: 0.00013 |  0:00:45s
epoch 50 | loss: 0.00065 | val_0_mse: 0.00013 |  0:00:45s
epoch 51 | loss: 0.00064 | val_0_mse: 0.00013 |  0:00:46s
epoch 52 | los

epoch 73 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:04s
epoch 74 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:04s
epoch 75 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:05s
epoch 76 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:06s
epoch 77 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:07s
epoch 78 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:08s
epoch 79 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:09s
epoch 80 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:10s
epoch 81 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:10s
epoch 82 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:11s
epoch 83 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:12s
epoch 84 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:13s
epoch 85 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:14s
epoch 86 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:15s
epoch 87 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:16s
epoch 88 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:17s
epoch 89 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:18s
epoch 90 | los

epoch 11 | loss: 0.00127 | val_0_mse: 0.00018 |  0:00:10s
epoch 12 | loss: 0.00122 | val_0_mse: 0.00016 |  0:00:11s
epoch 13 | loss: 0.00115 | val_0_mse: 0.00016 |  0:00:12s
epoch 14 | loss: 0.00115 | val_0_mse: 0.00016 |  0:00:12s
epoch 15 | loss: 0.00108 | val_0_mse: 0.00016 |  0:00:13s
epoch 16 | loss: 0.00105 | val_0_mse: 0.00017 |  0:00:14s
epoch 17 | loss: 0.00101 | val_0_mse: 0.00016 |  0:00:15s
epoch 18 | loss: 0.00103 | val_0_mse: 0.00017 |  0:00:16s
epoch 19 | loss: 0.00099 | val_0_mse: 0.00017 |  0:00:17s
epoch 20 | loss: 0.00096 | val_0_mse: 0.00016 |  0:00:17s
epoch 21 | loss: 0.00096 | val_0_mse: 0.00017 |  0:00:18s
epoch 22 | loss: 0.00094 | val_0_mse: 0.00018 |  0:00:19s
epoch 23 | loss: 0.00093 | val_0_mse: 0.00017 |  0:00:20s
epoch 24 | loss: 0.0009  | val_0_mse: 0.00016 |  0:00:21s
epoch 25 | loss: 0.00089 | val_0_mse: 0.00016 |  0:00:22s
epoch 26 | loss: 0.0009  | val_0_mse: 0.00017 |  0:00:23s
epoch 27 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:24s
epoch 28 | los

epoch 49 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:43s
epoch 50 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:44s
epoch 51 | loss: 0.00071 | val_0_mse: 0.00015 |  0:00:45s
epoch 52 | loss: 0.00071 | val_0_mse: 0.00015 |  0:00:46s
epoch 53 | loss: 0.00071 | val_0_mse: 0.00016 |  0:00:47s
epoch 54 | loss: 0.00071 | val_0_mse: 0.00015 |  0:00:48s
epoch 55 | loss: 0.0007  | val_0_mse: 0.00015 |  0:00:49s
epoch 56 | loss: 0.00069 | val_0_mse: 0.00015 |  0:00:49s
epoch 57 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:50s
epoch 58 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:51s
epoch 59 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:52s
epoch 60 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:53s
epoch 61 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:54s
epoch 62 | loss: 0.00066 | val_0_mse: 0.00016 |  0:00:55s
epoch 63 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:55s
epoch 64 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:56s
epoch 65 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:57s
epoch 66 | los

epoch 191| loss: 0.00028 | val_0_mse: 8e-05   |  0:02:49s
epoch 192| loss: 0.00027 | val_0_mse: 8e-05   |  0:02:50s
epoch 193| loss: 0.00027 | val_0_mse: 7e-05   |  0:02:51s
epoch 194| loss: 0.00028 | val_0_mse: 7e-05   |  0:02:52s
epoch 195| loss: 0.00027 | val_0_mse: 7e-05   |  0:02:53s
epoch 196| loss: 0.00028 | val_0_mse: 7e-05   |  0:02:54s
epoch 197| loss: 0.00026 | val_0_mse: 8e-05   |  0:02:55s
epoch 198| loss: 0.00026 | val_0_mse: 8e-05   |  0:02:56s
epoch 199| loss: 0.00027 | val_0_mse: 7e-05   |  0:02:57s
Stop training because you reached max_epochs = 200 with best_epoch = 196 and best_val_0_mse = 7e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/no_neutral_8hours/roberta-low-3.zip
Device used : cuda
epoch 0  | loss: 0.60147 | val_0_mse: 0.04063 |  0:00:00s
epoch 1  | loss: 0.17262 | val_0_mse: 0.01267 |  0:00:01s
epoch 2  | loss: 0.03906 | val_0_mse: 0.00755 |  0:00:03s
epoch 3  | loss: 0.01386 | val_0_mse: 0.00187 |  0:00:04s
epoc

epoch 24 | loss: 0.00126 | val_0_mse: 0.00017 |  0:00:26s
epoch 25 | loss: 0.00105 | val_0_mse: 0.00017 |  0:00:27s
epoch 26 | loss: 0.00104 | val_0_mse: 0.00015 |  0:00:28s
epoch 27 | loss: 0.00103 | val_0_mse: 0.00015 |  0:00:29s
epoch 28 | loss: 0.00103 | val_0_mse: 0.00015 |  0:00:30s
epoch 29 | loss: 0.001   | val_0_mse: 0.00016 |  0:00:31s
epoch 30 | loss: 0.00101 | val_0_mse: 0.00016 |  0:00:32s
epoch 31 | loss: 0.00101 | val_0_mse: 0.00015 |  0:00:33s
epoch 32 | loss: 0.001   | val_0_mse: 0.00015 |  0:00:34s
epoch 33 | loss: 0.00102 | val_0_mse: 0.00016 |  0:00:35s
epoch 34 | loss: 0.00104 | val_0_mse: 0.00015 |  0:00:36s
epoch 35 | loss: 0.00101 | val_0_mse: 0.00015 |  0:00:37s
epoch 36 | loss: 0.00099 | val_0_mse: 0.00015 |  0:00:38s
epoch 37 | loss: 0.00098 | val_0_mse: 0.00015 |  0:00:39s
epoch 38 | loss: 0.00099 | val_0_mse: 0.00016 |  0:00:40s
epoch 39 | loss: 0.00097 | val_0_mse: 0.00015 |  0:00:41s
epoch 40 | loss: 0.00094 | val_0_mse: 0.00015 |  0:00:42s
epoch 41 | los

epoch 166| loss: 0.00043 | val_0_mse: 8e-05   |  0:03:26s
epoch 167| loss: 0.00043 | val_0_mse: 8e-05   |  0:03:27s
epoch 168| loss: 0.00043 | val_0_mse: 8e-05   |  0:03:28s
epoch 169| loss: 0.00043 | val_0_mse: 8e-05   |  0:03:30s
epoch 170| loss: 0.00043 | val_0_mse: 8e-05   |  0:03:31s
epoch 171| loss: 0.00043 | val_0_mse: 8e-05   |  0:03:32s
epoch 172| loss: 0.00041 | val_0_mse: 8e-05   |  0:03:34s
epoch 173| loss: 0.00042 | val_0_mse: 8e-05   |  0:03:35s
epoch 174| loss: 0.00041 | val_0_mse: 8e-05   |  0:03:36s
epoch 175| loss: 0.00041 | val_0_mse: 0.00011 |  0:03:38s
epoch 176| loss: 0.00044 | val_0_mse: 0.0001  |  0:03:39s
epoch 177| loss: 0.00044 | val_0_mse: 8e-05   |  0:03:40s
epoch 178| loss: 0.0004  | val_0_mse: 8e-05   |  0:03:41s
epoch 179| loss: 0.00041 | val_0_mse: 9e-05   |  0:03:43s
epoch 180| loss: 0.00041 | val_0_mse: 9e-05   |  0:03:44s
epoch 181| loss: 0.0004  | val_0_mse: 8e-05   |  0:03:45s
epoch 182| loss: 0.0004  | val_0_mse: 8e-05   |  0:03:46s
epoch 183| los

In [None]:
if not os.path.exists(checkpoint_save_dir):
    os.makedirs(checkpoint_save_dir, exist_ok=True)
with open(saveas, 'w') as f:
    for key in results.keys():
        f.write("%s, %s\n" % (key, results[key]))