In [1]:
import os
opj = os.path.join

import ccxt 
import matplotlib.pyplot as plt 
import pandas as pd 
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor 
from sklearn.metrics import f1_score, mean_squared_error, mean_absolute_error
import torch

In [2]:
# --- Experiment configuration -------------------------------------------
# Input data: one CSV per sentiment source, all read from the same directory.
df_dir = "data/8hours"
df_files = ["crypto-gt.csv", "deberta.csv", "roberta.csv", "bert.csv"]

# Number of repeated runs per CSV; the run index is handed to experiment()
# as its third argument by the driver loop.
exp_times = 10

# Output locations, both derived from the experiment name.
experiment_name = "sentiment_all_8hours"
checkpoint_save_dir = opj("ckpts", experiment_name)
saveas = opj("results", f"{experiment_name}.csv")

# Metric records collected by experiment(), keyed by experiment label.
results = dict()

In [3]:
def _load_window(csv_path, start, end):
    """Read a chart CSV and keep the rows whose ``datetime`` is in [start, end].

    The bounds are compared against the raw column values. With the string
    bounds passed by ``experiment`` this is a lexicographic comparison, so it
    follows chronological order only for zero-padded ``YYYY-MM-DD HH:MM:SS``
    text. The returned frame has a fresh 0..n-1 index.
    """
    frame = pd.read_csv(csv_path)
    in_window = (frame["datetime"] >= start) & (frame["datetime"] <= end)
    return frame.loc[in_window].reset_index(drop=True)


def _split_xy(frame, input_columns, target):
    """Return (X, y) arrays for one split; y is reshaped to a single column."""
    features = frame[input_columns].values
    labels = frame[target].values.reshape((-1, 1))
    return features, labels


def _fit_and_record(splits, target, other_target, tabnet_params,
                    max_epochs, patience, exp, N):
    """Train one TabNetRegressor on ``target``, record test metrics, save it.

    Appends an ``{"MSE", "MAE"}`` dict to the module-level ``results[exp]``
    list and writes the fitted model under ``checkpoint_save_dir`` as
    ``f"{exp}-{N}"``.
    """
    train_df, val_df, test_df = splits

    # NOTE(review): only the *other* delta is excluded, so the target column
    # itself remains among the inputs. That hands the model the value it is
    # asked to predict. Left unchanged because dropping it would alter the
    # feature layout of every checkpoint - confirm whether this is intended.
    excluded = {other_target, "years", "datetime"}
    input_columns = [col for col in train_df.columns if col not in excluded]

    X_train, Y_train = _split_xy(train_df, input_columns, target)
    X_val, Y_val = _split_xy(val_df, input_columns, target)
    X_test, Y_test = _split_xy(test_df, input_columns, target)

    regressor = TabNetRegressor(**tabnet_params)
    regressor.fit(X_train, Y_train,
                  eval_set=[(X_val, Y_val)],
                  max_epochs=max_epochs,
                  patience=patience)

    Y_pred = regressor.predict(X_test).flatten()

    results.setdefault(exp, []).append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )

    # Save the regressor that was just trained (the previous code saved the
    # high model a second time under the low model's checkpoint name).
    regressor.save_model(opj(checkpoint_save_dir, f"{exp}-{N}"))


def experiment(df_dir, df_file, N):
    """Run one seeded high/low TabNet regression experiment on a single CSV.

    Parameters
    ----------
    df_dir : str
        Directory containing the CSV.
    df_file : str
        CSV file name; the part before the first "." becomes the experiment
        label prefix.
    N : int
        Run index. Used as the TabNet ``seed`` and as the checkpoint suffix.

    Side effects
    ------------
    Appends test-set MSE/MAE to the module-level ``results`` dict under
    ``"<name>-high"`` and ``"<name>-low"``, and saves both fitted models
    under ``checkpoint_save_dir``.
    """
    exp_name = df_file.split(".")[0]

    # Load the data and restrict it to the fixed study window (inclusive).
    st = "2018-02-14 14:00:00"
    en = "2022-04-15 23:00:00"
    chart_df = _load_window(opj(df_dir, df_file), st, en)

    # Chronological 80/10/10 train/validation/test split (no shuffling).
    n_rows = chart_df.shape[0]
    train_size = int(n_rows * 0.8)
    val_size = int(n_rows * 0.1)
    splits = (
        chart_df.iloc[:train_size, :],
        chart_df.iloc[train_size:train_size + val_size, :],
        chart_df.iloc[train_size + val_size:, :],
    )

    # The first three input columns are treated as categorical; presumably
    # these are months / days / hours with cardinalities 13 / 32 / 25 -
    # TODO confirm against the CSV column order.
    tabnet_params = {"cat_idxs": [0, 1, 2],
                     "cat_dims": [13, 32, 25],
                     "cat_emb_dim": 1,
                     "optimizer_fn": torch.optim.Adam,
                     "seed": N
                    }

    # patience equals max_epochs in both runs, so early stopping is
    # effectively disabled and training always runs the full epoch budget.
    _fit_and_record(splits, "high_delta", "low_delta", tabnet_params,
                    max_epochs=100, patience=100,
                    exp=f"{exp_name}-high", N=N)

    _fit_and_record(splits, "low_delta", "high_delta", tabnet_params,
                    max_epochs=200, patience=200,
                    exp=f"{exp_name}-low", N=N)

In [None]:
# Run every CSV exp_times times; the run index doubles as the seed argument.
runs = [(df_file, seed) for df_file in df_files for seed in range(exp_times)]
for df_file, seed in runs:
    experiment(df_dir, df_file, seed)

Device used : cuda
epoch 0  | loss: 0.2573  | val_0_mse: 0.00587 |  0:00:00s
epoch 1  | loss: 0.03208 | val_0_mse: 0.00136 |  0:00:01s
epoch 2  | loss: 0.00714 | val_0_mse: 0.00081 |  0:00:02s
epoch 3  | loss: 0.00271 | val_0_mse: 0.00015 |  0:00:03s
epoch 4  | loss: 0.00183 | val_0_mse: 0.00014 |  0:00:04s
epoch 5  | loss: 0.00144 | val_0_mse: 0.00014 |  0:00:04s
epoch 6  | loss: 0.00137 | val_0_mse: 0.00018 |  0:00:05s
epoch 7  | loss: 0.00127 | val_0_mse: 0.00015 |  0:00:06s
epoch 8  | loss: 0.00127 | val_0_mse: 0.00015 |  0:00:07s
epoch 9  | loss: 0.00116 | val_0_mse: 0.00015 |  0:00:07s
epoch 10 | loss: 0.00113 | val_0_mse: 0.00015 |  0:00:08s
epoch 11 | loss: 0.00111 | val_0_mse: 0.00015 |  0:00:09s
epoch 12 | loss: 0.00109 | val_0_mse: 0.00015 |  0:00:10s
epoch 13 | loss: 0.00105 | val_0_mse: 0.00014 |  0:00:10s
epoch 14 | loss: 0.00104 | val_0_mse: 0.00013 |  0:00:11s
epoch 15 | loss: 0.00103 | val_0_mse: 0.00015 |  0:00:12s
epoch 16 | loss: 0.00102 | val_0_mse: 0.00015 |  0:00

epoch 37 | loss: 0.00074 | val_0_mse: 0.00017 |  0:00:27s
epoch 38 | loss: 0.00073 | val_0_mse: 0.00016 |  0:00:28s
epoch 39 | loss: 0.00073 | val_0_mse: 0.00017 |  0:00:29s
epoch 40 | loss: 0.00072 | val_0_mse: 0.00016 |  0:00:30s
epoch 41 | loss: 0.0007  | val_0_mse: 0.00016 |  0:00:30s
epoch 42 | loss: 0.0007  | val_0_mse: 0.00016 |  0:00:31s
epoch 43 | loss: 0.00069 | val_0_mse: 0.00016 |  0:00:32s
epoch 44 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:32s
epoch 45 | loss: 0.00067 | val_0_mse: 0.00016 |  0:00:33s
epoch 46 | loss: 0.00067 | val_0_mse: 0.00016 |  0:00:34s
epoch 47 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:35s
epoch 48 | loss: 0.00066 | val_0_mse: 0.00015 |  0:00:35s
epoch 49 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:36s
epoch 50 | loss: 0.00064 | val_0_mse: 0.00015 |  0:00:37s
epoch 51 | loss: 0.00064 | val_0_mse: 0.00015 |  0:00:38s
epoch 52 | loss: 0.00063 | val_0_mse: 0.00015 |  0:00:39s
epoch 53 | loss: 0.00063 | val_0_mse: 0.00015 |  0:00:39s
epoch 54 | los

epoch 179| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:15s
epoch 180| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:16s
epoch 181| loss: 0.00036 | val_0_mse: 0.00014 |  0:02:17s
epoch 182| loss: 0.00037 | val_0_mse: 0.00015 |  0:02:17s
epoch 183| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:18s
epoch 184| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:19s
epoch 185| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:20s
epoch 186| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:20s
epoch 187| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:21s
epoch 188| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:22s
epoch 189| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:23s
epoch 190| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:24s
epoch 191| loss: 0.00035 | val_0_mse: 0.00014 |  0:02:24s
epoch 192| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:25s
epoch 193| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:26s
epoch 194| loss: 0.00035 | val_0_mse: 0.00014 |  0:02:26s
epoch 195| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:27s
epoch 196| los

epoch 12 | loss: 0.00137 | val_0_mse: 0.00018 |  0:00:09s
epoch 13 | loss: 0.0013  | val_0_mse: 0.00019 |  0:00:10s
epoch 14 | loss: 0.00127 | val_0_mse: 0.00017 |  0:00:11s
epoch 15 | loss: 0.00122 | val_0_mse: 0.00017 |  0:00:11s
epoch 16 | loss: 0.0012  | val_0_mse: 0.00017 |  0:00:12s
epoch 17 | loss: 0.00116 | val_0_mse: 0.00017 |  0:00:13s
epoch 18 | loss: 0.00113 | val_0_mse: 0.00016 |  0:00:14s
epoch 19 | loss: 0.00112 | val_0_mse: 0.00016 |  0:00:14s
epoch 20 | loss: 0.0011  | val_0_mse: 0.00016 |  0:00:15s
epoch 21 | loss: 0.0011  | val_0_mse: 0.00017 |  0:00:16s
epoch 22 | loss: 0.00109 | val_0_mse: 0.00016 |  0:00:17s
epoch 23 | loss: 0.00108 | val_0_mse: 0.00017 |  0:00:17s
epoch 24 | loss: 0.00105 | val_0_mse: 0.00016 |  0:00:18s
epoch 25 | loss: 0.00106 | val_0_mse: 0.00016 |  0:00:19s
epoch 26 | loss: 0.00102 | val_0_mse: 0.00018 |  0:00:20s
epoch 27 | loss: 0.00102 | val_0_mse: 0.00016 |  0:00:20s
epoch 28 | loss: 0.00101 | val_0_mse: 0.00016 |  0:00:21s
epoch 29 | los

epoch 154| loss: 0.00041 | val_0_mse: 0.00013 |  0:01:56s
epoch 155| loss: 0.00041 | val_0_mse: 0.00013 |  0:01:57s
epoch 156| loss: 0.00041 | val_0_mse: 0.00013 |  0:01:58s
epoch 157| loss: 0.0004  | val_0_mse: 0.00013 |  0:01:59s
epoch 158| loss: 0.0004  | val_0_mse: 0.00012 |  0:01:59s
epoch 159| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:00s
epoch 160| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:01s
epoch 161| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:02s
epoch 162| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:02s
epoch 163| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:03s
epoch 164| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:04s
epoch 165| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:05s
epoch 166| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:05s
epoch 167| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:06s
epoch 168| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:07s
epoch 169| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:08s
epoch 170| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:08s
epoch 171| los

epoch 91 | loss: 0.00048 | val_0_mse: 9e-05   |  0:01:09s
epoch 92 | loss: 0.00047 | val_0_mse: 0.0001  |  0:01:10s
epoch 93 | loss: 0.00047 | val_0_mse: 0.0001  |  0:01:10s
epoch 94 | loss: 0.00047 | val_0_mse: 8e-05   |  0:01:11s
epoch 95 | loss: 0.00047 | val_0_mse: 9e-05   |  0:01:12s
epoch 96 | loss: 0.00045 | val_0_mse: 9e-05   |  0:01:13s
epoch 97 | loss: 0.00045 | val_0_mse: 8e-05   |  0:01:14s
epoch 98 | loss: 0.00044 | val_0_mse: 8e-05   |  0:01:14s
epoch 99 | loss: 0.00044 | val_0_mse: 9e-05   |  0:01:15s
Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_mse = 8e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_8hours/crypto-gt-high-2.zip
Device used : cuda
epoch 0  | loss: 3.48614 | val_0_mse: 0.15736 |  0:00:00s
epoch 1  | loss: 0.31414 | val_0_mse: 0.08014 |  0:00:01s
epoch 2  | loss: 0.08006 | val_0_mse: 0.03286 |  0:00:02s
epoch 3  | loss: 0.02843 | val_0_mse: 0.00374 |  0:00:02s

epoch 128| loss: 0.00037 | val_0_mse: 0.0001  |  0:01:38s
epoch 129| loss: 0.00035 | val_0_mse: 0.0001  |  0:01:38s
epoch 130| loss: 0.00035 | val_0_mse: 0.0001  |  0:01:39s
epoch 131| loss: 0.00035 | val_0_mse: 0.0001  |  0:01:40s
epoch 132| loss: 0.00035 | val_0_mse: 0.0001  |  0:01:41s
epoch 133| loss: 0.00034 | val_0_mse: 0.00011 |  0:01:41s
epoch 134| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:42s
epoch 135| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:43s
epoch 136| loss: 0.00035 | val_0_mse: 0.0001  |  0:01:44s
epoch 137| loss: 0.00034 | val_0_mse: 0.0001  |  0:01:44s
epoch 138| loss: 0.00034 | val_0_mse: 0.0001  |  0:01:45s
epoch 139| loss: 0.00033 | val_0_mse: 0.0001  |  0:01:46s
epoch 140| loss: 0.00033 | val_0_mse: 0.0001  |  0:01:47s
epoch 141| loss: 0.00033 | val_0_mse: 0.0001  |  0:01:48s
epoch 142| loss: 0.00033 | val_0_mse: 0.0001  |  0:01:49s
epoch 143| loss: 0.00033 | val_0_mse: 0.0001  |  0:01:49s
epoch 144| loss: 0.00032 | val_0_mse: 0.0001  |  0:01:50s
epoch 145| los

epoch 65 | loss: 0.00058 | val_0_mse: 0.00013 |  0:00:52s
epoch 66 | loss: 0.00058 | val_0_mse: 0.00014 |  0:00:53s
epoch 67 | loss: 0.00057 | val_0_mse: 0.00016 |  0:00:54s
epoch 68 | loss: 0.00056 | val_0_mse: 0.00014 |  0:00:55s
epoch 69 | loss: 0.00057 | val_0_mse: 0.00014 |  0:00:56s
epoch 70 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:57s
epoch 71 | loss: 0.00054 | val_0_mse: 0.00014 |  0:00:58s
epoch 72 | loss: 0.00054 | val_0_mse: 0.00014 |  0:00:59s
epoch 73 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:00s
epoch 74 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:01s
epoch 75 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:02s
epoch 76 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:03s
epoch 77 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:03s
epoch 78 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:04s
epoch 79 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:05s
epoch 80 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:06s
epoch 81 | loss: 0.0005  | val_0_mse: 0.00012 |  0:01:07s
epoch 82 | los

epoch 102| loss: 0.00056 | val_0_mse: 0.00014 |  0:01:42s
epoch 103| loss: 0.00055 | val_0_mse: 0.00015 |  0:01:43s
epoch 104| loss: 0.00055 | val_0_mse: 0.00014 |  0:01:44s
epoch 105| loss: 0.00055 | val_0_mse: 0.00014 |  0:01:45s
epoch 106| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:46s
epoch 107| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:47s
epoch 108| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:47s
epoch 109| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:48s
epoch 110| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:49s
epoch 111| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:50s
epoch 112| loss: 0.00055 | val_0_mse: 0.00014 |  0:01:51s
epoch 113| loss: 0.00053 | val_0_mse: 0.00014 |  0:01:52s
epoch 114| loss: 0.00055 | val_0_mse: 0.00014 |  0:01:53s
epoch 115| loss: 0.00053 | val_0_mse: 0.00014 |  0:01:54s
epoch 116| loss: 0.00053 | val_0_mse: 0.00014 |  0:01:55s
epoch 117| loss: 0.00053 | val_0_mse: 0.00014 |  0:01:56s
epoch 118| loss: 0.00053 | val_0_mse: 0.00015 |  0:01:57s
epoch 119| los

epoch 39 | loss: 0.00068 | val_0_mse: 0.00019 |  0:00:39s
epoch 40 | loss: 0.00068 | val_0_mse: 0.00018 |  0:00:39s
epoch 41 | loss: 0.00067 | val_0_mse: 0.00017 |  0:00:40s
epoch 42 | loss: 0.00067 | val_0_mse: 0.00018 |  0:00:41s
epoch 43 | loss: 0.00066 | val_0_mse: 0.00019 |  0:00:42s
epoch 44 | loss: 0.00065 | val_0_mse: 0.00018 |  0:00:43s
epoch 45 | loss: 0.00064 | val_0_mse: 0.00018 |  0:00:44s
epoch 46 | loss: 0.00064 | val_0_mse: 0.00018 |  0:00:45s
epoch 47 | loss: 0.00063 | val_0_mse: 0.00017 |  0:00:46s
epoch 48 | loss: 0.00063 | val_0_mse: 0.00018 |  0:00:47s
epoch 49 | loss: 0.00062 | val_0_mse: 0.00016 |  0:00:48s
epoch 50 | loss: 0.00061 | val_0_mse: 0.00017 |  0:00:49s
epoch 51 | loss: 0.00062 | val_0_mse: 0.00016 |  0:00:50s
epoch 52 | loss: 0.00063 | val_0_mse: 0.00018 |  0:00:50s
epoch 53 | loss: 0.00061 | val_0_mse: 0.00018 |  0:00:51s
epoch 54 | loss: 0.00061 | val_0_mse: 0.00017 |  0:00:52s
epoch 55 | loss: 0.00066 | val_0_mse: 0.00019 |  0:00:53s
epoch 56 | los

epoch 76 | loss: 0.00061 | val_0_mse: 0.00015 |  0:01:14s
epoch 77 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:15s
epoch 78 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:16s
epoch 79 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:17s
epoch 80 | loss: 0.0006  | val_0_mse: 0.00016 |  0:01:18s
epoch 81 | loss: 0.00058 | val_0_mse: 0.00015 |  0:01:19s
epoch 82 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:20s
epoch 83 | loss: 0.00057 | val_0_mse: 0.00015 |  0:01:21s
epoch 84 | loss: 0.00057 | val_0_mse: 0.00017 |  0:01:21s
epoch 85 | loss: 0.00056 | val_0_mse: 0.00015 |  0:01:22s
epoch 86 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:23s
epoch 87 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:24s
epoch 88 | loss: 0.00054 | val_0_mse: 0.00015 |  0:01:25s
epoch 89 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:25s
epoch 90 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:26s
epoch 91 | loss: 0.00054 | val_0_mse: 0.00015 |  0:01:27s
epoch 92 | loss: 0.00054 | val_0_mse: 0.00017 |  0:01:28s
epoch 93 | los

epoch 13 | loss: 0.0011  | val_0_mse: 0.00018 |  0:00:14s
epoch 14 | loss: 0.00113 | val_0_mse: 0.00017 |  0:00:15s
epoch 15 | loss: 0.00106 | val_0_mse: 0.00018 |  0:00:16s
epoch 16 | loss: 0.00102 | val_0_mse: 0.00017 |  0:00:17s
epoch 17 | loss: 0.001   | val_0_mse: 0.00018 |  0:00:18s
epoch 18 | loss: 0.00103 | val_0_mse: 0.00017 |  0:00:19s
epoch 19 | loss: 0.00096 | val_0_mse: 0.00017 |  0:00:20s
epoch 20 | loss: 0.00093 | val_0_mse: 0.00017 |  0:00:21s
epoch 21 | loss: 0.00093 | val_0_mse: 0.00017 |  0:00:22s
epoch 22 | loss: 0.00091 | val_0_mse: 0.00017 |  0:00:23s
epoch 23 | loss: 0.00089 | val_0_mse: 0.00017 |  0:00:24s
epoch 24 | loss: 0.00088 | val_0_mse: 0.00016 |  0:00:25s
epoch 25 | loss: 0.00088 | val_0_mse: 0.00016 |  0:00:26s
epoch 26 | loss: 0.00085 | val_0_mse: 0.00016 |  0:00:27s
epoch 27 | loss: 0.00085 | val_0_mse: 0.00015 |  0:00:28s
epoch 28 | loss: 0.00085 | val_0_mse: 0.00016 |  0:00:29s
epoch 29 | loss: 0.00082 | val_0_mse: 0.00015 |  0:00:30s
epoch 30 | los

epoch 50 | loss: 0.00075 | val_0_mse: 0.00015 |  0:00:50s
epoch 51 | loss: 0.00073 | val_0_mse: 0.00016 |  0:00:51s
epoch 52 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:52s
epoch 53 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:53s
epoch 54 | loss: 0.00069 | val_0_mse: 0.00014 |  0:00:54s
epoch 55 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:55s
epoch 56 | loss: 0.00066 | val_0_mse: 0.00013 |  0:00:56s
epoch 57 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:57s
epoch 58 | loss: 0.00067 | val_0_mse: 0.00016 |  0:00:58s
epoch 59 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:58s
epoch 60 | loss: 0.00064 | val_0_mse: 0.00013 |  0:00:59s
epoch 61 | loss: 0.00062 | val_0_mse: 0.00015 |  0:01:00s
epoch 62 | loss: 0.00061 | val_0_mse: 0.00013 |  0:01:01s
epoch 63 | loss: 0.00061 | val_0_mse: 0.00015 |  0:01:02s
epoch 64 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:03s
epoch 65 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:04s
epoch 66 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:05s
epoch 67 | los

epoch 192| loss: 0.00025 | val_0_mse: 9e-05   |  0:03:10s
epoch 193| loss: 0.00026 | val_0_mse: 8e-05   |  0:03:11s
epoch 194| loss: 0.00025 | val_0_mse: 9e-05   |  0:03:12s
epoch 195| loss: 0.00026 | val_0_mse: 8e-05   |  0:03:13s
epoch 196| loss: 0.00025 | val_0_mse: 8e-05   |  0:03:14s
epoch 197| loss: 0.00026 | val_0_mse: 8e-05   |  0:03:15s
epoch 198| loss: 0.00025 | val_0_mse: 8e-05   |  0:03:16s
epoch 199| loss: 0.00027 | val_0_mse: 7e-05   |  0:03:17s
Stop training because you reached max_epochs = 200 with best_epoch = 199 and best_val_0_mse = 7e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_8hours/crypto-gt-low-5.zip
Device used : cuda
epoch 0  | loss: 0.29053 | val_0_mse: 0.00943 |  0:00:01s
epoch 1  | loss: 0.05089 | val_0_mse: 0.00609 |  0:00:01s
epoch 2  | loss: 0.01686 | val_0_mse: 0.00672 |  0:00:02s
epoch 3  | loss: 0.00674 | val_0_mse: 0.00516 |  0:00:03s
epoch 4  | loss: 0.00402 | val_0_mse: 0.00173 |  0:00:04s

epoch 25 | loss: 0.00098 | val_0_mse: 0.00014 |  0:00:24s
epoch 26 | loss: 0.00095 | val_0_mse: 0.00013 |  0:00:25s
epoch 27 | loss: 0.00094 | val_0_mse: 0.00014 |  0:00:26s
epoch 28 | loss: 0.00093 | val_0_mse: 0.00014 |  0:00:27s
epoch 29 | loss: 0.00091 | val_0_mse: 0.00014 |  0:00:28s
epoch 30 | loss: 0.00089 | val_0_mse: 0.00014 |  0:00:29s
epoch 31 | loss: 0.00087 | val_0_mse: 0.00014 |  0:00:30s
epoch 32 | loss: 0.00085 | val_0_mse: 0.00014 |  0:00:31s
epoch 33 | loss: 0.00084 | val_0_mse: 0.00014 |  0:00:32s
epoch 34 | loss: 0.00081 | val_0_mse: 0.00014 |  0:00:33s
epoch 35 | loss: 0.0008  | val_0_mse: 0.00013 |  0:00:34s
epoch 36 | loss: 0.00077 | val_0_mse: 0.00013 |  0:00:35s
epoch 37 | loss: 0.00076 | val_0_mse: 0.00013 |  0:00:36s
epoch 38 | loss: 0.00074 | val_0_mse: 0.00013 |  0:00:37s
epoch 39 | loss: 0.00072 | val_0_mse: 0.00013 |  0:00:38s
epoch 40 | loss: 0.0007  | val_0_mse: 0.00014 |  0:00:38s
epoch 41 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:39s
epoch 42 | los

epoch 167| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:29s
epoch 168| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:30s
epoch 169| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:31s
epoch 170| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:31s
epoch 171| loss: 0.0004  | val_0_mse: 0.00013 |  0:02:32s
epoch 172| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:33s
epoch 173| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:33s
epoch 174| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:34s
epoch 175| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:35s
epoch 176| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:36s
epoch 177| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:36s
epoch 178| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:37s
epoch 179| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:38s
epoch 180| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:39s
epoch 181| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:40s
epoch 182| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:41s
epoch 183| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:41s
epoch 184| los

epoch 0  | loss: 0.55189 | val_0_mse: 0.02773 |  0:00:00s
epoch 1  | loss: 0.08962 | val_0_mse: 0.00328 |  0:00:01s
epoch 2  | loss: 0.02377 | val_0_mse: 0.00129 |  0:00:02s
epoch 3  | loss: 0.0091  | val_0_mse: 0.00109 |  0:00:03s
epoch 4  | loss: 0.0049  | val_0_mse: 0.00063 |  0:00:04s
epoch 5  | loss: 0.00337 | val_0_mse: 0.00043 |  0:00:05s
epoch 6  | loss: 0.00254 | val_0_mse: 0.0004  |  0:00:06s
epoch 7  | loss: 0.00217 | val_0_mse: 0.00037 |  0:00:06s
epoch 8  | loss: 0.00173 | val_0_mse: 0.00033 |  0:00:07s
epoch 9  | loss: 0.0016  | val_0_mse: 0.00029 |  0:00:08s
epoch 10 | loss: 0.0014  | val_0_mse: 0.00027 |  0:00:09s
epoch 11 | loss: 0.00137 | val_0_mse: 0.00025 |  0:00:09s
epoch 12 | loss: 0.00134 | val_0_mse: 0.00025 |  0:00:10s
epoch 13 | loss: 0.00128 | val_0_mse: 0.00024 |  0:00:11s
epoch 14 | loss: 0.00126 | val_0_mse: 0.00023 |  0:00:12s
epoch 15 | loss: 0.00121 | val_0_mse: 0.00023 |  0:00:13s
epoch 16 | loss: 0.00114 | val_0_mse: 0.00023 |  0:00:13s
epoch 17 | los

epoch 142| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:49s
epoch 143| loss: 0.00042 | val_0_mse: 0.00013 |  0:01:50s
epoch 144| loss: 0.00041 | val_0_mse: 0.00016 |  0:01:51s
epoch 145| loss: 0.00042 | val_0_mse: 0.00015 |  0:01:51s
epoch 146| loss: 0.00041 | val_0_mse: 0.00013 |  0:01:52s
epoch 147| loss: 0.0004  | val_0_mse: 0.00016 |  0:01:53s
epoch 148| loss: 0.0004  | val_0_mse: 0.00014 |  0:01:54s
epoch 149| loss: 0.00039 | val_0_mse: 0.00015 |  0:01:55s
epoch 150| loss: 0.0004  | val_0_mse: 0.00014 |  0:01:56s
epoch 151| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:57s
epoch 152| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:57s
epoch 153| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:58s
epoch 154| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:59s
epoch 155| loss: 0.00038 | val_0_mse: 0.00015 |  0:02:00s
epoch 156| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:01s
epoch 157| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:01s
epoch 158| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:02s
epoch 159| los

epoch 79 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:00s
epoch 80 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:00s
epoch 81 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:01s
epoch 82 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:02s
epoch 83 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:03s
epoch 84 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:03s
epoch 85 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:04s
epoch 86 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:05s
epoch 87 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:06s
epoch 88 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:07s
epoch 89 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:08s
epoch 90 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:09s
epoch 91 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:10s
epoch 92 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:11s
epoch 93 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:12s
epoch 94 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:13s
epoch 95 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:13s
epoch 96 | los

epoch 116| loss: 0.00041 | val_0_mse: 6e-05   |  0:01:28s
epoch 117| loss: 0.0004  | val_0_mse: 6e-05   |  0:01:29s
epoch 118| loss: 0.0004  | val_0_mse: 6e-05   |  0:01:30s
epoch 119| loss: 0.0004  | val_0_mse: 6e-05   |  0:01:31s
epoch 120| loss: 0.00041 | val_0_mse: 6e-05   |  0:01:32s
epoch 121| loss: 0.00041 | val_0_mse: 7e-05   |  0:01:33s
epoch 122| loss: 0.0004  | val_0_mse: 6e-05   |  0:01:34s
epoch 123| loss: 0.0004  | val_0_mse: 6e-05   |  0:01:35s
epoch 124| loss: 0.0004  | val_0_mse: 6e-05   |  0:01:36s
epoch 125| loss: 0.00039 | val_0_mse: 6e-05   |  0:01:37s
epoch 126| loss: 0.00038 | val_0_mse: 6e-05   |  0:01:38s
epoch 127| loss: 0.00038 | val_0_mse: 6e-05   |  0:01:38s
epoch 128| loss: 0.00038 | val_0_mse: 6e-05   |  0:01:39s
epoch 129| loss: 0.00037 | val_0_mse: 7e-05   |  0:01:40s
epoch 130| loss: 0.00037 | val_0_mse: 6e-05   |  0:01:41s
epoch 131| loss: 0.00036 | val_0_mse: 6e-05   |  0:01:42s
epoch 132| loss: 0.00036 | val_0_mse: 7e-05   |  0:01:43s
epoch 133| los

epoch 53 | loss: 0.00063 | val_0_mse: 0.00013 |  0:00:47s
epoch 54 | loss: 0.00062 | val_0_mse: 0.00013 |  0:00:47s
epoch 55 | loss: 0.00061 | val_0_mse: 0.00013 |  0:00:48s
epoch 56 | loss: 0.0006  | val_0_mse: 0.00013 |  0:00:49s
epoch 57 | loss: 0.00058 | val_0_mse: 0.00013 |  0:00:50s
epoch 58 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:51s
epoch 59 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:51s
epoch 60 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:52s
epoch 61 | loss: 0.00055 | val_0_mse: 0.00014 |  0:00:53s
epoch 62 | loss: 0.00054 | val_0_mse: 0.00013 |  0:00:53s
epoch 63 | loss: 0.00053 | val_0_mse: 0.00013 |  0:00:54s
epoch 64 | loss: 0.00053 | val_0_mse: 0.00012 |  0:00:55s
epoch 65 | loss: 0.00052 | val_0_mse: 0.00013 |  0:00:56s
epoch 66 | loss: 0.00051 | val_0_mse: 0.00014 |  0:00:56s
epoch 67 | loss: 0.0005  | val_0_mse: 0.00013 |  0:00:57s
epoch 68 | loss: 0.0005  | val_0_mse: 0.00013 |  0:00:58s
epoch 69 | loss: 0.00049 | val_0_mse: 0.00013 |  0:00:59s
epoch 70 | los

epoch 90 | loss: 0.00047 | val_0_mse: 0.00011 |  0:01:19s
epoch 91 | loss: 0.00046 | val_0_mse: 0.00012 |  0:01:20s
epoch 92 | loss: 0.00046 | val_0_mse: 0.00011 |  0:01:21s
epoch 93 | loss: 0.00045 | val_0_mse: 0.0001  |  0:01:21s
epoch 94 | loss: 0.00045 | val_0_mse: 0.0001  |  0:01:22s
epoch 95 | loss: 0.00046 | val_0_mse: 0.0001  |  0:01:23s
epoch 96 | loss: 0.00046 | val_0_mse: 0.00011 |  0:01:23s
epoch 97 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:24s
epoch 98 | loss: 0.00043 | val_0_mse: 0.00013 |  0:01:25s
epoch 99 | loss: 0.00044 | val_0_mse: 0.00012 |  0:01:26s
epoch 100| loss: 0.00043 | val_0_mse: 9e-05   |  0:01:27s
epoch 101| loss: 0.00043 | val_0_mse: 9e-05   |  0:01:28s
epoch 102| loss: 0.00042 | val_0_mse: 9e-05   |  0:01:29s
epoch 103| loss: 0.00041 | val_0_mse: 0.0001  |  0:01:29s
epoch 104| loss: 0.00042 | val_0_mse: 9e-05   |  0:01:30s
epoch 105| loss: 0.00041 | val_0_mse: 9e-05   |  0:01:31s
epoch 106| loss: 0.00041 | val_0_mse: 0.0001  |  0:01:32s
epoch 107| los

epoch 27 | loss: 0.00088 | val_0_mse: 0.00015 |  0:00:22s
epoch 28 | loss: 0.00088 | val_0_mse: 0.00016 |  0:00:22s
epoch 29 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:23s
epoch 30 | loss: 0.00085 | val_0_mse: 0.00016 |  0:00:24s
epoch 31 | loss: 0.00085 | val_0_mse: 0.00014 |  0:00:24s
epoch 32 | loss: 0.00084 | val_0_mse: 0.00014 |  0:00:25s
epoch 33 | loss: 0.00084 | val_0_mse: 0.00015 |  0:00:26s
epoch 34 | loss: 0.00081 | val_0_mse: 0.00014 |  0:00:27s
epoch 35 | loss: 0.00081 | val_0_mse: 0.00014 |  0:00:27s
epoch 36 | loss: 0.00083 | val_0_mse: 0.00014 |  0:00:28s
epoch 37 | loss: 0.00081 | val_0_mse: 0.00014 |  0:00:29s
epoch 38 | loss: 0.00079 | val_0_mse: 0.00016 |  0:00:30s
epoch 39 | loss: 0.00078 | val_0_mse: 0.00019 |  0:00:31s
epoch 40 | loss: 0.00079 | val_0_mse: 0.00018 |  0:00:31s
epoch 41 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:32s
epoch 42 | loss: 0.00077 | val_0_mse: 0.00014 |  0:00:33s
epoch 43 | loss: 0.00076 | val_0_mse: 0.00015 |  0:00:34s
epoch 44 | los

epoch 64 | loss: 0.0006  | val_0_mse: 0.00015 |  0:00:51s
epoch 65 | loss: 0.00059 | val_0_mse: 0.00014 |  0:00:52s
epoch 66 | loss: 0.00059 | val_0_mse: 0.00014 |  0:00:53s
epoch 67 | loss: 0.00058 | val_0_mse: 0.00013 |  0:00:53s
epoch 68 | loss: 0.00058 | val_0_mse: 0.00014 |  0:00:55s
epoch 69 | loss: 0.00057 | val_0_mse: 0.00014 |  0:00:55s
epoch 70 | loss: 0.00057 | val_0_mse: 0.00014 |  0:00:56s
epoch 71 | loss: 0.00057 | val_0_mse: 0.00014 |  0:00:57s
epoch 72 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:58s
epoch 73 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:59s
epoch 74 | loss: 0.00056 | val_0_mse: 0.00015 |  0:00:59s
epoch 75 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:00s
epoch 76 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:01s
epoch 77 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:02s
epoch 78 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:03s
epoch 79 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:04s
epoch 80 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:05s
epoch 81 | los

epoch 1  | loss: 0.08624 | val_0_mse: 0.00119 |  0:00:01s
epoch 2  | loss: 0.03468 | val_0_mse: 0.01001 |  0:00:02s
epoch 3  | loss: 0.01527 | val_0_mse: 0.00104 |  0:00:03s
epoch 4  | loss: 0.00778 | val_0_mse: 0.00054 |  0:00:04s
epoch 5  | loss: 0.00491 | val_0_mse: 0.00057 |  0:00:05s
epoch 6  | loss: 0.00322 | val_0_mse: 0.00023 |  0:00:06s
epoch 7  | loss: 0.00228 | val_0_mse: 0.00055 |  0:00:07s
epoch 8  | loss: 0.00209 | val_0_mse: 0.00021 |  0:00:08s
epoch 9  | loss: 0.0017  | val_0_mse: 0.00023 |  0:00:09s
epoch 10 | loss: 0.00162 | val_0_mse: 0.00028 |  0:00:11s
epoch 11 | loss: 0.00152 | val_0_mse: 0.00016 |  0:00:12s
epoch 12 | loss: 0.00143 | val_0_mse: 0.00023 |  0:00:13s
epoch 13 | loss: 0.00134 | val_0_mse: 0.00025 |  0:00:14s
epoch 14 | loss: 0.00136 | val_0_mse: 0.0002  |  0:00:15s
epoch 15 | loss: 0.00126 | val_0_mse: 0.00022 |  0:00:16s
epoch 16 | loss: 0.00121 | val_0_mse: 0.0002  |  0:00:17s
epoch 17 | loss: 0.00117 | val_0_mse: 0.00021 |  0:00:17s
epoch 18 | los

epoch 38 | loss: 0.00085 | val_0_mse: 0.00016 |  0:00:46s
epoch 39 | loss: 0.00084 | val_0_mse: 0.00015 |  0:00:47s
epoch 40 | loss: 0.00083 | val_0_mse: 0.00015 |  0:00:49s
epoch 41 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:50s
epoch 42 | loss: 0.00087 | val_0_mse: 0.00015 |  0:00:52s
epoch 43 | loss: 0.00083 | val_0_mse: 0.00018 |  0:00:53s
epoch 44 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:55s
epoch 45 | loss: 0.0008  | val_0_mse: 0.00015 |  0:00:56s
epoch 46 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:58s
epoch 47 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:59s
epoch 48 | loss: 0.00076 | val_0_mse: 0.00016 |  0:01:01s
epoch 49 | loss: 0.00076 | val_0_mse: 0.00015 |  0:01:02s
epoch 50 | loss: 0.00076 | val_0_mse: 0.00017 |  0:01:04s
epoch 51 | loss: 0.00075 | val_0_mse: 0.00016 |  0:01:05s
epoch 52 | loss: 0.00075 | val_0_mse: 0.00015 |  0:01:07s
epoch 53 | loss: 0.00074 | val_0_mse: 0.00015 |  0:01:09s
epoch 54 | loss: 0.00073 | val_0_mse: 0.00015 |  0:01:10s
epoch 55 | los

epoch 180| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:16s
epoch 181| loss: 0.0004  | val_0_mse: 0.00013 |  0:04:17s
epoch 182| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:19s
epoch 183| loss: 0.00039 | val_0_mse: 0.00014 |  0:04:20s
epoch 184| loss: 0.0004  | val_0_mse: 0.00013 |  0:04:21s
epoch 185| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:23s
epoch 186| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:24s
epoch 187| loss: 0.0004  | val_0_mse: 0.00013 |  0:04:26s
epoch 188| loss: 0.0004  | val_0_mse: 0.00015 |  0:04:27s
epoch 189| loss: 0.0004  | val_0_mse: 0.00015 |  0:04:28s
epoch 190| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:30s
epoch 191| loss: 0.0004  | val_0_mse: 0.00013 |  0:04:31s
epoch 192| loss: 0.00039 | val_0_mse: 0.00013 |  0:04:32s
epoch 193| loss: 0.00038 | val_0_mse: 0.00013 |  0:04:34s
epoch 194| loss: 0.00037 | val_0_mse: 0.00013 |  0:04:36s
epoch 195| loss: 0.00038 | val_0_mse: 0.00013 |  0:04:37s
epoch 196| loss: 0.00038 | val_0_mse: 0.00012 |  0:04:39s
epoch 197| los

epoch 13 | loss: 0.00197 | val_0_mse: 0.00029 |  0:00:21s
epoch 14 | loss: 0.00216 | val_0_mse: 0.00027 |  0:00:22s
epoch 15 | loss: 0.00176 | val_0_mse: 0.00027 |  0:00:24s
epoch 16 | loss: 0.00165 | val_0_mse: 0.00026 |  0:00:25s
epoch 17 | loss: 0.00149 | val_0_mse: 0.00026 |  0:00:27s
epoch 18 | loss: 0.00152 | val_0_mse: 0.00026 |  0:00:28s
epoch 19 | loss: 0.00136 | val_0_mse: 0.00026 |  0:00:30s
epoch 20 | loss: 0.00133 | val_0_mse: 0.00026 |  0:00:31s
epoch 21 | loss: 0.00129 | val_0_mse: 0.00026 |  0:00:33s
epoch 22 | loss: 0.00132 | val_0_mse: 0.00026 |  0:00:34s
epoch 23 | loss: 0.00123 | val_0_mse: 0.00026 |  0:00:35s
epoch 24 | loss: 0.00125 | val_0_mse: 0.00026 |  0:00:37s
epoch 25 | loss: 0.00116 | val_0_mse: 0.00026 |  0:00:38s
epoch 26 | loss: 0.00115 | val_0_mse: 0.00025 |  0:00:39s
epoch 27 | loss: 0.00113 | val_0_mse: 0.00025 |  0:00:41s
epoch 28 | loss: 0.00116 | val_0_mse: 0.00025 |  0:00:42s
epoch 29 | loss: 0.00109 | val_0_mse: 0.00025 |  0:00:44s
epoch 30 | los

epoch 155| loss: 0.00038 | val_0_mse: 0.00015 |  0:03:56s
epoch 156| loss: 0.00038 | val_0_mse: 0.00014 |  0:03:57s
epoch 157| loss: 0.00037 | val_0_mse: 0.00015 |  0:03:59s
epoch 158| loss: 0.00038 | val_0_mse: 0.00015 |  0:04:00s
epoch 159| loss: 0.00038 | val_0_mse: 0.00015 |  0:04:02s
epoch 160| loss: 0.00038 | val_0_mse: 0.00015 |  0:04:03s
epoch 161| loss: 0.00038 | val_0_mse: 0.00015 |  0:04:05s
epoch 162| loss: 0.00038 | val_0_mse: 0.00014 |  0:04:06s
epoch 163| loss: 0.00037 | val_0_mse: 0.00014 |  0:04:08s
epoch 164| loss: 0.00036 | val_0_mse: 0.00014 |  0:04:09s
epoch 165| loss: 0.00037 | val_0_mse: 0.00014 |  0:04:11s
epoch 166| loss: 0.00037 | val_0_mse: 0.00016 |  0:04:13s
epoch 167| loss: 0.00037 | val_0_mse: 0.00014 |  0:04:14s
epoch 168| loss: 0.00037 | val_0_mse: 0.00015 |  0:04:15s
epoch 169| loss: 0.00036 | val_0_mse: 0.00014 |  0:04:17s
epoch 170| loss: 0.00036 | val_0_mse: 0.00014 |  0:04:18s
epoch 171| loss: 0.00037 | val_0_mse: 0.00015 |  0:04:20s
epoch 172| los

epoch 92 | loss: 0.00034 | val_0_mse: 7e-05   |  0:02:17s
epoch 93 | loss: 0.00035 | val_0_mse: 6e-05   |  0:02:19s
epoch 94 | loss: 0.00035 | val_0_mse: 6e-05   |  0:02:20s
epoch 95 | loss: 0.00034 | val_0_mse: 8e-05   |  0:02:22s
epoch 96 | loss: 0.00034 | val_0_mse: 9e-05   |  0:02:23s
epoch 97 | loss: 0.00033 | val_0_mse: 7e-05   |  0:02:25s
epoch 98 | loss: 0.00033 | val_0_mse: 7e-05   |  0:02:26s
epoch 99 | loss: 0.00031 | val_0_mse: 6e-05   |  0:02:28s
Stop training because you reached max_epochs = 100 with best_epoch = 99 and best_val_0_mse = 6e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_8hours/deberta-high-3.zip
Device used : cuda
epoch 0  | loss: 2.65847 | val_0_mse: 0.06934 |  0:00:01s
epoch 1  | loss: 0.26849 | val_0_mse: 0.0048  |  0:00:02s
epoch 2  | loss: 0.05368 | val_0_mse: 0.00118 |  0:00:04s
epoch 3  | loss: 0.0181  | val_0_mse: 0.00038 |  0:00:05s
epoch 4  | loss: 0.00921 | val_0_mse: 0.00034 |  0:00:07s
e

epoch 130| loss: 0.00053 | val_0_mse: 0.00014 |  0:03:15s
epoch 131| loss: 0.00052 | val_0_mse: 0.00014 |  0:03:16s
epoch 132| loss: 0.00052 | val_0_mse: 0.00014 |  0:03:18s
epoch 133| loss: 0.00052 | val_0_mse: 0.00013 |  0:03:20s
epoch 134| loss: 0.00051 | val_0_mse: 0.00013 |  0:03:21s
epoch 135| loss: 0.00051 | val_0_mse: 0.00013 |  0:03:23s
epoch 136| loss: 0.00051 | val_0_mse: 0.00014 |  0:03:24s
epoch 137| loss: 0.00051 | val_0_mse: 0.00013 |  0:03:26s
epoch 138| loss: 0.00051 | val_0_mse: 0.00015 |  0:03:27s
epoch 139| loss: 0.00051 | val_0_mse: 0.00013 |  0:03:29s
epoch 140| loss: 0.0005  | val_0_mse: 0.00015 |  0:03:30s
epoch 141| loss: 0.0005  | val_0_mse: 0.00014 |  0:03:31s
epoch 142| loss: 0.0005  | val_0_mse: 0.00013 |  0:03:33s
epoch 143| loss: 0.00051 | val_0_mse: 0.00014 |  0:03:34s
epoch 144| loss: 0.0005  | val_0_mse: 0.00013 |  0:03:36s
epoch 145| loss: 0.0005  | val_0_mse: 0.00014 |  0:03:38s
epoch 146| loss: 0.00049 | val_0_mse: 0.00013 |  0:03:39s
epoch 147| los

epoch 67 | loss: 0.00068 | val_0_mse: 0.00014 |  0:01:23s
epoch 68 | loss: 0.00067 | val_0_mse: 0.00013 |  0:01:24s
epoch 69 | loss: 0.00066 | val_0_mse: 0.00013 |  0:01:25s
epoch 70 | loss: 0.00067 | val_0_mse: 0.00013 |  0:01:26s
epoch 71 | loss: 0.00066 | val_0_mse: 0.00013 |  0:01:27s
epoch 72 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:28s
epoch 73 | loss: 0.00066 | val_0_mse: 0.00013 |  0:01:29s
epoch 74 | loss: 0.00065 | val_0_mse: 0.00013 |  0:01:29s
epoch 75 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:31s
epoch 76 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:32s
epoch 77 | loss: 0.00061 | val_0_mse: 0.00013 |  0:01:33s
epoch 78 | loss: 0.00061 | val_0_mse: 0.00013 |  0:01:34s
epoch 79 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:35s
epoch 80 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:35s
epoch 81 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:36s
epoch 82 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:38s
epoch 83 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:39s
epoch 84 | los

epoch 104| loss: 0.00036 | val_0_mse: 0.00011 |  0:01:50s
epoch 105| loss: 0.00036 | val_0_mse: 0.00011 |  0:01:51s
epoch 106| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:52s
epoch 107| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:53s
epoch 108| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:54s
epoch 109| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:55s
epoch 110| loss: 0.00034 | val_0_mse: 0.00011 |  0:01:56s
epoch 111| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:57s
epoch 112| loss: 0.00034 | val_0_mse: 0.00012 |  0:01:58s
epoch 113| loss: 0.00033 | val_0_mse: 0.00011 |  0:01:59s
epoch 114| loss: 0.00033 | val_0_mse: 0.00011 |  0:02:01s
epoch 115| loss: 0.00035 | val_0_mse: 0.00011 |  0:02:02s
epoch 116| loss: 0.00033 | val_0_mse: 0.00011 |  0:02:03s
epoch 117| loss: 0.00032 | val_0_mse: 0.00011 |  0:02:04s
epoch 118| loss: 0.00031 | val_0_mse: 0.00011 |  0:02:05s
epoch 119| loss: 0.00031 | val_0_mse: 0.00011 |  0:02:06s
epoch 120| loss: 0.00031 | val_0_mse: 0.00011 |  0:02:07s
epoch 121| los

epoch 42 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:44s
epoch 43 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:44s
epoch 44 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:46s
epoch 45 | loss: 0.00072 | val_0_mse: 0.00015 |  0:00:47s
epoch 46 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:48s
epoch 47 | loss: 0.0007  | val_0_mse: 0.00015 |  0:00:49s
epoch 48 | loss: 0.00069 | val_0_mse: 0.00014 |  0:00:50s
epoch 49 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:52s
epoch 50 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:53s
epoch 51 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:54s
epoch 52 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:55s
epoch 53 | loss: 0.00063 | val_0_mse: 0.00015 |  0:00:56s
epoch 54 | loss: 0.0006  | val_0_mse: 0.00014 |  0:00:57s
epoch 55 | loss: 0.00059 | val_0_mse: 0.00015 |  0:00:58s
epoch 56 | loss: 0.00057 | val_0_mse: 0.00015 |  0:00:58s
epoch 57 | loss: 0.00056 | val_0_mse: 0.00015 |  0:00:59s
epoch 58 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:00s
epoch 59 | los

epoch 79 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:34s
epoch 80 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:36s
epoch 81 | loss: 0.00066 | val_0_mse: 0.00013 |  0:01:37s
epoch 82 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:38s
epoch 83 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:39s
epoch 84 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:41s
epoch 85 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:42s
epoch 86 | loss: 0.00063 | val_0_mse: 0.00016 |  0:01:43s
epoch 87 | loss: 0.00064 | val_0_mse: 0.00016 |  0:01:44s
epoch 88 | loss: 0.00063 | val_0_mse: 0.00015 |  0:01:46s
epoch 89 | loss: 0.00062 | val_0_mse: 0.00014 |  0:01:47s
epoch 90 | loss: 0.00062 | val_0_mse: 0.00014 |  0:01:48s
epoch 91 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:49s
epoch 92 | loss: 0.00061 | val_0_mse: 0.00014 |  0:01:50s
epoch 93 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:52s
epoch 94 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:53s
epoch 95 | loss: 0.00061 | val_0_mse: 0.00014 |  0:01:54s
epoch 96 | los

epoch 16 | loss: 0.00097 | val_0_mse: 0.00021 |  0:00:19s
epoch 17 | loss: 0.00093 | val_0_mse: 0.00015 |  0:00:21s
epoch 18 | loss: 0.00091 | val_0_mse: 0.00015 |  0:00:22s
epoch 19 | loss: 0.00089 | val_0_mse: 0.00015 |  0:00:23s
epoch 20 | loss: 0.00086 | val_0_mse: 0.00015 |  0:00:24s
epoch 21 | loss: 0.00085 | val_0_mse: 0.00014 |  0:00:25s
epoch 22 | loss: 0.00084 | val_0_mse: 0.00015 |  0:00:27s
epoch 23 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:28s
epoch 24 | loss: 0.00081 | val_0_mse: 0.00014 |  0:00:29s
epoch 25 | loss: 0.00081 | val_0_mse: 0.00015 |  0:00:30s
epoch 26 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:31s
epoch 27 | loss: 0.00079 | val_0_mse: 0.00015 |  0:00:32s
epoch 28 | loss: 0.00078 | val_0_mse: 0.00014 |  0:00:33s
epoch 29 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:34s
epoch 30 | loss: 0.00076 | val_0_mse: 0.00014 |  0:00:36s
epoch 31 | loss: 0.00075 | val_0_mse: 0.00014 |  0:00:37s
epoch 32 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:38s
epoch 33 | los

epoch 53 | loss: 0.00073 | val_0_mse: 0.00013 |  0:01:07s
epoch 54 | loss: 0.00073 | val_0_mse: 0.00014 |  0:01:08s
epoch 55 | loss: 0.00072 | val_0_mse: 0.00014 |  0:01:09s
epoch 56 | loss: 0.00071 | val_0_mse: 0.00014 |  0:01:10s
epoch 57 | loss: 0.00071 | val_0_mse: 0.00014 |  0:01:11s
epoch 58 | loss: 0.0007  | val_0_mse: 0.00013 |  0:01:13s
epoch 59 | loss: 0.0007  | val_0_mse: 0.00013 |  0:01:14s
epoch 60 | loss: 0.00072 | val_0_mse: 0.00014 |  0:01:15s
epoch 61 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:16s
epoch 62 | loss: 0.00068 | val_0_mse: 0.00013 |  0:01:17s
epoch 63 | loss: 0.00068 | val_0_mse: 0.00014 |  0:01:18s
epoch 64 | loss: 0.00067 | val_0_mse: 0.00014 |  0:01:20s
epoch 65 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:21s
epoch 66 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:22s
epoch 67 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:23s
epoch 68 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:24s
epoch 69 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:25s
epoch 70 | los

epoch 195| loss: 0.00025 | val_0_mse: 0.00014 |  0:04:00s
epoch 196| loss: 0.00027 | val_0_mse: 0.00013 |  0:04:02s
epoch 197| loss: 0.00027 | val_0_mse: 0.00013 |  0:04:03s
epoch 198| loss: 0.00026 | val_0_mse: 0.00013 |  0:04:04s
epoch 199| loss: 0.00025 | val_0_mse: 0.00013 |  0:04:05s
Stop training because you reached max_epochs = 200 with best_epoch = 173 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_8hours/deberta-low-6.zip
Device used : cuda
epoch 0  | loss: 0.63306 | val_0_mse: 0.01268 |  0:00:01s
epoch 1  | loss: 0.04293 | val_0_mse: 0.00101 |  0:00:02s
epoch 2  | loss: 0.01231 | val_0_mse: 0.00022 |  0:00:03s
epoch 3  | loss: 0.00499 | val_0_mse: 0.00017 |  0:00:04s
epoch 4  | loss: 0.00278 | val_0_mse: 0.0002  |  0:00:05s
epoch 5  | loss: 0.00229 | val_0_mse: 0.00022 |  0:00:06s
epoch 6  | loss: 0.00179 | val_0_mse: 0.00019 |  0:00:07s
epoch 7  | loss: 0.00146 | val_0_mse: 0.00019 |  0:00:09s

epoch 28 | loss: 0.001   | val_0_mse: 0.0002  |  0:00:29s
epoch 29 | loss: 0.00097 | val_0_mse: 0.0002  |  0:00:30s
epoch 30 | loss: 0.00098 | val_0_mse: 0.00019 |  0:00:31s
epoch 31 | loss: 0.00094 | val_0_mse: 0.00019 |  0:00:32s
epoch 32 | loss: 0.00095 | val_0_mse: 0.00019 |  0:00:33s
epoch 33 | loss: 0.00093 | val_0_mse: 0.00019 |  0:00:34s
epoch 34 | loss: 0.00092 | val_0_mse: 0.00019 |  0:00:35s
epoch 35 | loss: 0.0009  | val_0_mse: 0.00019 |  0:00:36s
epoch 36 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:37s
epoch 37 | loss: 0.0009  | val_0_mse: 0.00017 |  0:00:38s
epoch 38 | loss: 0.00087 | val_0_mse: 0.00018 |  0:00:40s
epoch 39 | loss: 0.00086 | val_0_mse: 0.00017 |  0:00:41s
epoch 40 | loss: 0.00085 | val_0_mse: 0.00018 |  0:00:42s
epoch 41 | loss: 0.00084 | val_0_mse: 0.00016 |  0:00:43s
epoch 42 | loss: 0.00083 | val_0_mse: 0.00018 |  0:00:44s
epoch 43 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:45s
epoch 44 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:46s
epoch 45 | los

epoch 170| loss: 0.00032 | val_0_mse: 8e-05   |  0:03:00s
epoch 171| loss: 0.00031 | val_0_mse: 9e-05   |  0:03:02s
epoch 172| loss: 0.00031 | val_0_mse: 8e-05   |  0:03:03s
epoch 173| loss: 0.00031 | val_0_mse: 8e-05   |  0:03:04s
epoch 174| loss: 0.0003  | val_0_mse: 8e-05   |  0:03:05s
epoch 175| loss: 0.0003  | val_0_mse: 8e-05   |  0:03:06s
epoch 176| loss: 0.0003  | val_0_mse: 7e-05   |  0:03:08s
epoch 177| loss: 0.0003  | val_0_mse: 8e-05   |  0:03:09s
epoch 178| loss: 0.0003  | val_0_mse: 7e-05   |  0:03:10s
epoch 179| loss: 0.00029 | val_0_mse: 8e-05   |  0:03:11s
epoch 180| loss: 0.00029 | val_0_mse: 7e-05   |  0:03:12s
epoch 181| loss: 0.0003  | val_0_mse: 8e-05   |  0:03:13s
epoch 182| loss: 0.00029 | val_0_mse: 7e-05   |  0:03:14s
epoch 183| loss: 0.00029 | val_0_mse: 7e-05   |  0:03:15s
epoch 184| loss: 0.00029 | val_0_mse: 7e-05   |  0:03:17s
epoch 185| loss: 0.00028 | val_0_mse: 8e-05   |  0:03:18s
epoch 186| loss: 0.00028 | val_0_mse: 7e-05   |  0:03:19s
epoch 187| los

epoch 3  | loss: 0.02043 | val_0_mse: 0.00158 |  0:00:04s
epoch 4  | loss: 0.00773 | val_0_mse: 0.00073 |  0:00:05s
epoch 5  | loss: 0.00456 | val_0_mse: 0.00058 |  0:00:06s
epoch 6  | loss: 0.00313 | val_0_mse: 0.00045 |  0:00:08s
epoch 7  | loss: 0.0026  | val_0_mse: 0.0004  |  0:00:09s
epoch 8  | loss: 0.00223 | val_0_mse: 0.00034 |  0:00:10s
epoch 9  | loss: 0.002   | val_0_mse: 0.0003  |  0:00:11s
epoch 10 | loss: 0.00185 | val_0_mse: 0.00028 |  0:00:12s
epoch 11 | loss: 0.00162 | val_0_mse: 0.00027 |  0:00:13s
epoch 12 | loss: 0.00152 | val_0_mse: 0.00023 |  0:00:14s
epoch 13 | loss: 0.00162 | val_0_mse: 0.00021 |  0:00:15s
epoch 14 | loss: 0.00147 | val_0_mse: 0.0002  |  0:00:17s
epoch 15 | loss: 0.00141 | val_0_mse: 0.0002  |  0:00:18s
epoch 16 | loss: 0.00132 | val_0_mse: 0.00018 |  0:00:19s
epoch 17 | loss: 0.00123 | val_0_mse: 0.00017 |  0:00:20s
epoch 18 | loss: 0.0012  | val_0_mse: 0.00017 |  0:00:21s
epoch 19 | loss: 0.0012  | val_0_mse: 0.00017 |  0:00:22s
epoch 20 | los

epoch 145| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:59s
epoch 146| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:00s
epoch 147| loss: 0.00033 | val_0_mse: 0.00014 |  0:03:01s
epoch 148| loss: 0.00034 | val_0_mse: 0.00013 |  0:03:02s
epoch 149| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:03s
epoch 150| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:05s
epoch 151| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:06s
epoch 152| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:07s
epoch 153| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:08s
epoch 154| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:09s
epoch 155| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:11s
epoch 156| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:12s
epoch 157| loss: 0.00033 | val_0_mse: 0.00014 |  0:03:13s
epoch 158| loss: 0.00033 | val_0_mse: 0.00015 |  0:03:14s
epoch 159| loss: 0.00033 | val_0_mse: 0.00013 |  0:03:15s
epoch 160| loss: 0.00032 | val_0_mse: 0.00014 |  0:03:16s
epoch 161| loss: 0.00032 | val_0_mse: 0.00011 |  0:03:18s
epoch 162| los

epoch 82 | loss: 0.00038 | val_0_mse: 0.00012 |  0:01:42s
epoch 83 | loss: 0.00038 | val_0_mse: 0.00012 |  0:01:43s
epoch 84 | loss: 0.00037 | val_0_mse: 0.00012 |  0:01:44s
epoch 85 | loss: 0.00037 | val_0_mse: 0.00012 |  0:01:45s
epoch 86 | loss: 0.00037 | val_0_mse: 0.00012 |  0:01:46s
epoch 87 | loss: 0.00036 | val_0_mse: 0.00012 |  0:01:47s
epoch 88 | loss: 0.00036 | val_0_mse: 0.00012 |  0:01:48s
epoch 89 | loss: 0.00036 | val_0_mse: 0.00012 |  0:01:50s
epoch 90 | loss: 0.00035 | val_0_mse: 0.00012 |  0:01:51s
epoch 91 | loss: 0.00036 | val_0_mse: 0.00012 |  0:01:52s
epoch 92 | loss: 0.00036 | val_0_mse: 0.00012 |  0:01:53s
epoch 93 | loss: 0.00035 | val_0_mse: 0.00012 |  0:01:55s
epoch 94 | loss: 0.00035 | val_0_mse: 0.00013 |  0:01:56s
epoch 95 | loss: 0.00035 | val_0_mse: 0.00012 |  0:01:57s
epoch 96 | loss: 0.00034 | val_0_mse: 0.00012 |  0:01:59s
epoch 97 | loss: 0.00034 | val_0_mse: 0.00012 |  0:02:00s
epoch 98 | loss: 0.00034 | val_0_mse: 0.00012 |  0:02:01s
epoch 99 | los

epoch 119| loss: 0.00044 | val_0_mse: 0.00013 |  0:02:26s
epoch 120| loss: 0.00044 | val_0_mse: 0.00014 |  0:02:28s
epoch 121| loss: 0.00043 | val_0_mse: 0.00014 |  0:02:29s
epoch 122| loss: 0.00043 | val_0_mse: 0.00014 |  0:02:31s
epoch 123| loss: 0.00043 | val_0_mse: 0.00013 |  0:02:32s
epoch 124| loss: 0.00043 | val_0_mse: 0.00015 |  0:02:34s
epoch 125| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:35s
epoch 126| loss: 0.00042 | val_0_mse: 0.00014 |  0:02:36s
epoch 127| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:37s
epoch 128| loss: 0.00042 | val_0_mse: 0.00014 |  0:02:38s
epoch 129| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:39s
epoch 130| loss: 0.00041 | val_0_mse: 0.00015 |  0:02:40s
epoch 131| loss: 0.00041 | val_0_mse: 0.00014 |  0:02:42s
epoch 132| loss: 0.00041 | val_0_mse: 0.00015 |  0:02:43s
epoch 133| loss: 0.00041 | val_0_mse: 0.00013 |  0:02:44s
epoch 134| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:46s
epoch 135| loss: 0.00041 | val_0_mse: 0.00015 |  0:02:47s
epoch 136| los

epoch 56 | loss: 0.00061 | val_0_mse: 0.00012 |  0:01:08s
epoch 57 | loss: 0.00061 | val_0_mse: 0.00014 |  0:01:09s
epoch 58 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:11s
epoch 59 | loss: 0.00059 | val_0_mse: 0.00012 |  0:01:12s
epoch 60 | loss: 0.00059 | val_0_mse: 0.00012 |  0:01:13s
epoch 61 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:14s
epoch 62 | loss: 0.00058 | val_0_mse: 0.00012 |  0:01:16s
epoch 63 | loss: 0.00058 | val_0_mse: 0.00012 |  0:01:17s
epoch 64 | loss: 0.00057 | val_0_mse: 0.00012 |  0:01:18s
epoch 65 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:19s
epoch 66 | loss: 0.00056 | val_0_mse: 0.00012 |  0:01:21s
epoch 67 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:22s
epoch 68 | loss: 0.00056 | val_0_mse: 0.00012 |  0:01:23s
epoch 69 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:25s
epoch 70 | loss: 0.00056 | val_0_mse: 0.00012 |  0:01:26s
epoch 71 | loss: 0.00055 | val_0_mse: 0.00012 |  0:01:27s
epoch 72 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:28s
epoch 73 | los

epoch 93 | loss: 0.00042 | val_0_mse: 0.00015 |  0:01:36s
epoch 94 | loss: 0.00041 | val_0_mse: 0.00015 |  0:01:37s
epoch 95 | loss: 0.00041 | val_0_mse: 0.00015 |  0:01:38s
epoch 96 | loss: 0.00041 | val_0_mse: 0.00013 |  0:01:39s
epoch 97 | loss: 0.0004  | val_0_mse: 0.00015 |  0:01:40s
epoch 98 | loss: 0.0004  | val_0_mse: 0.00013 |  0:01:41s
epoch 99 | loss: 0.0004  | val_0_mse: 0.00014 |  0:01:42s
epoch 100| loss: 0.0004  | val_0_mse: 0.00014 |  0:01:43s
epoch 101| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:44s
epoch 102| loss: 0.00038 | val_0_mse: 0.00015 |  0:01:45s
epoch 103| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:46s
epoch 104| loss: 0.00039 | val_0_mse: 0.00015 |  0:01:47s
epoch 105| loss: 0.0004  | val_0_mse: 0.00016 |  0:01:48s
epoch 106| loss: 0.00039 | val_0_mse: 0.00017 |  0:01:50s
epoch 107| loss: 0.00038 | val_0_mse: 0.00015 |  0:01:51s
epoch 108| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:52s
epoch 109| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:53s
epoch 110| los

epoch 30 | loss: 0.00103 | val_0_mse: 0.00015 |  0:00:32s
epoch 31 | loss: 0.00102 | val_0_mse: 0.00014 |  0:00:33s
epoch 32 | loss: 0.00098 | val_0_mse: 0.00015 |  0:00:33s
epoch 33 | loss: 0.00096 | val_0_mse: 0.00014 |  0:00:34s
epoch 34 | loss: 0.00096 | val_0_mse: 0.00014 |  0:00:36s
epoch 35 | loss: 0.00095 | val_0_mse: 0.00014 |  0:00:37s
epoch 36 | loss: 0.00092 | val_0_mse: 0.00014 |  0:00:38s
epoch 37 | loss: 0.00092 | val_0_mse: 0.00014 |  0:00:39s
epoch 38 | loss: 0.00092 | val_0_mse: 0.00014 |  0:00:40s
epoch 39 | loss: 0.00089 | val_0_mse: 0.00014 |  0:00:41s
epoch 40 | loss: 0.00089 | val_0_mse: 0.00015 |  0:00:42s
epoch 41 | loss: 0.00088 | val_0_mse: 0.00014 |  0:00:43s
epoch 42 | loss: 0.00086 | val_0_mse: 0.00014 |  0:00:44s
epoch 43 | loss: 0.00085 | val_0_mse: 0.00014 |  0:00:45s
epoch 44 | loss: 0.00085 | val_0_mse: 0.00014 |  0:00:46s
epoch 45 | loss: 0.00084 | val_0_mse: 0.00014 |  0:00:47s
epoch 46 | loss: 0.00083 | val_0_mse: 0.00014 |  0:00:48s
epoch 47 | los

epoch 67 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:12s
epoch 68 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:13s
epoch 69 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:14s
epoch 70 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:16s
epoch 71 | loss: 0.00052 | val_0_mse: 0.00014 |  0:01:17s
epoch 72 | loss: 0.00051 | val_0_mse: 0.00014 |  0:01:18s
epoch 73 | loss: 0.00052 | val_0_mse: 0.00014 |  0:01:19s
epoch 74 | loss: 0.00051 | val_0_mse: 0.00015 |  0:01:19s
epoch 75 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:20s
epoch 76 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:21s
epoch 77 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:23s
epoch 78 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:24s
epoch 79 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:25s
epoch 80 | loss: 0.00049 | val_0_mse: 0.00014 |  0:01:26s
epoch 81 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:27s
epoch 82 | loss: 0.00046 | val_0_mse: 0.00014 |  0:01:28s
epoch 83 | loss: 0.00046 | val_0_mse: 0.00013 |  0:01:29s
epoch 84 | los

epoch 4  | loss: 0.01066 | val_0_mse: 0.00029 |  0:00:04s
epoch 5  | loss: 0.00464 | val_0_mse: 0.00022 |  0:00:05s
epoch 6  | loss: 0.00285 | val_0_mse: 0.0002  |  0:00:06s
epoch 7  | loss: 0.00228 | val_0_mse: 0.00021 |  0:00:07s
epoch 8  | loss: 0.00178 | val_0_mse: 0.00022 |  0:00:08s
epoch 9  | loss: 0.00148 | val_0_mse: 0.00023 |  0:00:10s
epoch 10 | loss: 0.00131 | val_0_mse: 0.00023 |  0:00:11s
epoch 11 | loss: 0.00145 | val_0_mse: 0.00023 |  0:00:12s
epoch 12 | loss: 0.0011  | val_0_mse: 0.00023 |  0:00:13s
epoch 13 | loss: 0.00106 | val_0_mse: 0.00024 |  0:00:14s
epoch 14 | loss: 0.00101 | val_0_mse: 0.00023 |  0:00:15s
epoch 15 | loss: 0.00098 | val_0_mse: 0.00023 |  0:00:16s
epoch 16 | loss: 0.001   | val_0_mse: 0.00023 |  0:00:17s
epoch 17 | loss: 0.001   | val_0_mse: 0.00023 |  0:00:18s
epoch 18 | loss: 0.00106 | val_0_mse: 0.00023 |  0:00:19s
epoch 19 | loss: 0.0013  | val_0_mse: 0.00023 |  0:00:20s
epoch 20 | loss: 0.00089 | val_0_mse: 0.00023 |  0:00:21s
epoch 21 | los

epoch 41 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:42s
epoch 42 | loss: 0.00087 | val_0_mse: 0.00018 |  0:00:43s
epoch 43 | loss: 0.00085 | val_0_mse: 0.00017 |  0:00:44s
epoch 44 | loss: 0.00084 | val_0_mse: 0.00018 |  0:00:45s
epoch 45 | loss: 0.00083 | val_0_mse: 0.00018 |  0:00:46s
epoch 46 | loss: 0.00083 | val_0_mse: 0.00017 |  0:00:47s
epoch 47 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:47s
epoch 48 | loss: 0.00081 | val_0_mse: 0.00019 |  0:00:48s
epoch 49 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:49s
epoch 50 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:50s
epoch 51 | loss: 0.00079 | val_0_mse: 0.00017 |  0:00:51s
epoch 52 | loss: 0.00078 | val_0_mse: 0.00017 |  0:00:52s
epoch 53 | loss: 0.00078 | val_0_mse: 0.00017 |  0:00:52s
epoch 54 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:53s
epoch 55 | loss: 0.00077 | val_0_mse: 0.00017 |  0:00:54s
epoch 56 | loss: 0.00076 | val_0_mse: 0.00018 |  0:00:55s
epoch 57 | loss: 0.00076 | val_0_mse: 0.00016 |  0:00:56s
epoch 58 | los

epoch 183| loss: 0.00041 | val_0_mse: 0.00015 |  0:02:36s
epoch 184| loss: 0.00041 | val_0_mse: 0.00014 |  0:02:37s
epoch 185| loss: 0.00041 | val_0_mse: 0.00014 |  0:02:38s
epoch 186| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:39s
epoch 187| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:40s
epoch 188| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:41s
epoch 189| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:41s
epoch 190| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:42s
epoch 191| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:43s
epoch 192| loss: 0.00039 | val_0_mse: 0.00015 |  0:02:44s
epoch 193| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:44s
epoch 194| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:45s
epoch 195| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:46s
epoch 196| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:47s
epoch 197| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:47s
epoch 198| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:48s
epoch 199| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:49s
Stop training 

epoch 16 | loss: 0.00097 | val_0_mse: 0.00019 |  0:00:13s
epoch 17 | loss: 0.00092 | val_0_mse: 0.00019 |  0:00:14s
epoch 18 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:15s
epoch 19 | loss: 0.00087 | val_0_mse: 0.00018 |  0:00:16s
epoch 20 | loss: 0.00085 | val_0_mse: 0.00018 |  0:00:16s
epoch 21 | loss: 0.00087 | val_0_mse: 0.00019 |  0:00:17s
epoch 22 | loss: 0.00087 | val_0_mse: 0.00018 |  0:00:18s
epoch 23 | loss: 0.00085 | val_0_mse: 0.00018 |  0:00:19s
epoch 24 | loss: 0.00088 | val_0_mse: 0.00019 |  0:00:19s
epoch 25 | loss: 0.00083 | val_0_mse: 0.00017 |  0:00:20s
epoch 26 | loss: 0.00087 | val_0_mse: 0.0002  |  0:00:21s
epoch 27 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:22s
epoch 28 | loss: 0.00086 | val_0_mse: 0.0002  |  0:00:23s
epoch 29 | loss: 0.0009  | val_0_mse: 0.00018 |  0:00:24s
epoch 30 | loss: 0.0008  | val_0_mse: 0.00017 |  0:00:25s
epoch 31 | loss: 0.00079 | val_0_mse: 0.00018 |  0:00:25s
epoch 32 | loss: 0.00079 | val_0_mse: 0.00017 |  0:00:26s
epoch 33 | los

epoch 158| loss: 0.00047 | val_0_mse: 0.00013 |  0:02:10s
epoch 159| loss: 0.00046 | val_0_mse: 0.00012 |  0:02:11s
epoch 160| loss: 0.00045 | val_0_mse: 0.00012 |  0:02:12s
epoch 161| loss: 0.00046 | val_0_mse: 0.00012 |  0:02:13s
epoch 162| loss: 0.00045 | val_0_mse: 0.00012 |  0:02:13s
epoch 163| loss: 0.00045 | val_0_mse: 0.00012 |  0:02:14s
epoch 164| loss: 0.00044 | val_0_mse: 0.00012 |  0:02:15s
epoch 165| loss: 0.00045 | val_0_mse: 0.00012 |  0:02:16s
epoch 166| loss: 0.00045 | val_0_mse: 0.00014 |  0:02:16s
epoch 167| loss: 0.00045 | val_0_mse: 0.00012 |  0:02:17s
epoch 168| loss: 0.00045 | val_0_mse: 0.00012 |  0:02:18s
epoch 169| loss: 0.00044 | val_0_mse: 0.00012 |  0:02:19s
epoch 170| loss: 0.00044 | val_0_mse: 0.00012 |  0:02:20s
epoch 171| loss: 0.00043 | val_0_mse: 0.00012 |  0:02:20s
epoch 172| loss: 0.00043 | val_0_mse: 0.00012 |  0:02:21s
epoch 173| loss: 0.00043 | val_0_mse: 0.00012 |  0:02:22s
epoch 174| loss: 0.00043 | val_0_mse: 0.00012 |  0:02:23s
epoch 175| los

epoch 95 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:21s
epoch 96 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:22s
epoch 97 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:23s
epoch 98 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:24s
epoch 99 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:24s
Stop training because you reached max_epochs = 100 with best_epoch = 74 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_8hours/roberta-high-4.zip
Device used : cuda
epoch 0  | loss: 2.43348 | val_0_mse: 0.51292 |  0:00:00s
epoch 1  | loss: 0.25938 | val_0_mse: 0.12547 |  0:00:01s
epoch 2  | loss: 0.08468 | val_0_mse: 0.01062 |  0:00:02s
epoch 3  | loss: 0.0328  | val_0_mse: 0.00344 |  0:00:03s
epoch 4  | loss: 0.01217 | val_0_mse: 0.00135 |  0:00:04s
epoch 5  | loss: 0.00579 | val_0_mse: 0.00069 |  0:00:05s
epoch 6  | loss: 0.00304 | val_0_mse: 0.00045 |  0:00:05s
epoch 7  | loss: 0.0022  | val_0_mse: 0.00033 |  0:00:06s

epoch 132| loss: 0.00036 | val_0_mse: 5e-05   |  0:01:58s
epoch 133| loss: 0.00036 | val_0_mse: 5e-05   |  0:01:59s
epoch 134| loss: 0.00036 | val_0_mse: 5e-05   |  0:02:00s
epoch 135| loss: 0.00036 | val_0_mse: 5e-05   |  0:02:01s
epoch 136| loss: 0.00036 | val_0_mse: 5e-05   |  0:02:02s
epoch 137| loss: 0.00037 | val_0_mse: 7e-05   |  0:02:03s
epoch 138| loss: 0.00036 | val_0_mse: 5e-05   |  0:02:04s
epoch 139| loss: 0.00036 | val_0_mse: 7e-05   |  0:02:05s
epoch 140| loss: 0.00038 | val_0_mse: 5e-05   |  0:02:06s
epoch 141| loss: 0.00035 | val_0_mse: 6e-05   |  0:02:07s
epoch 142| loss: 0.00035 | val_0_mse: 5e-05   |  0:02:08s
epoch 143| loss: 0.00035 | val_0_mse: 6e-05   |  0:02:09s
epoch 144| loss: 0.00035 | val_0_mse: 5e-05   |  0:02:10s
epoch 145| loss: 0.00035 | val_0_mse: 6e-05   |  0:02:11s
epoch 146| loss: 0.00034 | val_0_mse: 5e-05   |  0:02:11s
epoch 147| loss: 0.00034 | val_0_mse: 6e-05   |  0:02:12s
epoch 148| loss: 0.00033 | val_0_mse: 6e-05   |  0:02:13s
epoch 149| los

epoch 70 | loss: 0.00048 | val_0_mse: 0.00014 |  0:01:12s
epoch 71 | loss: 0.00047 | val_0_mse: 0.00014 |  0:01:13s
epoch 72 | loss: 0.00047 | val_0_mse: 0.00014 |  0:01:14s
epoch 73 | loss: 0.00047 | val_0_mse: 0.00015 |  0:01:16s
epoch 74 | loss: 0.00047 | val_0_mse: 0.00014 |  0:01:17s
epoch 75 | loss: 0.00045 | val_0_mse: 0.00015 |  0:01:18s
epoch 76 | loss: 0.00045 | val_0_mse: 0.00015 |  0:01:20s
epoch 77 | loss: 0.00045 | val_0_mse: 0.00014 |  0:01:21s
epoch 78 | loss: 0.00044 | val_0_mse: 0.00014 |  0:01:23s
epoch 79 | loss: 0.00044 | val_0_mse: 0.00014 |  0:01:24s
epoch 80 | loss: 0.00043 | val_0_mse: 0.00014 |  0:01:26s
epoch 81 | loss: 0.00043 | val_0_mse: 0.00014 |  0:01:27s
epoch 82 | loss: 0.00042 | val_0_mse: 0.00014 |  0:01:28s
epoch 83 | loss: 0.00041 | val_0_mse: 0.00014 |  0:01:30s
epoch 84 | loss: 0.00041 | val_0_mse: 0.00014 |  0:01:31s
epoch 85 | loss: 0.00041 | val_0_mse: 0.00014 |  0:01:32s
epoch 86 | loss: 0.0004  | val_0_mse: 0.00014 |  0:01:34s
epoch 87 | los

epoch 107| loss: 0.00053 | val_0_mse: 0.00013 |  0:02:13s
epoch 108| loss: 0.00052 | val_0_mse: 0.00013 |  0:02:14s
epoch 109| loss: 0.00052 | val_0_mse: 0.00014 |  0:02:15s
epoch 110| loss: 0.00052 | val_0_mse: 0.00014 |  0:02:16s
epoch 111| loss: 0.00052 | val_0_mse: 0.00013 |  0:02:18s
epoch 112| loss: 0.00051 | val_0_mse: 0.00014 |  0:02:19s
epoch 113| loss: 0.00051 | val_0_mse: 0.00014 |  0:02:20s
epoch 114| loss: 0.00051 | val_0_mse: 0.00014 |  0:02:21s
epoch 115| loss: 0.0005  | val_0_mse: 0.00014 |  0:02:22s
epoch 116| loss: 0.0005  | val_0_mse: 0.00014 |  0:02:23s
epoch 117| loss: 0.0005  | val_0_mse: 0.00013 |  0:02:25s
epoch 118| loss: 0.0005  | val_0_mse: 0.00014 |  0:02:26s
epoch 119| loss: 0.00051 | val_0_mse: 0.00013 |  0:02:27s
epoch 120| loss: 0.0005  | val_0_mse: 0.00013 |  0:02:28s
epoch 121| loss: 0.00049 | val_0_mse: 0.00013 |  0:02:30s
epoch 122| loss: 0.00048 | val_0_mse: 0.00015 |  0:02:31s
epoch 123| loss: 0.00049 | val_0_mse: 0.00013 |  0:02:32s
epoch 124| los

In [None]:
# Persist the collected experiment results as one "key, value" line per entry.

# makedirs(..., exist_ok=True) is already idempotent, so no exists() guard is needed.
os.makedirs(checkpoint_save_dir, exist_ok=True)

# The results file is written to the parent directory of `saveas`, which is a
# different directory from `checkpoint_save_dir` and is not created by this cell
# otherwise; without this, open() raises FileNotFoundError when it is missing.
results_dir = os.path.dirname(saveas)
if results_dir:  # dirname is "" when `saveas` is a bare filename in the CWD
    os.makedirs(results_dir, exist_ok=True)

with open(saveas, 'w') as f:
    for key, value in results.items():
        f.write("%s, %s\n" % (key, value))