In [1]:
import os
opj = os.path.join

import ccxt 
import matplotlib.pyplot as plt 
import pandas as pd 
from pytorch_tabnet.tab_model import TabNetClassifier, TabNetRegressor 
from sklearn.metrics import f1_score, mean_squared_error, mean_absolute_error
import torch

In [2]:
df_files = [
    "crypto-gt.csv",
    "deberta.csv",
    "default.csv",
    "roberta.csv",
    "bert.csv"
]

experiment_name = "sentiment_all_news_4hours"
saveas = opj("results", f"{experiment_name}.csv")
checkpoint_save_dir = opj("ckpts", experiment_name)
exp_times = 10
results = {}

In [3]:
def experiment(df_file, N):
    exp_name = df_file.split(".")[0]
    # load data
    chart_df = pd.read_csv(df_file)
    i1 = 0
    i2 = len(chart_df)-1
    st = "2018-02-14 14:00:00"
    en = "2022-04-15 23:00:00"
    filt = []
    for index, row in chart_df.iterrows():
        if chart_df.iloc[i1]['datetime'] < row.datetime and row.datetime < st:
            i1 = index
        if en < row.datetime and row.datetime < chart_df.iloc[i2]['datetime']:
            i2 = index
    chart_df = chart_df[i1+1:i2]
    chart_df.index = range(len(chart_df))
    
    # Train High Model
    ## train/validation/test split 
    train_size = int(chart_df.shape[0] * 0.8) 
    train_df = chart_df.iloc[:train_size,:] 

    val_size = int(chart_df.shape[0] * 0.1) 
    val_df = chart_df.iloc[train_size:train_size+val_size,:]  

    test_df = chart_df.iloc[train_size+val_size:, :] 
    
    categorical_columns = ["months", "days", "hours"]
    features = train_df.columns

    cat_idxs = [0, 1, 2] 
    cat_dims = [13, 32, 25] 

    tabnet_params = {"cat_idxs":cat_idxs, 
                     "cat_dims":cat_dims, 
                     "cat_emb_dim":1, 
                     "optimizer_fn":torch.optim.Adam,
                     "seed": N
                    } 
    
    input_columns = [] 
    for col in train_df.columns:
        if col != 'low_delta' and col != 'years' and col != 'datetime': 
            input_columns.append(col) 

    X_train = train_df[input_columns].values 
    Y_train = train_df['high_delta'].values 
    Y_train = Y_train.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df['high_delta'].values 
    Y_val = Y_val.reshape((-1,1))

    X_test = test_df[input_columns].values 
    Y_test = test_df['high_delta'].values  
    Y_test = Y_test.reshape((-1,1))
    
    reg_high = TabNetRegressor(**tabnet_params) 

    reg_high.fit(X_train, Y_train, 
                 eval_set=[(X_val, Y_val)], 
                 max_epochs=100, 
                 patience=100)  
    
    Y_pred = reg_high.predict(X_test).flatten() 
    
    exp = f"{exp_name}-high"
    if exp not in results:
        results[exp] = []
        
    results[exp].append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )
    
    saveas = opj(checkpoint_save_dir, f"{exp}-{N}")
    reg_high.save_model(saveas)
    
    # Train Low Model
    input_columns = [] 
    for col in train_df.columns:
        if col != 'high_delta' and col != 'years' and col != 'datetime': 
            input_columns.append(col) 

    X_train = train_df[input_columns].values 
    Y_train = train_df['low_delta'].values 
    Y_train = Y_train.reshape((-1,1))

    X_val = val_df[input_columns].values
    Y_val = val_df['low_delta'].values 
    Y_val = Y_val.reshape((-1,1))

    X_test = test_df[input_columns].values 
    Y_test = test_df['low_delta'].values  
    Y_test = Y_test.reshape((-1,1))
    
    reg_low = TabNetRegressor(**tabnet_params) 

    reg_low.fit(X_train, Y_train, 
                eval_set=[(X_val, Y_val)], 
                max_epochs=200, 
                patience=200)  
    
    Y_pred = reg_low.predict(X_test).flatten() 
    exp = f"{exp_name}-low"
    if exp not in results:
        results[exp] = []
        
    results[exp].append(
        {
            "MSE": mean_squared_error(Y_test, Y_pred),
            "MAE": mean_absolute_error(Y_test, Y_pred)
        }
    )
    
    saveas = opj(checkpoint_save_dir, f"{exp}-{N}")
    reg_high.save_model(saveas)

In [None]:
for df_file in df_files:
    for i in range(exp_times):
        experiment(df_file, i)

Device used : cuda
epoch 0  | loss: 0.30603 | val_0_mse: 0.00768 |  0:00:01s
epoch 1  | loss: 0.05294 | val_0_mse: 0.00075 |  0:00:01s
epoch 2  | loss: 0.013   | val_0_mse: 0.00038 |  0:00:02s
epoch 3  | loss: 0.00455 | val_0_mse: 0.00042 |  0:00:03s
epoch 4  | loss: 0.00227 | val_0_mse: 0.00023 |  0:00:04s
epoch 5  | loss: 0.00152 | val_0_mse: 0.00018 |  0:00:04s
epoch 6  | loss: 0.00132 | val_0_mse: 0.00016 |  0:00:05s
epoch 7  | loss: 0.00129 | val_0_mse: 0.00017 |  0:00:06s
epoch 8  | loss: 0.00121 | val_0_mse: 0.00016 |  0:00:07s
epoch 9  | loss: 0.00119 | val_0_mse: 0.00017 |  0:00:08s
epoch 10 | loss: 0.00116 | val_0_mse: 0.00017 |  0:00:09s
epoch 11 | loss: 0.00114 | val_0_mse: 0.00015 |  0:00:09s
epoch 12 | loss: 0.00113 | val_0_mse: 0.00016 |  0:00:10s
epoch 13 | loss: 0.00114 | val_0_mse: 0.00015 |  0:00:11s
epoch 14 | loss: 0.0011  | val_0_mse: 0.00017 |  0:00:12s
epoch 15 | loss: 0.00109 | val_0_mse: 0.00015 |  0:00:13s
epoch 16 | loss: 0.00111 | val_0_mse: 0.00015 |  0:00

epoch 37 | loss: 0.00078 | val_0_mse: 0.00013 |  0:00:28s
epoch 38 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:29s
epoch 39 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:29s
epoch 40 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:30s
epoch 41 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:31s
epoch 42 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:32s
epoch 43 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:32s
epoch 44 | loss: 0.00061 | val_0_mse: 0.00013 |  0:00:33s
epoch 45 | loss: 0.00058 | val_0_mse: 0.00015 |  0:00:34s
epoch 46 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:35s
epoch 47 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:36s
epoch 48 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:36s
epoch 49 | loss: 0.00054 | val_0_mse: 0.00013 |  0:00:37s
epoch 50 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:38s
epoch 51 | loss: 0.00054 | val_0_mse: 0.00013 |  0:00:39s
epoch 52 | loss: 0.00051 | val_0_mse: 0.00013 |  0:00:40s
epoch 53 | loss: 0.0005  | val_0_mse: 0.00013 |  0:00:41s
epoch 54 | los

epoch 179| loss: 0.00027 | val_0_mse: 0.00013 |  0:02:21s
epoch 180| loss: 0.00027 | val_0_mse: 0.00013 |  0:02:22s
epoch 181| loss: 0.00026 | val_0_mse: 0.00013 |  0:02:23s
epoch 182| loss: 0.00027 | val_0_mse: 0.00012 |  0:02:23s
epoch 183| loss: 0.00026 | val_0_mse: 0.00013 |  0:02:24s
epoch 184| loss: 0.00026 | val_0_mse: 0.00013 |  0:02:25s
epoch 185| loss: 0.00026 | val_0_mse: 0.00014 |  0:02:26s
epoch 186| loss: 0.00027 | val_0_mse: 0.00013 |  0:02:27s
epoch 187| loss: 0.00027 | val_0_mse: 0.00013 |  0:02:28s
epoch 188| loss: 0.00026 | val_0_mse: 0.00012 |  0:02:28s
epoch 189| loss: 0.00026 | val_0_mse: 0.00013 |  0:02:29s
epoch 190| loss: 0.00027 | val_0_mse: 0.00012 |  0:02:30s
epoch 191| loss: 0.00026 | val_0_mse: 0.00012 |  0:02:30s
epoch 192| loss: 0.00025 | val_0_mse: 0.00012 |  0:02:31s
epoch 193| loss: 0.00026 | val_0_mse: 0.00012 |  0:02:32s
epoch 194| loss: 0.00026 | val_0_mse: 0.00012 |  0:02:32s
epoch 195| loss: 0.00025 | val_0_mse: 0.00012 |  0:02:33s
epoch 196| los

epoch 12 | loss: 0.00129 | val_0_mse: 0.00017 |  0:00:10s
epoch 13 | loss: 0.00124 | val_0_mse: 0.00017 |  0:00:11s
epoch 14 | loss: 0.00118 | val_0_mse: 0.00017 |  0:00:11s
epoch 15 | loss: 0.00117 | val_0_mse: 0.00016 |  0:00:12s
epoch 16 | loss: 0.0011  | val_0_mse: 0.00016 |  0:00:13s
epoch 17 | loss: 0.00109 | val_0_mse: 0.00016 |  0:00:14s
epoch 18 | loss: 0.00106 | val_0_mse: 0.00016 |  0:00:15s
epoch 19 | loss: 0.00103 | val_0_mse: 0.00016 |  0:00:15s
epoch 20 | loss: 0.00102 | val_0_mse: 0.00016 |  0:00:16s
epoch 21 | loss: 0.00098 | val_0_mse: 0.00017 |  0:00:17s
epoch 22 | loss: 0.00098 | val_0_mse: 0.00017 |  0:00:18s
epoch 23 | loss: 0.00095 | val_0_mse: 0.00017 |  0:00:19s
epoch 24 | loss: 0.00095 | val_0_mse: 0.00017 |  0:00:19s
epoch 25 | loss: 0.00095 | val_0_mse: 0.00017 |  0:00:20s
epoch 26 | loss: 0.00093 | val_0_mse: 0.00018 |  0:00:21s
epoch 27 | loss: 0.00091 | val_0_mse: 0.00017 |  0:00:22s
epoch 28 | loss: 0.0009  | val_0_mse: 0.00016 |  0:00:23s
epoch 29 | los

epoch 154| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:03s
epoch 155| loss: 0.00036 | val_0_mse: 0.00014 |  0:02:04s
epoch 156| loss: 0.00036 | val_0_mse: 0.00015 |  0:02:05s
epoch 157| loss: 0.00036 | val_0_mse: 0.00015 |  0:02:06s
epoch 158| loss: 0.00036 | val_0_mse: 0.00017 |  0:02:07s
epoch 159| loss: 0.00035 | val_0_mse: 0.00011 |  0:02:07s
epoch 160| loss: 0.00036 | val_0_mse: 0.00012 |  0:02:08s
epoch 161| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:09s
epoch 162| loss: 0.00035 | val_0_mse: 0.00012 |  0:02:10s
epoch 163| loss: 0.00035 | val_0_mse: 0.00011 |  0:02:11s
epoch 164| loss: 0.00035 | val_0_mse: 0.00011 |  0:02:11s
epoch 165| loss: 0.00034 | val_0_mse: 0.00011 |  0:02:12s
epoch 166| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:13s
epoch 167| loss: 0.00032 | val_0_mse: 0.00014 |  0:02:14s
epoch 168| loss: 0.00033 | val_0_mse: 0.0003  |  0:02:14s
epoch 169| loss: 0.0004  | val_0_mse: 0.0002  |  0:02:15s
epoch 170| loss: 0.00035 | val_0_mse: 0.00012 |  0:02:16s
epoch 171| los

epoch 91 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:13s
epoch 92 | loss: 0.00045 | val_0_mse: 0.00012 |  0:01:14s
epoch 93 | loss: 0.00044 | val_0_mse: 0.00011 |  0:01:15s
epoch 94 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:16s
epoch 95 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:17s
epoch 96 | loss: 0.00045 | val_0_mse: 0.00011 |  0:01:17s
epoch 97 | loss: 0.00044 | val_0_mse: 0.0001  |  0:01:18s
epoch 98 | loss: 0.00044 | val_0_mse: 9e-05   |  0:01:19s
epoch 99 | loss: 0.00046 | val_0_mse: 0.0001  |  0:01:20s
Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_mse = 9e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/crypto-gt-high-2.zip
Device used : cuda
epoch 0  | loss: 3.14772 | val_0_mse: 0.08269 |  0:00:00s
epoch 1  | loss: 0.21834 | val_0_mse: 0.0591  |  0:00:01s
epoch 2  | loss: 0.04434 | val_0_mse: 0.0125  |  0:00:02s
epoch 3  | loss: 0.01686 | val_0_mse: 0.00289 |  0:0

epoch 128| loss: 0.0004  | val_0_mse: 0.00014 |  0:01:42s
epoch 129| loss: 0.0004  | val_0_mse: 0.00013 |  0:01:43s
epoch 130| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:43s
epoch 131| loss: 0.00039 | val_0_mse: 0.00015 |  0:01:44s
epoch 132| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:45s
epoch 133| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:46s
epoch 134| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:46s
epoch 135| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:47s
epoch 136| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:48s
epoch 137| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:49s
epoch 138| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:49s
epoch 139| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:50s
epoch 140| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:51s
epoch 141| loss: 0.00038 | val_0_mse: 0.00014 |  0:01:52s
epoch 142| loss: 0.00038 | val_0_mse: 0.00017 |  0:01:52s
epoch 143| loss: 0.00039 | val_0_mse: 0.00013 |  0:01:53s
epoch 144| loss: 0.00038 | val_0_mse: 0.00013 |  0:01:54s
epoch 145| los

epoch 65 | loss: 0.00049 | val_0_mse: 0.00014 |  0:00:53s
epoch 66 | loss: 0.00047 | val_0_mse: 0.00012 |  0:00:54s
epoch 67 | loss: 0.00055 | val_0_mse: 0.00014 |  0:00:55s
epoch 68 | loss: 0.00047 | val_0_mse: 0.00012 |  0:00:56s
epoch 69 | loss: 0.00047 | val_0_mse: 0.00012 |  0:00:56s
epoch 70 | loss: 0.00049 | val_0_mse: 0.0001  |  0:00:57s
epoch 71 | loss: 0.00047 | val_0_mse: 0.00012 |  0:00:58s
epoch 72 | loss: 0.00047 | val_0_mse: 0.00013 |  0:00:59s
epoch 73 | loss: 0.00045 | val_0_mse: 0.00012 |  0:00:59s
epoch 74 | loss: 0.00044 | val_0_mse: 0.0001  |  0:01:00s
epoch 75 | loss: 0.00043 | val_0_mse: 0.00011 |  0:01:01s
epoch 76 | loss: 0.00043 | val_0_mse: 0.00014 |  0:01:02s
epoch 77 | loss: 0.00043 | val_0_mse: 0.00011 |  0:01:03s
epoch 78 | loss: 0.00042 | val_0_mse: 0.0001  |  0:01:03s
epoch 79 | loss: 0.00041 | val_0_mse: 9e-05   |  0:01:04s
epoch 80 | loss: 0.0004  | val_0_mse: 9e-05   |  0:01:05s
epoch 81 | loss: 0.0004  | val_0_mse: 9e-05   |  0:01:06s
epoch 82 | los

epoch 102| loss: 0.00053 | val_0_mse: 0.00014 |  0:01:22s
epoch 103| loss: 0.00053 | val_0_mse: 0.00015 |  0:01:23s
epoch 104| loss: 0.00052 | val_0_mse: 0.00014 |  0:01:24s
epoch 105| loss: 0.00052 | val_0_mse: 0.00014 |  0:01:25s
epoch 106| loss: 0.00051 | val_0_mse: 0.00014 |  0:01:26s
epoch 107| loss: 0.00051 | val_0_mse: 0.00014 |  0:01:26s
epoch 108| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:27s
epoch 109| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:28s
epoch 110| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:29s
epoch 111| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:29s
epoch 112| loss: 0.0005  | val_0_mse: 0.00013 |  0:01:30s
epoch 113| loss: 0.00049 | val_0_mse: 0.00013 |  0:01:31s
epoch 114| loss: 0.00048 | val_0_mse: 0.00013 |  0:01:32s
epoch 115| loss: 0.00048 | val_0_mse: 0.00013 |  0:01:32s
epoch 116| loss: 0.00049 | val_0_mse: 0.00013 |  0:01:33s
epoch 117| loss: 0.00049 | val_0_mse: 0.00014 |  0:01:34s
epoch 118| loss: 0.00049 | val_0_mse: 0.00014 |  0:01:35s
epoch 119| los

epoch 39 | loss: 0.00096 | val_0_mse: 0.00019 |  0:00:31s
epoch 40 | loss: 0.00094 | val_0_mse: 0.00019 |  0:00:32s
epoch 41 | loss: 0.00093 | val_0_mse: 0.00018 |  0:00:33s
epoch 42 | loss: 0.00093 | val_0_mse: 0.0002  |  0:00:34s
epoch 43 | loss: 0.00091 | val_0_mse: 0.00019 |  0:00:34s
epoch 44 | loss: 0.00089 | val_0_mse: 0.00017 |  0:00:35s
epoch 45 | loss: 0.00088 | val_0_mse: 0.00017 |  0:00:36s
epoch 46 | loss: 0.00087 | val_0_mse: 0.00019 |  0:00:37s
epoch 47 | loss: 0.00088 | val_0_mse: 0.00017 |  0:00:38s
epoch 48 | loss: 0.00085 | val_0_mse: 0.00017 |  0:00:38s
epoch 49 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:39s
epoch 50 | loss: 0.00083 | val_0_mse: 0.00017 |  0:00:40s
epoch 51 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:41s
epoch 52 | loss: 0.00081 | val_0_mse: 0.00017 |  0:00:42s
epoch 53 | loss: 0.0008  | val_0_mse: 0.00018 |  0:00:42s
epoch 54 | loss: 0.00079 | val_0_mse: 0.00017 |  0:00:43s
epoch 55 | loss: 0.00079 | val_0_mse: 0.00016 |  0:00:44s
epoch 56 | los

epoch 76 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:02s
epoch 77 | loss: 0.00065 | val_0_mse: 0.00014 |  0:01:03s
epoch 78 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:04s
epoch 79 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:05s
epoch 80 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:06s
epoch 81 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:07s
epoch 82 | loss: 0.00063 | val_0_mse: 0.00013 |  0:01:07s
epoch 83 | loss: 0.00063 | val_0_mse: 0.00014 |  0:01:08s
epoch 84 | loss: 0.00062 | val_0_mse: 0.00013 |  0:01:09s
epoch 85 | loss: 0.00061 | val_0_mse: 0.00014 |  0:01:10s
epoch 86 | loss: 0.00061 | val_0_mse: 0.00013 |  0:01:10s
epoch 87 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:11s
epoch 88 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:12s
epoch 89 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:13s
epoch 90 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:13s
epoch 91 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:14s
epoch 92 | loss: 0.00059 | val_0_mse: 0.00015 |  0:01:15s
epoch 93 | los

epoch 13 | loss: 0.0012  | val_0_mse: 0.0002  |  0:00:10s
epoch 14 | loss: 0.00116 | val_0_mse: 0.00019 |  0:00:11s
epoch 15 | loss: 0.00111 | val_0_mse: 0.0002  |  0:00:12s
epoch 16 | loss: 0.00109 | val_0_mse: 0.00019 |  0:00:13s
epoch 17 | loss: 0.00105 | val_0_mse: 0.0002  |  0:00:13s
epoch 18 | loss: 0.00103 | val_0_mse: 0.0002  |  0:00:14s
epoch 19 | loss: 0.001   | val_0_mse: 0.0002  |  0:00:15s
epoch 20 | loss: 0.001   | val_0_mse: 0.00021 |  0:00:16s
epoch 21 | loss: 0.001   | val_0_mse: 0.0002  |  0:00:17s
epoch 22 | loss: 0.00095 | val_0_mse: 0.0002  |  0:00:17s
epoch 23 | loss: 0.00093 | val_0_mse: 0.0002  |  0:00:18s
epoch 24 | loss: 0.00092 | val_0_mse: 0.00019 |  0:00:19s
epoch 25 | loss: 0.00089 | val_0_mse: 0.0002  |  0:00:20s
epoch 26 | loss: 0.00087 | val_0_mse: 0.00019 |  0:00:21s
epoch 27 | loss: 0.00089 | val_0_mse: 0.00019 |  0:00:22s
epoch 28 | loss: 0.00086 | val_0_mse: 0.00019 |  0:00:22s
epoch 29 | loss: 0.00084 | val_0_mse: 0.00019 |  0:00:23s
epoch 30 | los

epoch 50 | loss: 0.00069 | val_0_mse: 0.00016 |  0:00:42s
epoch 51 | loss: 0.00069 | val_0_mse: 0.00014 |  0:00:43s
epoch 52 | loss: 0.00068 | val_0_mse: 0.00016 |  0:00:44s
epoch 53 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:45s
epoch 54 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:45s
epoch 55 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:46s
epoch 56 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:47s
epoch 57 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:47s
epoch 58 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:48s
epoch 59 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:49s
epoch 60 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:50s
epoch 61 | loss: 0.00063 | val_0_mse: 0.00013 |  0:00:50s
epoch 62 | loss: 0.00063 | val_0_mse: 0.00015 |  0:00:51s
epoch 63 | loss: 0.00062 | val_0_mse: 0.00013 |  0:00:52s
epoch 64 | loss: 0.00063 | val_0_mse: 0.00016 |  0:00:53s
epoch 65 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:53s
epoch 66 | loss: 0.00061 | val_0_mse: 0.00014 |  0:00:54s
epoch 67 | los

epoch 192| loss: 0.00034 | val_0_mse: 0.00015 |  0:02:33s
epoch 193| loss: 0.00034 | val_0_mse: 0.00014 |  0:02:34s
epoch 194| loss: 0.00034 | val_0_mse: 0.00013 |  0:02:35s
epoch 195| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:36s
epoch 196| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:37s
epoch 197| loss: 0.00034 | val_0_mse: 0.00013 |  0:02:37s
epoch 198| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:38s
epoch 199| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:39s
Stop training because you reached max_epochs = 200 with best_epoch = 186 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/crypto-gt-low-5.zip
Device used : cuda
epoch 0  | loss: 0.34475 | val_0_mse: 0.0108  |  0:00:00s
epoch 1  | loss: 0.04774 | val_0_mse: 0.00455 |  0:00:01s
epoch 2  | loss: 0.0102  | val_0_mse: 0.00047 |  0:00:02s
epoch 3  | loss: 0.00565 | val_0_mse: 0.00071 |  0:00:03s
epoch 4  | loss: 0.00288 | val_0_mse: 0.00067 |  0

epoch 25 | loss: 0.00095 | val_0_mse: 0.00019 |  0:00:21s
epoch 26 | loss: 0.00094 | val_0_mse: 0.00018 |  0:00:22s
epoch 27 | loss: 0.00093 | val_0_mse: 0.00018 |  0:00:23s
epoch 28 | loss: 0.00091 | val_0_mse: 0.00018 |  0:00:24s
epoch 29 | loss: 0.00091 | val_0_mse: 0.00018 |  0:00:24s
epoch 30 | loss: 0.0009  | val_0_mse: 0.00018 |  0:00:25s
epoch 31 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:26s
epoch 32 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:27s
epoch 33 | loss: 0.00087 | val_0_mse: 0.00017 |  0:00:27s
epoch 34 | loss: 0.00086 | val_0_mse: 0.00017 |  0:00:28s
epoch 35 | loss: 0.00085 | val_0_mse: 0.00016 |  0:00:29s
epoch 36 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:30s
epoch 37 | loss: 0.00084 | val_0_mse: 0.00019 |  0:00:31s
epoch 38 | loss: 0.00084 | val_0_mse: 0.00017 |  0:00:31s
epoch 39 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:32s
epoch 40 | loss: 0.00081 | val_0_mse: 0.00018 |  0:00:33s
epoch 41 | loss: 0.00081 | val_0_mse: 0.00017 |  0:00:34s
epoch 42 | los

epoch 167| loss: 0.00039 | val_0_mse: 0.00015 |  0:02:13s
epoch 168| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:13s
epoch 169| loss: 0.0004  | val_0_mse: 0.00015 |  0:02:14s
epoch 170| loss: 0.0004  | val_0_mse: 0.00015 |  0:02:15s
epoch 171| loss: 0.0004  | val_0_mse: 0.00015 |  0:02:16s
epoch 172| loss: 0.00041 | val_0_mse: 0.00016 |  0:02:17s
epoch 173| loss: 0.0004  | val_0_mse: 0.00014 |  0:02:18s
epoch 174| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:18s
epoch 175| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:19s
epoch 176| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:20s
epoch 177| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:21s
epoch 178| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:21s
epoch 179| loss: 0.00038 | val_0_mse: 0.00015 |  0:02:22s
epoch 180| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:23s
epoch 181| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:24s
epoch 182| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:24s
epoch 183| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:25s
epoch 184| los

epoch 0  | loss: 0.53618 | val_0_mse: 0.01935 |  0:00:00s
epoch 1  | loss: 0.06054 | val_0_mse: 0.00593 |  0:00:01s
epoch 2  | loss: 0.01767 | val_0_mse: 0.00145 |  0:00:02s
epoch 3  | loss: 0.00727 | val_0_mse: 0.00126 |  0:00:03s
epoch 4  | loss: 0.00473 | val_0_mse: 0.00077 |  0:00:04s
epoch 5  | loss: 0.00289 | val_0_mse: 0.00038 |  0:00:04s
epoch 6  | loss: 0.00241 | val_0_mse: 0.00038 |  0:00:05s
epoch 7  | loss: 0.00215 | val_0_mse: 0.00036 |  0:00:06s
epoch 8  | loss: 0.00193 | val_0_mse: 0.0003  |  0:00:07s
epoch 9  | loss: 0.0017  | val_0_mse: 0.00027 |  0:00:08s
epoch 10 | loss: 0.00158 | val_0_mse: 0.00028 |  0:00:08s
epoch 11 | loss: 0.00164 | val_0_mse: 0.00025 |  0:00:09s
epoch 12 | loss: 0.00138 | val_0_mse: 0.00025 |  0:00:10s
epoch 13 | loss: 0.00145 | val_0_mse: 0.00023 |  0:00:11s
epoch 14 | loss: 0.00141 | val_0_mse: 0.00023 |  0:00:12s
epoch 15 | loss: 0.00134 | val_0_mse: 0.00026 |  0:00:12s
epoch 16 | loss: 0.0013  | val_0_mse: 0.00023 |  0:00:13s
epoch 17 | los

epoch 142| loss: 0.00034 | val_0_mse: 0.00012 |  0:01:53s
epoch 143| loss: 0.00031 | val_0_mse: 9e-05   |  0:01:54s
epoch 144| loss: 0.0003  | val_0_mse: 0.00011 |  0:01:55s
epoch 145| loss: 0.00032 | val_0_mse: 0.00012 |  0:01:56s
epoch 146| loss: 0.00029 | val_0_mse: 8e-05   |  0:01:56s
epoch 147| loss: 0.00029 | val_0_mse: 9e-05   |  0:01:57s
epoch 148| loss: 0.00028 | val_0_mse: 0.00011 |  0:01:58s
epoch 149| loss: 0.00029 | val_0_mse: 8e-05   |  0:01:59s
epoch 150| loss: 0.00027 | val_0_mse: 8e-05   |  0:01:59s
epoch 151| loss: 0.00027 | val_0_mse: 8e-05   |  0:02:00s
epoch 152| loss: 0.00028 | val_0_mse: 0.00011 |  0:02:01s
epoch 153| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:02s
epoch 154| loss: 0.00029 | val_0_mse: 0.0001  |  0:02:02s
epoch 155| loss: 0.00028 | val_0_mse: 9e-05   |  0:02:03s
epoch 156| loss: 0.00028 | val_0_mse: 8e-05   |  0:02:04s
epoch 157| loss: 0.00027 | val_0_mse: 7e-05   |  0:02:04s
epoch 158| loss: 0.00027 | val_0_mse: 7e-05   |  0:02:05s
epoch 159| los

epoch 79 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:06s
epoch 80 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:07s
epoch 81 | loss: 0.0006  | val_0_mse: 0.00013 |  0:01:07s
epoch 82 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:08s
epoch 83 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:09s
epoch 84 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:10s
epoch 85 | loss: 0.00059 | val_0_mse: 0.00013 |  0:01:11s
epoch 86 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:11s
epoch 87 | loss: 0.00058 | val_0_mse: 0.00012 |  0:01:12s
epoch 88 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:13s
epoch 89 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:14s
epoch 90 | loss: 0.00056 | val_0_mse: 0.00012 |  0:01:14s
epoch 91 | loss: 0.00056 | val_0_mse: 0.00012 |  0:01:15s
epoch 92 | loss: 0.00055 | val_0_mse: 0.00012 |  0:01:16s
epoch 93 | loss: 0.00055 | val_0_mse: 0.00012 |  0:01:17s
epoch 94 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:17s
epoch 95 | loss: 0.00054 | val_0_mse: 0.00012 |  0:01:18s
epoch 96 | los

epoch 116| loss: 0.00044 | val_0_mse: 0.00014 |  0:01:32s
epoch 117| loss: 0.00044 | val_0_mse: 0.00014 |  0:01:32s
epoch 118| loss: 0.00044 | val_0_mse: 0.00014 |  0:01:33s
epoch 119| loss: 0.00043 | val_0_mse: 0.00014 |  0:01:34s
epoch 120| loss: 0.00043 | val_0_mse: 0.00014 |  0:01:35s
epoch 121| loss: 0.00043 | val_0_mse: 0.00014 |  0:01:35s
epoch 122| loss: 0.00043 | val_0_mse: 0.00014 |  0:01:36s
epoch 123| loss: 0.00043 | val_0_mse: 0.00014 |  0:01:37s
epoch 124| loss: 0.00043 | val_0_mse: 0.00014 |  0:01:38s
epoch 125| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:39s
epoch 126| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:39s
epoch 127| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:40s
epoch 128| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:41s
epoch 129| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:42s
epoch 130| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:42s
epoch 131| loss: 0.00041 | val_0_mse: 0.00015 |  0:01:43s
epoch 132| loss: 0.00041 | val_0_mse: 0.00014 |  0:01:44s
epoch 133| los

epoch 53 | loss: 0.00051 | val_0_mse: 0.00014 |  0:00:43s
epoch 54 | loss: 0.00052 | val_0_mse: 0.00014 |  0:00:43s
epoch 55 | loss: 0.00051 | val_0_mse: 0.00014 |  0:00:44s
epoch 56 | loss: 0.0005  | val_0_mse: 0.00013 |  0:00:45s
epoch 57 | loss: 0.0005  | val_0_mse: 0.00014 |  0:00:46s
epoch 58 | loss: 0.00049 | val_0_mse: 0.00014 |  0:00:46s
epoch 59 | loss: 0.00049 | val_0_mse: 0.00014 |  0:00:47s
epoch 60 | loss: 0.00049 | val_0_mse: 0.00014 |  0:00:48s
epoch 61 | loss: 0.00049 | val_0_mse: 0.00014 |  0:00:49s
epoch 62 | loss: 0.00048 | val_0_mse: 0.00013 |  0:00:50s
epoch 63 | loss: 0.00048 | val_0_mse: 0.00013 |  0:00:50s
epoch 64 | loss: 0.00048 | val_0_mse: 0.00013 |  0:00:51s
epoch 65 | loss: 0.00048 | val_0_mse: 0.00014 |  0:00:52s
epoch 66 | loss: 0.00047 | val_0_mse: 0.00014 |  0:00:53s
epoch 67 | loss: 0.00047 | val_0_mse: 0.00014 |  0:00:53s
epoch 68 | loss: 0.00047 | val_0_mse: 0.00013 |  0:00:54s
epoch 69 | loss: 0.00047 | val_0_mse: 0.00013 |  0:00:55s
epoch 70 | los

epoch 90 | loss: 0.00051 | val_0_mse: 0.00014 |  0:01:10s
epoch 91 | loss: 0.00051 | val_0_mse: 0.00019 |  0:01:11s
epoch 92 | loss: 0.00051 | val_0_mse: 0.00019 |  0:01:12s
epoch 93 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:12s
epoch 94 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:13s
epoch 95 | loss: 0.00049 | val_0_mse: 0.00014 |  0:01:14s
epoch 96 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:15s
epoch 97 | loss: 0.00048 | val_0_mse: 0.00015 |  0:01:16s
epoch 98 | loss: 0.00047 | val_0_mse: 0.00015 |  0:01:16s
epoch 99 | loss: 0.00045 | val_0_mse: 0.00014 |  0:01:17s
epoch 100| loss: 0.00046 | val_0_mse: 0.00013 |  0:01:18s
epoch 101| loss: 0.00048 | val_0_mse: 0.00013 |  0:01:19s
epoch 102| loss: 0.00046 | val_0_mse: 0.00013 |  0:01:19s
epoch 103| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:20s
epoch 104| loss: 0.00045 | val_0_mse: 0.00014 |  0:01:21s
epoch 105| loss: 0.00045 | val_0_mse: 0.00014 |  0:01:22s
epoch 106| loss: 0.00044 | val_0_mse: 0.00015 |  0:01:23s
epoch 107| los

epoch 27 | loss: 0.00083 | val_0_mse: 0.00015 |  0:00:21s
epoch 28 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:22s
epoch 29 | loss: 0.00078 | val_0_mse: 0.00014 |  0:00:23s
epoch 30 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:24s
epoch 31 | loss: 0.00077 | val_0_mse: 0.00014 |  0:00:25s
epoch 32 | loss: 0.00075 | val_0_mse: 0.00015 |  0:00:25s
epoch 33 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:26s
epoch 34 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:27s
epoch 35 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:28s
epoch 36 | loss: 0.0007  | val_0_mse: 0.00016 |  0:00:28s
epoch 37 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:29s
epoch 38 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:30s
epoch 39 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:31s
epoch 40 | loss: 0.00066 | val_0_mse: 0.00014 |  0:00:31s
epoch 41 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:32s
epoch 42 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:33s
epoch 43 | loss: 0.00063 | val_0_mse: 0.00015 |  0:00:34s
epoch 44 | los

epoch 64 | loss: 0.00055 | val_0_mse: 0.00014 |  0:00:50s
epoch 65 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:51s
epoch 66 | loss: 0.00054 | val_0_mse: 0.00015 |  0:00:51s
epoch 67 | loss: 0.00055 | val_0_mse: 0.00014 |  0:00:52s
epoch 68 | loss: 0.00053 | val_0_mse: 0.00014 |  0:00:53s
epoch 69 | loss: 0.00053 | val_0_mse: 0.00014 |  0:00:54s
epoch 70 | loss: 0.00052 | val_0_mse: 0.00014 |  0:00:55s
epoch 71 | loss: 0.00052 | val_0_mse: 0.00014 |  0:00:56s
epoch 72 | loss: 0.00052 | val_0_mse: 0.00013 |  0:00:56s
epoch 73 | loss: 0.00051 | val_0_mse: 0.00014 |  0:00:57s
epoch 74 | loss: 0.00053 | val_0_mse: 0.00014 |  0:00:58s
epoch 75 | loss: 0.00052 | val_0_mse: 0.00014 |  0:00:59s
epoch 76 | loss: 0.00051 | val_0_mse: 0.00014 |  0:00:59s
epoch 77 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:00s
epoch 78 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:01s
epoch 79 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:02s
epoch 80 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:02s
epoch 81 | los

epoch 1  | loss: 0.22188 | val_0_mse: 0.00319 |  0:00:01s
epoch 2  | loss: 0.05724 | val_0_mse: 0.00295 |  0:00:02s
epoch 3  | loss: 0.0245  | val_0_mse: 0.00258 |  0:00:03s
epoch 4  | loss: 0.01191 | val_0_mse: 0.00188 |  0:00:04s
epoch 5  | loss: 0.00641 | val_0_mse: 0.00085 |  0:00:04s
epoch 6  | loss: 0.00427 | val_0_mse: 0.00042 |  0:00:05s
epoch 7  | loss: 0.00286 | val_0_mse: 0.0003  |  0:00:06s
epoch 8  | loss: 0.00214 | val_0_mse: 0.00029 |  0:00:07s
epoch 9  | loss: 0.00174 | val_0_mse: 0.00023 |  0:00:07s
epoch 10 | loss: 0.00161 | val_0_mse: 0.00024 |  0:00:08s
epoch 11 | loss: 0.00145 | val_0_mse: 0.00022 |  0:00:09s
epoch 12 | loss: 0.00135 | val_0_mse: 0.00022 |  0:00:10s
epoch 13 | loss: 0.00123 | val_0_mse: 0.0002  |  0:00:10s
epoch 14 | loss: 0.00121 | val_0_mse: 0.00021 |  0:00:11s
epoch 15 | loss: 0.00115 | val_0_mse: 0.00021 |  0:00:12s
epoch 16 | loss: 0.00108 | val_0_mse: 0.00021 |  0:00:13s
epoch 17 | loss: 0.00106 | val_0_mse: 0.00021 |  0:00:14s
epoch 18 | los

epoch 38 | loss: 0.00083 | val_0_mse: 0.00018 |  0:00:30s
epoch 39 | loss: 0.00082 | val_0_mse: 0.00018 |  0:00:30s
epoch 40 | loss: 0.0008  | val_0_mse: 0.00017 |  0:00:31s
epoch 41 | loss: 0.00081 | val_0_mse: 0.00017 |  0:00:32s
epoch 42 | loss: 0.00079 | val_0_mse: 0.00017 |  0:00:33s
epoch 43 | loss: 0.00078 | val_0_mse: 0.00018 |  0:00:33s
epoch 44 | loss: 0.00077 | val_0_mse: 0.00018 |  0:00:34s
epoch 45 | loss: 0.00075 | val_0_mse: 0.00016 |  0:00:35s
epoch 46 | loss: 0.00074 | val_0_mse: 0.00017 |  0:00:36s
epoch 47 | loss: 0.00073 | val_0_mse: 0.00017 |  0:00:37s
epoch 48 | loss: 0.00073 | val_0_mse: 0.00016 |  0:00:37s
epoch 49 | loss: 0.00071 | val_0_mse: 0.00018 |  0:00:38s
epoch 50 | loss: 0.0007  | val_0_mse: 0.00017 |  0:00:39s
epoch 51 | loss: 0.00069 | val_0_mse: 0.00017 |  0:00:39s
epoch 52 | loss: 0.00068 | val_0_mse: 0.00017 |  0:00:40s
epoch 53 | loss: 0.00066 | val_0_mse: 0.00017 |  0:00:41s
epoch 54 | loss: 0.00067 | val_0_mse: 0.00016 |  0:00:42s
epoch 55 | los

epoch 180| loss: 0.00022 | val_0_mse: 9e-05   |  0:02:17s
epoch 181| loss: 0.00022 | val_0_mse: 9e-05   |  0:02:17s
epoch 182| loss: 0.00022 | val_0_mse: 9e-05   |  0:02:18s
epoch 183| loss: 0.00021 | val_0_mse: 0.0001  |  0:02:19s
epoch 184| loss: 0.00022 | val_0_mse: 0.0001  |  0:02:20s
epoch 185| loss: 0.00021 | val_0_mse: 9e-05   |  0:02:20s
epoch 186| loss: 0.0002  | val_0_mse: 9e-05   |  0:02:21s
epoch 187| loss: 0.0002  | val_0_mse: 0.0001  |  0:02:22s
epoch 188| loss: 0.00021 | val_0_mse: 9e-05   |  0:02:22s
epoch 189| loss: 0.00021 | val_0_mse: 9e-05   |  0:02:23s
epoch 190| loss: 0.00021 | val_0_mse: 0.00011 |  0:02:24s
epoch 191| loss: 0.00023 | val_0_mse: 0.0001  |  0:02:25s
epoch 192| loss: 0.00022 | val_0_mse: 0.0001  |  0:02:25s
epoch 193| loss: 0.00021 | val_0_mse: 9e-05   |  0:02:26s
epoch 194| loss: 0.0002  | val_0_mse: 9e-05   |  0:02:27s
epoch 195| loss: 0.00019 | val_0_mse: 9e-05   |  0:02:28s
epoch 196| loss: 0.0002  | val_0_mse: 9e-05   |  0:02:28s
epoch 197| los

epoch 13 | loss: 0.00125 | val_0_mse: 0.00019 |  0:00:10s
epoch 14 | loss: 0.00118 | val_0_mse: 0.0002  |  0:00:11s
epoch 15 | loss: 0.00112 | val_0_mse: 0.00019 |  0:00:12s
epoch 16 | loss: 0.00109 | val_0_mse: 0.0002  |  0:00:13s
epoch 17 | loss: 0.00106 | val_0_mse: 0.00019 |  0:00:13s
epoch 18 | loss: 0.00101 | val_0_mse: 0.00019 |  0:00:14s
epoch 19 | loss: 0.00103 | val_0_mse: 0.00019 |  0:00:15s
epoch 20 | loss: 0.001   | val_0_mse: 0.00019 |  0:00:16s
epoch 21 | loss: 0.00098 | val_0_mse: 0.00019 |  0:00:17s
epoch 22 | loss: 0.00097 | val_0_mse: 0.00019 |  0:00:18s
epoch 23 | loss: 0.00094 | val_0_mse: 0.00019 |  0:00:19s
epoch 24 | loss: 0.00099 | val_0_mse: 0.00018 |  0:00:19s
epoch 25 | loss: 0.00094 | val_0_mse: 0.00018 |  0:00:20s
epoch 26 | loss: 0.00092 | val_0_mse: 0.00018 |  0:00:21s
epoch 27 | loss: 0.00091 | val_0_mse: 0.00017 |  0:00:22s
epoch 28 | loss: 0.0009  | val_0_mse: 0.00018 |  0:00:22s
epoch 29 | loss: 0.00088 | val_0_mse: 0.00017 |  0:00:23s
epoch 30 | los

epoch 155| loss: 0.00037 | val_0_mse: 0.00012 |  0:02:04s
epoch 156| loss: 0.00041 | val_0_mse: 0.00012 |  0:02:05s
epoch 157| loss: 0.00037 | val_0_mse: 0.00012 |  0:02:05s
epoch 158| loss: 0.00036 | val_0_mse: 0.00011 |  0:02:06s
epoch 159| loss: 0.00036 | val_0_mse: 0.00012 |  0:02:07s
epoch 160| loss: 0.00036 | val_0_mse: 0.00012 |  0:02:08s
epoch 161| loss: 0.00035 | val_0_mse: 0.00011 |  0:02:08s
epoch 162| loss: 0.00036 | val_0_mse: 0.00012 |  0:02:09s
epoch 163| loss: 0.00035 | val_0_mse: 0.00012 |  0:02:10s
epoch 164| loss: 0.00035 | val_0_mse: 0.00012 |  0:02:11s
epoch 165| loss: 0.00034 | val_0_mse: 0.00012 |  0:02:11s
epoch 166| loss: 0.00034 | val_0_mse: 0.00012 |  0:02:12s
epoch 167| loss: 0.00034 | val_0_mse: 0.00011 |  0:02:13s
epoch 168| loss: 0.00034 | val_0_mse: 0.00011 |  0:02:14s
epoch 169| loss: 0.00033 | val_0_mse: 0.00011 |  0:02:15s
epoch 170| loss: 0.00033 | val_0_mse: 0.00011 |  0:02:15s
epoch 171| loss: 0.00033 | val_0_mse: 0.00012 |  0:02:16s
epoch 172| los

epoch 92 | loss: 0.00052 | val_0_mse: 0.00014 |  0:01:13s
epoch 93 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:13s
epoch 94 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:14s
epoch 95 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:15s
epoch 96 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:15s
epoch 97 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:16s
epoch 98 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:17s
epoch 99 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:18s
Stop training because you reached max_epochs = 100 with best_epoch = 97 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/deberta-high-3.zip
Device used : cuda
epoch 0  | loss: 2.83887 | val_0_mse: 0.02396 |  0:00:00s
epoch 1  | loss: 0.32189 | val_0_mse: 0.01613 |  0:00:01s
epoch 2  | loss: 0.08808 | val_0_mse: 0.00413 |  0:00:02s
epoch 3  | loss: 0.02378 | val_0_mse: 0.00112 |  0:00:03s
epoch 4  | loss: 0.00842 | val_0_mse: 0.00066 |  0:0

epoch 129| loss: 0.00038 | val_0_mse: 0.00013 |  0:01:43s
epoch 130| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:44s
epoch 131| loss: 0.00041 | val_0_mse: 0.00013 |  0:01:45s
epoch 132| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:46s
epoch 133| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:46s
epoch 134| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:47s
epoch 135| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:48s
epoch 136| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:49s
epoch 137| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:50s
epoch 138| loss: 0.00035 | val_0_mse: 0.00013 |  0:01:50s
epoch 139| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:51s
epoch 140| loss: 0.00035 | val_0_mse: 0.00013 |  0:01:52s
epoch 141| loss: 0.00034 | val_0_mse: 0.00013 |  0:01:53s
epoch 142| loss: 0.00035 | val_0_mse: 0.00016 |  0:01:54s
epoch 143| loss: 0.0004  | val_0_mse: 0.00017 |  0:01:54s
epoch 144| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:55s
epoch 145| loss: 0.00035 | val_0_mse: 0.00013 |  0:01:56s
epoch 146| los

epoch 66 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:52s
epoch 67 | loss: 0.00074 | val_0_mse: 0.00014 |  0:00:53s
epoch 68 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:54s
epoch 69 | loss: 0.00099 | val_0_mse: 0.00015 |  0:00:55s
epoch 70 | loss: 0.00087 | val_0_mse: 0.00014 |  0:00:55s
epoch 71 | loss: 0.00108 | val_0_mse: 0.00014 |  0:00:56s
epoch 72 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:57s
epoch 73 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:58s
epoch 74 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:58s
epoch 75 | loss: 0.0007  | val_0_mse: 0.00016 |  0:00:59s
epoch 76 | loss: 0.00075 | val_0_mse: 0.00016 |  0:01:00s
epoch 77 | loss: 0.00074 | val_0_mse: 0.00014 |  0:01:01s
epoch 78 | loss: 0.00068 | val_0_mse: 0.00015 |  0:01:01s
epoch 79 | loss: 0.00066 | val_0_mse: 0.00014 |  0:01:02s
epoch 80 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:03s
epoch 81 | loss: 0.00064 | val_0_mse: 0.00015 |  0:01:04s
epoch 82 | loss: 0.00064 | val_0_mse: 0.00014 |  0:01:04s
epoch 83 | los

epoch 103| loss: 0.00033 | val_0_mse: 5e-05   |  0:01:20s
epoch 104| loss: 0.00034 | val_0_mse: 5e-05   |  0:01:21s
epoch 105| loss: 0.00033 | val_0_mse: 5e-05   |  0:01:22s
epoch 106| loss: 0.00033 | val_0_mse: 4e-05   |  0:01:23s
epoch 107| loss: 0.00032 | val_0_mse: 4e-05   |  0:01:23s
epoch 108| loss: 0.00034 | val_0_mse: 6e-05   |  0:01:24s
epoch 109| loss: 0.00033 | val_0_mse: 5e-05   |  0:01:25s
epoch 110| loss: 0.00032 | val_0_mse: 6e-05   |  0:01:26s
epoch 111| loss: 0.00031 | val_0_mse: 4e-05   |  0:01:27s
epoch 112| loss: 0.00031 | val_0_mse: 5e-05   |  0:01:28s
epoch 113| loss: 0.00031 | val_0_mse: 4e-05   |  0:01:29s
epoch 114| loss: 0.0003  | val_0_mse: 4e-05   |  0:01:29s
epoch 115| loss: 0.0003  | val_0_mse: 5e-05   |  0:01:30s
epoch 116| loss: 0.0003  | val_0_mse: 5e-05   |  0:01:31s
epoch 117| loss: 0.0003  | val_0_mse: 5e-05   |  0:01:32s
epoch 118| loss: 0.0003  | val_0_mse: 5e-05   |  0:01:32s
epoch 119| loss: 0.00029 | val_0_mse: 5e-05   |  0:01:33s
epoch 120| los

epoch 40 | loss: 0.00077 | val_0_mse: 0.00013 |  0:00:32s
epoch 41 | loss: 0.00076 | val_0_mse: 0.00013 |  0:00:33s
epoch 42 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:34s
epoch 43 | loss: 0.00074 | val_0_mse: 0.00013 |  0:00:35s
epoch 44 | loss: 0.00073 | val_0_mse: 0.00013 |  0:00:36s
epoch 45 | loss: 0.00073 | val_0_mse: 0.00013 |  0:00:37s
epoch 46 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:38s
epoch 47 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:39s
epoch 48 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:40s
epoch 49 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:40s
epoch 50 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:41s
epoch 51 | loss: 0.00069 | val_0_mse: 0.00013 |  0:00:42s
epoch 52 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:43s
epoch 53 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:43s
epoch 54 | loss: 0.00067 | val_0_mse: 0.00013 |  0:00:44s
epoch 55 | loss: 0.00067 | val_0_mse: 0.00013 |  0:00:45s
epoch 56 | loss: 0.00066 | val_0_mse: 0.00013 |  0:00:46s
epoch 57 | los

epoch 77 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:00s
epoch 78 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:00s
epoch 79 | loss: 0.0005  | val_0_mse: 0.00011 |  0:01:01s
epoch 80 | loss: 0.00053 | val_0_mse: 0.0001  |  0:01:02s
epoch 81 | loss: 0.00048 | val_0_mse: 0.0001  |  0:01:03s
epoch 82 | loss: 0.00048 | val_0_mse: 9e-05   |  0:01:03s
epoch 83 | loss: 0.0005  | val_0_mse: 0.00013 |  0:01:04s
epoch 84 | loss: 0.00055 | val_0_mse: 8e-05   |  0:01:05s
epoch 85 | loss: 0.00051 | val_0_mse: 0.00011 |  0:01:06s
epoch 86 | loss: 0.00051 | val_0_mse: 0.00011 |  0:01:07s
epoch 87 | loss: 0.00049 | val_0_mse: 0.00011 |  0:01:08s
epoch 88 | loss: 0.00047 | val_0_mse: 0.0001  |  0:01:09s
epoch 89 | loss: 0.00046 | val_0_mse: 9e-05   |  0:01:09s
epoch 90 | loss: 0.00045 | val_0_mse: 9e-05   |  0:01:10s
epoch 91 | loss: 0.00046 | val_0_mse: 8e-05   |  0:01:11s
epoch 92 | loss: 0.00045 | val_0_mse: 9e-05   |  0:01:12s
epoch 93 | loss: 0.00046 | val_0_mse: 8e-05   |  0:01:12s
epoch 94 | los

epoch 14 | loss: 0.0011  | val_0_mse: 0.00034 |  0:00:11s
epoch 15 | loss: 0.00111 | val_0_mse: 0.00033 |  0:00:12s
epoch 16 | loss: 0.00106 | val_0_mse: 0.00031 |  0:00:12s
epoch 17 | loss: 0.00102 | val_0_mse: 0.00027 |  0:00:13s
epoch 18 | loss: 0.001   | val_0_mse: 0.00026 |  0:00:14s
epoch 19 | loss: 0.00098 | val_0_mse: 0.00025 |  0:00:15s
epoch 20 | loss: 0.00095 | val_0_mse: 0.00024 |  0:00:16s
epoch 21 | loss: 0.00092 | val_0_mse: 0.00023 |  0:00:16s
epoch 22 | loss: 0.0009  | val_0_mse: 0.00022 |  0:00:17s
epoch 23 | loss: 0.00089 | val_0_mse: 0.00022 |  0:00:18s
epoch 24 | loss: 0.00087 | val_0_mse: 0.00021 |  0:00:19s
epoch 25 | loss: 0.00086 | val_0_mse: 0.00021 |  0:00:19s
epoch 26 | loss: 0.00084 | val_0_mse: 0.0002  |  0:00:20s
epoch 27 | loss: 0.00082 | val_0_mse: 0.00019 |  0:00:21s
epoch 28 | loss: 0.00083 | val_0_mse: 0.00019 |  0:00:21s
epoch 29 | loss: 0.00082 | val_0_mse: 0.00019 |  0:00:22s
epoch 30 | loss: 0.00079 | val_0_mse: 0.00019 |  0:00:23s
epoch 31 | los

epoch 51 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:39s
epoch 52 | loss: 0.00069 | val_0_mse: 0.00016 |  0:00:40s
epoch 53 | loss: 0.00069 | val_0_mse: 0.00013 |  0:00:41s
epoch 54 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:42s
epoch 55 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:42s
epoch 56 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:43s
epoch 57 | loss: 0.00066 | val_0_mse: 0.00016 |  0:00:44s
epoch 58 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:45s
epoch 59 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:45s
epoch 60 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:46s
epoch 61 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:47s
epoch 62 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:48s
epoch 63 | loss: 0.00064 | val_0_mse: 0.00015 |  0:00:48s
epoch 64 | loss: 0.00064 | val_0_mse: 0.00015 |  0:00:49s
epoch 65 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:50s
epoch 66 | loss: 0.00062 | val_0_mse: 0.00015 |  0:00:51s
epoch 67 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:52s
epoch 68 | los

epoch 193| loss: 0.0003  | val_0_mse: 0.00013 |  0:02:31s
epoch 194| loss: 0.0003  | val_0_mse: 0.00013 |  0:02:32s
epoch 195| loss: 0.0003  | val_0_mse: 0.0002  |  0:02:33s
epoch 196| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:33s
epoch 197| loss: 0.00031 | val_0_mse: 0.00013 |  0:02:34s
epoch 198| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:35s
epoch 199| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:36s
Stop training because you reached max_epochs = 200 with best_epoch = 138 and best_val_0_mse = 0.00013
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/deberta-low-6.zip
Device used : cuda
epoch 0  | loss: 0.59781 | val_0_mse: 0.01191 |  0:00:00s
epoch 1  | loss: 0.04969 | val_0_mse: 0.00478 |  0:00:01s
epoch 2  | loss: 0.01184 | val_0_mse: 0.00466 |  0:00:02s
epoch 3  | loss: 0.00747 | val_0_mse: 0.00076 |  0:00:03s
epoch 4  | loss: 0.0035  | val_0_mse: 0.00089 |  0:00:04s
epoch 5  | loss: 0.00231 | val_0_mse: 0.00051 |  0:0

epoch 26 | loss: 0.00099 | val_0_mse: 0.00019 |  0:00:21s
epoch 27 | loss: 0.00096 | val_0_mse: 0.00017 |  0:00:22s
epoch 28 | loss: 0.00094 | val_0_mse: 0.00017 |  0:00:23s
epoch 29 | loss: 0.00092 | val_0_mse: 0.00017 |  0:00:24s
epoch 30 | loss: 0.00088 | val_0_mse: 0.00016 |  0:00:24s
epoch 31 | loss: 0.00088 | val_0_mse: 0.00016 |  0:00:25s
epoch 32 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:26s
epoch 33 | loss: 0.00087 | val_0_mse: 0.00016 |  0:00:27s
epoch 34 | loss: 0.00085 | val_0_mse: 0.00016 |  0:00:27s
epoch 35 | loss: 0.00084 | val_0_mse: 0.00017 |  0:00:28s
epoch 36 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:29s
epoch 37 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:30s
epoch 38 | loss: 0.0008  | val_0_mse: 0.00017 |  0:00:30s
epoch 39 | loss: 0.00079 | val_0_mse: 0.00017 |  0:00:31s
epoch 40 | loss: 0.00078 | val_0_mse: 0.00017 |  0:00:32s
epoch 41 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:33s
epoch 42 | loss: 0.00076 | val_0_mse: 0.00016 |  0:00:33s
epoch 43 | los

epoch 168| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:15s
epoch 169| loss: 0.00033 | val_0_mse: 0.00013 |  0:02:15s
epoch 170| loss: 0.00034 | val_0_mse: 0.00013 |  0:02:16s
epoch 171| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:17s
epoch 172| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:18s
epoch 173| loss: 0.00032 | val_0_mse: 0.00014 |  0:02:19s
epoch 174| loss: 0.00032 | val_0_mse: 0.00016 |  0:02:19s
epoch 175| loss: 0.00034 | val_0_mse: 0.00016 |  0:02:20s
epoch 176| loss: 0.00034 | val_0_mse: 0.00014 |  0:02:21s
epoch 177| loss: 0.00032 | val_0_mse: 0.00014 |  0:02:22s
epoch 178| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:23s
epoch 179| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:24s
epoch 180| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:25s
epoch 181| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:25s
epoch 182| loss: 0.00032 | val_0_mse: 0.00014 |  0:02:26s
epoch 183| loss: 0.00032 | val_0_mse: 0.00015 |  0:02:27s
epoch 184| loss: 0.00032 | val_0_mse: 0.00016 |  0:02:28s
epoch 185| los

epoch 1  | loss: 0.23999 | val_0_mse: 0.02713 |  0:00:01s
epoch 2  | loss: 0.06417 | val_0_mse: 0.02581 |  0:00:02s
epoch 3  | loss: 0.02149 | val_0_mse: 0.01082 |  0:00:03s
epoch 4  | loss: 0.01048 | val_0_mse: 0.00462 |  0:00:03s
epoch 5  | loss: 0.00671 | val_0_mse: 0.00102 |  0:00:04s
epoch 6  | loss: 0.00411 | val_0_mse: 0.00097 |  0:00:05s
epoch 7  | loss: 0.00283 | val_0_mse: 0.0004  |  0:00:06s
epoch 8  | loss: 0.00221 | val_0_mse: 0.00047 |  0:00:07s
epoch 9  | loss: 0.00189 | val_0_mse: 0.0003  |  0:00:08s
epoch 10 | loss: 0.0017  | val_0_mse: 0.00027 |  0:00:09s
epoch 11 | loss: 0.00158 | val_0_mse: 0.00027 |  0:00:09s
epoch 12 | loss: 0.00155 | val_0_mse: 0.00027 |  0:00:10s
epoch 13 | loss: 0.00141 | val_0_mse: 0.00036 |  0:00:11s
epoch 14 | loss: 0.00139 | val_0_mse: 0.00023 |  0:00:12s
epoch 15 | loss: 0.00128 | val_0_mse: 0.00025 |  0:00:13s
epoch 16 | loss: 0.00125 | val_0_mse: 0.00023 |  0:00:13s
epoch 17 | loss: 0.00132 | val_0_mse: 0.00022 |  0:00:14s
epoch 18 | los

epoch 143| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:57s
epoch 144| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:58s
epoch 145| loss: 0.00034 | val_0_mse: 0.00015 |  0:01:59s
epoch 146| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:00s
epoch 147| loss: 0.00035 | val_0_mse: 0.00014 |  0:02:01s
epoch 148| loss: 0.00036 | val_0_mse: 0.00015 |  0:02:02s
epoch 149| loss: 0.00034 | val_0_mse: 0.00015 |  0:02:03s
epoch 150| loss: 0.00034 | val_0_mse: 0.00014 |  0:02:04s
epoch 151| loss: 0.00034 | val_0_mse: 0.00013 |  0:02:05s
epoch 152| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:05s
epoch 153| loss: 0.00034 | val_0_mse: 0.00014 |  0:02:06s
epoch 154| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:08s
epoch 155| loss: 0.00033 | val_0_mse: 0.00014 |  0:02:09s
epoch 156| loss: 0.00032 | val_0_mse: 0.00015 |  0:02:09s
epoch 157| loss: 0.00034 | val_0_mse: 0.00014 |  0:02:10s
epoch 158| loss: 0.00033 | val_0_mse: 0.00015 |  0:02:11s
epoch 159| loss: 0.00032 | val_0_mse: 0.00013 |  0:02:12s
epoch 160| los

epoch 80 | loss: 0.00051 | val_0_mse: 0.00015 |  0:01:16s
epoch 81 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:17s
epoch 82 | loss: 0.00051 | val_0_mse: 0.00015 |  0:01:17s
epoch 83 | loss: 0.0005  | val_0_mse: 0.00015 |  0:01:18s
epoch 84 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:19s
epoch 85 | loss: 0.0005  | val_0_mse: 0.00014 |  0:01:20s
epoch 86 | loss: 0.00048 | val_0_mse: 0.00015 |  0:01:21s
epoch 87 | loss: 0.00049 | val_0_mse: 0.00014 |  0:01:22s
epoch 88 | loss: 0.00048 | val_0_mse: 0.00014 |  0:01:23s
epoch 89 | loss: 0.00048 | val_0_mse: 0.00014 |  0:01:24s
epoch 90 | loss: 0.00048 | val_0_mse: 0.00014 |  0:01:25s
epoch 91 | loss: 0.00047 | val_0_mse: 0.00014 |  0:01:26s
epoch 92 | loss: 0.00047 | val_0_mse: 0.00014 |  0:01:26s
epoch 93 | loss: 0.00047 | val_0_mse: 0.00013 |  0:01:27s
epoch 94 | loss: 0.00047 | val_0_mse: 0.00013 |  0:01:28s
epoch 95 | loss: 0.00047 | val_0_mse: 0.00013 |  0:01:29s
epoch 96 | loss: 0.00047 | val_0_mse: 0.00013 |  0:01:30s
epoch 97 | los

epoch 117| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:49s
epoch 118| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:50s
epoch 119| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:50s
epoch 120| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:51s
epoch 121| loss: 0.00042 | val_0_mse: 0.00013 |  0:01:52s
epoch 122| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:53s
epoch 123| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:54s
epoch 124| loss: 0.00046 | val_0_mse: 0.00014 |  0:01:54s
epoch 125| loss: 0.00046 | val_0_mse: 0.00013 |  0:01:55s
epoch 126| loss: 0.00045 | val_0_mse: 0.00014 |  0:01:56s
epoch 127| loss: 0.00046 | val_0_mse: 0.00013 |  0:01:56s
epoch 128| loss: 0.00046 | val_0_mse: 0.00013 |  0:01:57s
epoch 129| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:58s
epoch 130| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:58s
epoch 131| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:59s
epoch 132| loss: 0.00045 | val_0_mse: 0.00013 |  0:02:00s
epoch 133| loss: 0.00044 | val_0_mse: 0.00013 |  0:02:01s
epoch 134| los

epoch 54 | loss: 0.00074 | val_0_mse: 0.00014 |  0:00:38s
epoch 55 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:39s
epoch 56 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:39s
epoch 57 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:40s
epoch 58 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:41s
epoch 59 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:41s
epoch 60 | loss: 0.0007  | val_0_mse: 0.00014 |  0:00:42s
epoch 61 | loss: 0.0007  | val_0_mse: 0.00014 |  0:00:43s
epoch 62 | loss: 0.00069 | val_0_mse: 0.00014 |  0:00:43s
epoch 63 | loss: 0.00069 | val_0_mse: 0.00014 |  0:00:44s
epoch 64 | loss: 0.00068 | val_0_mse: 0.00013 |  0:00:45s
epoch 65 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:46s
epoch 66 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:47s
epoch 67 | loss: 0.00067 | val_0_mse: 0.00013 |  0:00:47s
epoch 68 | loss: 0.00067 | val_0_mse: 0.00013 |  0:00:48s
epoch 69 | loss: 0.00066 | val_0_mse: 0.00014 |  0:00:49s
epoch 70 | loss: 0.00065 | val_0_mse: 0.00013 |  0:00:49s
epoch 71 | los

epoch 91 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:04s
epoch 92 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:05s
epoch 93 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:06s
epoch 94 | loss: 0.00056 | val_0_mse: 0.00015 |  0:01:07s
epoch 95 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:08s
epoch 96 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:08s
epoch 97 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:09s
epoch 98 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:10s
epoch 99 | loss: 0.00052 | val_0_mse: 0.00014 |  0:01:10s
epoch 100| loss: 0.00052 | val_0_mse: 0.00014 |  0:01:11s
epoch 101| loss: 0.00052 | val_0_mse: 0.00014 |  0:01:12s
epoch 102| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:13s
epoch 103| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:13s
epoch 104| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:14s
epoch 105| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:15s
epoch 106| loss: 0.0005  | val_0_mse: 0.00014 |  0:01:16s
epoch 107| loss: 0.00049 | val_0_mse: 0.00014 |  0:01:17s
epoch 108| los

epoch 28 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:19s
epoch 29 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:20s
epoch 30 | loss: 0.00079 | val_0_mse: 0.00016 |  0:00:20s
epoch 31 | loss: 0.00079 | val_0_mse: 0.00016 |  0:00:21s
epoch 32 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:22s
epoch 33 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:23s
epoch 34 | loss: 0.00076 | val_0_mse: 0.00016 |  0:00:23s
epoch 35 | loss: 0.00076 | val_0_mse: 0.00016 |  0:00:24s
epoch 36 | loss: 0.00076 | val_0_mse: 0.00016 |  0:00:25s
epoch 37 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:26s
epoch 38 | loss: 0.00073 | val_0_mse: 0.00015 |  0:00:26s
epoch 39 | loss: 0.00072 | val_0_mse: 0.00015 |  0:00:27s
epoch 40 | loss: 0.00071 | val_0_mse: 0.00015 |  0:00:28s
epoch 41 | loss: 0.0007  | val_0_mse: 0.00015 |  0:00:28s
epoch 42 | loss: 0.00069 | val_0_mse: 0.00015 |  0:00:29s
epoch 43 | loss: 0.0007  | val_0_mse: 0.00015 |  0:00:29s
epoch 44 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:30s
epoch 45 | los

epoch 65 | loss: 0.00061 | val_0_mse: 0.00016 |  0:00:39s
epoch 66 | loss: 0.0006  | val_0_mse: 0.00015 |  0:00:39s
epoch 67 | loss: 0.0006  | val_0_mse: 0.00015 |  0:00:40s
epoch 68 | loss: 0.00059 | val_0_mse: 0.00015 |  0:00:40s
epoch 69 | loss: 0.00058 | val_0_mse: 0.00014 |  0:00:41s
epoch 70 | loss: 0.00057 | val_0_mse: 0.00015 |  0:00:41s
epoch 71 | loss: 0.00057 | val_0_mse: 0.00014 |  0:00:42s
epoch 72 | loss: 0.00059 | val_0_mse: 0.00016 |  0:00:43s
epoch 73 | loss: 0.00056 | val_0_mse: 0.00014 |  0:00:43s
epoch 74 | loss: 0.00056 | val_0_mse: 0.00015 |  0:00:44s
epoch 75 | loss: 0.00056 | val_0_mse: 0.00014 |  0:00:44s
epoch 76 | loss: 0.00056 | val_0_mse: 0.00016 |  0:00:45s
epoch 77 | loss: 0.00056 | val_0_mse: 0.00015 |  0:00:46s
epoch 78 | loss: 0.00056 | val_0_mse: 0.00017 |  0:00:46s
epoch 79 | loss: 0.00056 | val_0_mse: 0.00015 |  0:00:47s
epoch 80 | loss: 0.00055 | val_0_mse: 0.00015 |  0:00:47s
epoch 81 | loss: 0.00054 | val_0_mse: 0.00014 |  0:00:48s
epoch 82 | los

epoch 2  | loss: 0.02226 | val_0_mse: 0.00128 |  0:00:01s
epoch 3  | loss: 0.00989 | val_0_mse: 0.00101 |  0:00:02s
epoch 4  | loss: 0.0051  | val_0_mse: 0.00047 |  0:00:02s
epoch 5  | loss: 0.00334 | val_0_mse: 0.00044 |  0:00:03s
epoch 6  | loss: 0.00261 | val_0_mse: 0.00033 |  0:00:04s
epoch 7  | loss: 0.00203 | val_0_mse: 0.00029 |  0:00:04s
epoch 8  | loss: 0.00189 | val_0_mse: 0.00026 |  0:00:05s
epoch 9  | loss: 0.00153 | val_0_mse: 0.00024 |  0:00:05s
epoch 10 | loss: 0.00145 | val_0_mse: 0.00021 |  0:00:06s
epoch 11 | loss: 0.00132 | val_0_mse: 0.0002  |  0:00:06s
epoch 12 | loss: 0.00123 | val_0_mse: 0.0002  |  0:00:07s
epoch 13 | loss: 0.00112 | val_0_mse: 0.00019 |  0:00:08s
epoch 14 | loss: 0.00109 | val_0_mse: 0.0002  |  0:00:08s
epoch 15 | loss: 0.00104 | val_0_mse: 0.00019 |  0:00:09s
epoch 16 | loss: 0.00101 | val_0_mse: 0.00018 |  0:00:10s
epoch 17 | loss: 0.00098 | val_0_mse: 0.00019 |  0:00:10s
epoch 18 | loss: 0.00098 | val_0_mse: 0.00018 |  0:00:11s
epoch 19 | los

epoch 39 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:26s
epoch 40 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:27s
epoch 41 | loss: 0.00081 | val_0_mse: 0.00015 |  0:00:27s
epoch 42 | loss: 0.0008  | val_0_mse: 0.00016 |  0:00:28s
epoch 43 | loss: 0.00081 | val_0_mse: 0.00015 |  0:00:29s
epoch 44 | loss: 0.00079 | val_0_mse: 0.00015 |  0:00:29s
epoch 45 | loss: 0.00079 | val_0_mse: 0.00015 |  0:00:30s
epoch 46 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:30s
epoch 47 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:31s
epoch 48 | loss: 0.00076 | val_0_mse: 0.00014 |  0:00:32s
epoch 49 | loss: 0.00075 | val_0_mse: 0.00015 |  0:00:32s
epoch 50 | loss: 0.00075 | val_0_mse: 0.00014 |  0:00:33s
epoch 51 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:33s
epoch 52 | loss: 0.00074 | val_0_mse: 0.00015 |  0:00:34s
epoch 53 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:35s
epoch 54 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:35s
epoch 55 | loss: 0.00072 | val_0_mse: 0.00015 |  0:00:36s
epoch 56 | los

epoch 181| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:50s
epoch 182| loss: 0.00022 | val_0_mse: 7e-05   |  0:01:50s
epoch 183| loss: 0.00022 | val_0_mse: 8e-05   |  0:01:51s
epoch 184| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:52s
epoch 185| loss: 0.00023 | val_0_mse: 0.0001  |  0:01:52s
epoch 186| loss: 0.00025 | val_0_mse: 7e-05   |  0:01:53s
epoch 187| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:53s
epoch 188| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:54s
epoch 189| loss: 0.0002  | val_0_mse: 7e-05   |  0:01:55s
epoch 190| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:55s
epoch 191| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:56s
epoch 192| loss: 0.00021 | val_0_mse: 7e-05   |  0:01:56s
epoch 193| loss: 0.0002  | val_0_mse: 7e-05   |  0:01:57s
epoch 194| loss: 0.0002  | val_0_mse: 9e-05   |  0:01:58s
epoch 195| loss: 0.0002  | val_0_mse: 9e-05   |  0:01:58s
epoch 196| loss: 0.00019 | val_0_mse: 9e-05   |  0:01:59s
epoch 197| loss: 0.0002  | val_0_mse: 9e-05   |  0:02:00s
epoch 198| los

epoch 14 | loss: 0.00159 | val_0_mse: 0.00022 |  0:00:08s
epoch 15 | loss: 0.00141 | val_0_mse: 0.00016 |  0:00:09s
epoch 16 | loss: 0.00139 | val_0_mse: 0.00016 |  0:00:10s
epoch 17 | loss: 0.00135 | val_0_mse: 0.00017 |  0:00:10s
epoch 18 | loss: 0.00122 | val_0_mse: 0.00016 |  0:00:11s
epoch 19 | loss: 0.00128 | val_0_mse: 0.00016 |  0:00:11s
epoch 20 | loss: 0.00121 | val_0_mse: 0.00016 |  0:00:12s
epoch 21 | loss: 0.00114 | val_0_mse: 0.00018 |  0:00:13s
epoch 22 | loss: 0.00112 | val_0_mse: 0.00015 |  0:00:13s
epoch 23 | loss: 0.00112 | val_0_mse: 0.00016 |  0:00:14s
epoch 24 | loss: 0.00105 | val_0_mse: 0.00018 |  0:00:14s
epoch 25 | loss: 0.00099 | val_0_mse: 0.00016 |  0:00:15s
epoch 26 | loss: 0.001   | val_0_mse: 0.00016 |  0:00:15s
epoch 27 | loss: 0.00098 | val_0_mse: 0.00016 |  0:00:16s
epoch 28 | loss: 0.00097 | val_0_mse: 0.00016 |  0:00:17s
epoch 29 | loss: 0.00096 | val_0_mse: 0.00016 |  0:00:17s
epoch 30 | loss: 0.00094 | val_0_mse: 0.00016 |  0:00:18s
epoch 31 | los

epoch 156| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:34s
epoch 157| loss: 0.00034 | val_0_mse: 0.00014 |  0:01:35s
epoch 158| loss: 0.00034 | val_0_mse: 0.00013 |  0:01:36s
epoch 159| loss: 0.00033 | val_0_mse: 0.00014 |  0:01:36s
epoch 160| loss: 0.00034 | val_0_mse: 0.00013 |  0:01:37s
epoch 161| loss: 0.00033 | val_0_mse: 0.00014 |  0:01:37s
epoch 162| loss: 0.00033 | val_0_mse: 0.00013 |  0:01:38s
epoch 163| loss: 0.00033 | val_0_mse: 0.00013 |  0:01:39s
epoch 164| loss: 0.00033 | val_0_mse: 0.00013 |  0:01:39s
epoch 165| loss: 0.00033 | val_0_mse: 0.00013 |  0:01:40s
epoch 166| loss: 0.00032 | val_0_mse: 0.00013 |  0:01:40s
epoch 167| loss: 0.00032 | val_0_mse: 0.00013 |  0:01:41s
epoch 168| loss: 0.00032 | val_0_mse: 0.00014 |  0:01:42s
epoch 169| loss: 0.00032 | val_0_mse: 0.00013 |  0:01:42s
epoch 170| loss: 0.00032 | val_0_mse: 0.00014 |  0:01:43s
epoch 171| loss: 0.00033 | val_0_mse: 0.00016 |  0:01:44s
epoch 172| loss: 0.00034 | val_0_mse: 0.00014 |  0:01:44s
epoch 173| los

epoch 93 | loss: 0.00033 | val_0_mse: 6e-05   |  0:00:57s
epoch 94 | loss: 0.00032 | val_0_mse: 6e-05   |  0:00:58s
epoch 95 | loss: 0.00032 | val_0_mse: 5e-05   |  0:00:58s
epoch 96 | loss: 0.00032 | val_0_mse: 9e-05   |  0:00:59s
epoch 97 | loss: 0.00039 | val_0_mse: 0.0001  |  0:01:00s
epoch 98 | loss: 0.00034 | val_0_mse: 4e-05   |  0:01:00s
epoch 99 | loss: 0.00031 | val_0_mse: 5e-05   |  0:01:01s
Stop training because you reached max_epochs = 100 with best_epoch = 98 and best_val_0_mse = 4e-05
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/default-high-4.zip
Device used : cuda
epoch 0  | loss: 0.2436  | val_0_mse: 0.01924 |  0:00:00s
epoch 1  | loss: 0.05814 | val_0_mse: 0.00563 |  0:00:01s
epoch 2  | loss: 0.01813 | val_0_mse: 0.00116 |  0:00:01s
epoch 3  | loss: 0.00539 | val_0_mse: 0.00091 |  0:00:02s
epoch 4  | loss: 0.00278 | val_0_mse: 0.00032 |  0:00:03s
epoch 5  | loss: 0.00197 | val_0_mse: 0.00019 |  0:00:

epoch 130| loss: 0.00034 | val_0_mse: 8e-05   |  0:01:19s
epoch 131| loss: 0.00034 | val_0_mse: 9e-05   |  0:01:20s
epoch 132| loss: 0.00034 | val_0_mse: 9e-05   |  0:01:21s
epoch 133| loss: 0.00033 | val_0_mse: 0.00011 |  0:01:21s
epoch 134| loss: 0.00035 | val_0_mse: 0.00011 |  0:01:22s
epoch 135| loss: 0.00033 | val_0_mse: 0.0001  |  0:01:22s
epoch 136| loss: 0.00031 | val_0_mse: 0.00011 |  0:01:23s
epoch 137| loss: 0.0003  | val_0_mse: 0.0001  |  0:01:24s
epoch 138| loss: 0.0003  | val_0_mse: 0.0001  |  0:01:24s
epoch 139| loss: 0.0003  | val_0_mse: 0.0001  |  0:01:25s
epoch 140| loss: 0.00029 | val_0_mse: 0.0001  |  0:01:25s
epoch 141| loss: 0.00029 | val_0_mse: 0.0001  |  0:01:26s
epoch 142| loss: 0.00029 | val_0_mse: 0.0001  |  0:01:27s
epoch 143| loss: 0.00031 | val_0_mse: 0.00011 |  0:01:27s
epoch 144| loss: 0.0003  | val_0_mse: 0.0001  |  0:01:28s
epoch 145| loss: 0.0003  | val_0_mse: 0.0001  |  0:01:28s
epoch 146| loss: 0.0003  | val_0_mse: 0.0001  |  0:01:29s
epoch 147| los

epoch 67 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:32s
epoch 68 | loss: 0.00063 | val_0_mse: 0.00013 |  0:00:32s
epoch 69 | loss: 0.00064 | val_0_mse: 0.00012 |  0:00:33s
epoch 70 | loss: 0.00062 | val_0_mse: 0.00013 |  0:00:33s
epoch 71 | loss: 0.00061 | val_0_mse: 0.00012 |  0:00:34s
epoch 72 | loss: 0.00061 | val_0_mse: 0.00012 |  0:00:34s
epoch 73 | loss: 0.00061 | val_0_mse: 0.00014 |  0:00:35s
epoch 74 | loss: 0.00061 | val_0_mse: 0.00013 |  0:00:35s
epoch 75 | loss: 0.00059 | val_0_mse: 0.00012 |  0:00:35s
epoch 76 | loss: 0.00059 | val_0_mse: 0.00012 |  0:00:36s
epoch 77 | loss: 0.00059 | val_0_mse: 0.00013 |  0:00:36s
epoch 78 | loss: 0.00058 | val_0_mse: 0.00012 |  0:00:37s
epoch 79 | loss: 0.00058 | val_0_mse: 0.00012 |  0:00:37s
epoch 80 | loss: 0.00057 | val_0_mse: 0.00013 |  0:00:38s
epoch 81 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:38s
epoch 82 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:39s
epoch 83 | loss: 0.00056 | val_0_mse: 0.00012 |  0:00:39s
epoch 84 | los

epoch 104| loss: 0.00046 | val_0_mse: 0.00013 |  0:01:05s
epoch 105| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:05s
epoch 106| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:06s
epoch 107| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:06s
epoch 108| loss: 0.00045 | val_0_mse: 0.00013 |  0:01:07s
epoch 109| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:08s
epoch 110| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:08s
epoch 111| loss: 0.00044 | val_0_mse: 0.00013 |  0:01:09s
epoch 112| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:09s
epoch 113| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:10s
epoch 114| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:11s
epoch 115| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:11s
epoch 116| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:12s
epoch 117| loss: 0.00043 | val_0_mse: 0.00013 |  0:01:12s
epoch 118| loss: 0.00042 | val_0_mse: 0.00013 |  0:01:13s
epoch 119| loss: 0.00042 | val_0_mse: 0.00013 |  0:01:14s
epoch 120| loss: 0.00042 | val_0_mse: 0.00013 |  0:01:15s
epoch 121| los

epoch 41 | loss: 0.00077 | val_0_mse: 0.00013 |  0:00:24s
epoch 42 | loss: 0.00075 | val_0_mse: 0.00013 |  0:00:25s
epoch 43 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:26s
epoch 44 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:26s
epoch 45 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:27s
epoch 46 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:27s
epoch 47 | loss: 0.00069 | val_0_mse: 0.00013 |  0:00:28s
epoch 48 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:29s
epoch 49 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:29s
epoch 50 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:30s
epoch 51 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:30s
epoch 52 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:31s
epoch 53 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:32s
epoch 54 | loss: 0.00061 | val_0_mse: 0.00014 |  0:00:32s
epoch 55 | loss: 0.00062 | val_0_mse: 0.00014 |  0:00:33s
epoch 56 | loss: 0.0006  | val_0_mse: 0.00014 |  0:00:33s
epoch 57 | loss: 0.00059 | val_0_mse: 0.00014 |  0:00:34s
epoch 58 | los

epoch 78 | loss: 0.00057 | val_0_mse: 0.00014 |  0:00:50s
epoch 79 | loss: 0.00056 | val_0_mse: 0.00014 |  0:00:50s
epoch 80 | loss: 0.00055 | val_0_mse: 0.00014 |  0:00:51s
epoch 81 | loss: 0.00056 | val_0_mse: 0.00014 |  0:00:52s
epoch 82 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:52s
epoch 83 | loss: 0.00056 | val_0_mse: 0.00014 |  0:00:53s
epoch 84 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:54s
epoch 85 | loss: 0.00056 | val_0_mse: 0.00013 |  0:00:54s
epoch 86 | loss: 0.00054 | val_0_mse: 0.00014 |  0:00:55s
epoch 87 | loss: 0.00054 | val_0_mse: 0.00013 |  0:00:56s
epoch 88 | loss: 0.00055 | val_0_mse: 0.00013 |  0:00:56s
epoch 89 | loss: 0.00054 | val_0_mse: 0.00014 |  0:00:57s
epoch 90 | loss: 0.00053 | val_0_mse: 0.00013 |  0:00:58s
epoch 91 | loss: 0.00052 | val_0_mse: 0.00013 |  0:00:58s
epoch 92 | loss: 0.00053 | val_0_mse: 0.00014 |  0:00:59s
epoch 93 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:00s
epoch 94 | loss: 0.00052 | val_0_mse: 0.00014 |  0:01:00s
epoch 95 | los

epoch 15 | loss: 0.00087 | val_0_mse: 0.00018 |  0:00:10s
epoch 16 | loss: 0.00083 | val_0_mse: 0.00017 |  0:00:11s
epoch 17 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:11s
epoch 18 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:12s
epoch 19 | loss: 0.00077 | val_0_mse: 0.00017 |  0:00:13s
epoch 20 | loss: 0.00076 | val_0_mse: 0.00017 |  0:00:13s
epoch 21 | loss: 0.00077 | val_0_mse: 0.00017 |  0:00:14s
epoch 22 | loss: 0.00078 | val_0_mse: 0.00016 |  0:00:14s
epoch 23 | loss: 0.00073 | val_0_mse: 0.00017 |  0:00:15s
epoch 24 | loss: 0.00072 | val_0_mse: 0.00016 |  0:00:16s
epoch 25 | loss: 0.00071 | val_0_mse: 0.00017 |  0:00:16s
epoch 26 | loss: 0.00072 | val_0_mse: 0.00016 |  0:00:17s
epoch 27 | loss: 0.00071 | val_0_mse: 0.00017 |  0:00:17s
epoch 28 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:18s
epoch 29 | loss: 0.0007  | val_0_mse: 0.00015 |  0:00:19s
epoch 30 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:19s
epoch 31 | loss: 0.00067 | val_0_mse: 0.00015 |  0:00:20s
epoch 32 | los

epoch 52 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:30s
epoch 53 | loss: 0.00069 | val_0_mse: 0.00016 |  0:00:31s
epoch 54 | loss: 0.0007  | val_0_mse: 0.00015 |  0:00:31s
epoch 55 | loss: 0.00066 | val_0_mse: 0.00014 |  0:00:32s
epoch 56 | loss: 0.00066 | val_0_mse: 0.00014 |  0:00:33s
epoch 57 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:33s
epoch 58 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:34s
epoch 59 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:34s
epoch 60 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:35s
epoch 61 | loss: 0.00062 | val_0_mse: 0.00014 |  0:00:36s
epoch 62 | loss: 0.00061 | val_0_mse: 0.00014 |  0:00:36s
epoch 63 | loss: 0.00061 | val_0_mse: 0.00015 |  0:00:37s
epoch 64 | loss: 0.00061 | val_0_mse: 0.00014 |  0:00:37s
epoch 65 | loss: 0.00059 | val_0_mse: 0.00015 |  0:00:38s
epoch 66 | loss: 0.00059 | val_0_mse: 0.00015 |  0:00:39s
epoch 67 | loss: 0.0006  | val_0_mse: 0.00014 |  0:00:39s
epoch 68 | loss: 0.00059 | val_0_mse: 0.00014 |  0:00:40s
epoch 69 | los

epoch 194| loss: 0.00027 | val_0_mse: 0.00011 |  0:01:56s
epoch 195| loss: 0.00027 | val_0_mse: 0.0001  |  0:01:57s
epoch 196| loss: 0.00027 | val_0_mse: 0.0001  |  0:01:57s
epoch 197| loss: 0.00027 | val_0_mse: 0.0001  |  0:01:58s
epoch 198| loss: 0.00026 | val_0_mse: 0.0001  |  0:01:59s
epoch 199| loss: 0.00027 | val_0_mse: 0.0001  |  0:01:59s
Stop training because you reached max_epochs = 200 with best_epoch = 198 and best_val_0_mse = 0.0001
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/default-low-7.zip
Device used : cuda
epoch 0  | loss: 1.30951 | val_0_mse: 0.03364 |  0:00:00s
epoch 1  | loss: 0.26806 | val_0_mse: 0.03622 |  0:00:01s
epoch 2  | loss: 0.0748  | val_0_mse: 0.00653 |  0:00:01s
epoch 3  | loss: 0.03028 | val_0_mse: 0.00095 |  0:00:02s
epoch 4  | loss: 0.01073 | val_0_mse: 0.00061 |  0:00:03s
epoch 5  | loss: 0.00516 | val_0_mse: 0.00021 |  0:00:03s
epoch 6  | loss: 0.003   | val_0_mse: 0.00026 |  0:00

epoch 27 | loss: 0.00101 | val_0_mse: 0.0002  |  0:00:16s
epoch 28 | loss: 0.001   | val_0_mse: 0.0002  |  0:00:17s
epoch 29 | loss: 0.001   | val_0_mse: 0.0002  |  0:00:17s
epoch 30 | loss: 0.00097 | val_0_mse: 0.00019 |  0:00:18s
epoch 31 | loss: 0.00095 | val_0_mse: 0.0002  |  0:00:18s
epoch 32 | loss: 0.00096 | val_0_mse: 0.00019 |  0:00:19s
epoch 33 | loss: 0.00095 | val_0_mse: 0.00019 |  0:00:20s
epoch 34 | loss: 0.00092 | val_0_mse: 0.00019 |  0:00:20s
epoch 35 | loss: 0.00093 | val_0_mse: 0.00018 |  0:00:21s
epoch 36 | loss: 0.00088 | val_0_mse: 0.00018 |  0:00:21s
epoch 37 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:22s
epoch 38 | loss: 0.00092 | val_0_mse: 0.00018 |  0:00:23s
epoch 39 | loss: 0.00085 | val_0_mse: 0.00017 |  0:00:23s
epoch 40 | loss: 0.00084 | val_0_mse: 0.00017 |  0:00:24s
epoch 41 | loss: 0.00082 | val_0_mse: 0.00017 |  0:00:24s
epoch 42 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:25s
epoch 43 | loss: 0.0008  | val_0_mse: 0.00017 |  0:00:26s
epoch 44 | los

epoch 169| loss: 0.00037 | val_0_mse: 0.00012 |  0:01:40s
epoch 170| loss: 0.00037 | val_0_mse: 0.00013 |  0:01:41s
epoch 171| loss: 0.00036 | val_0_mse: 0.00012 |  0:01:41s
epoch 172| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:42s
epoch 173| loss: 0.00036 | val_0_mse: 0.00012 |  0:01:43s
epoch 174| loss: 0.00036 | val_0_mse: 0.00012 |  0:01:43s
epoch 175| loss: 0.00036 | val_0_mse: 0.00013 |  0:01:44s
epoch 176| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:44s
epoch 177| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:45s
epoch 178| loss: 0.00035 | val_0_mse: 0.00015 |  0:01:46s
epoch 179| loss: 0.00039 | val_0_mse: 0.00016 |  0:01:46s
epoch 180| loss: 0.00037 | val_0_mse: 0.00012 |  0:01:47s
epoch 181| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:47s
epoch 182| loss: 0.00035 | val_0_mse: 0.00012 |  0:01:48s
epoch 183| loss: 0.00034 | val_0_mse: 0.00013 |  0:01:49s
epoch 184| loss: 0.00034 | val_0_mse: 0.00013 |  0:01:49s
epoch 185| loss: 0.00034 | val_0_mse: 0.00012 |  0:01:50s
epoch 186| los

epoch 2  | loss: 0.0964  | val_0_mse: 0.01191 |  0:00:01s
epoch 3  | loss: 0.03532 | val_0_mse: 0.00648 |  0:00:02s
epoch 4  | loss: 0.02637 | val_0_mse: 0.00621 |  0:00:02s
epoch 5  | loss: 0.00925 | val_0_mse: 0.00363 |  0:00:03s
epoch 6  | loss: 0.00652 | val_0_mse: 0.00622 |  0:00:04s
epoch 7  | loss: 0.00486 | val_0_mse: 0.00196 |  0:00:04s
epoch 8  | loss: 0.00319 | val_0_mse: 0.00057 |  0:00:05s
epoch 9  | loss: 0.00267 | val_0_mse: 0.00068 |  0:00:05s
epoch 10 | loss: 0.00232 | val_0_mse: 0.0005  |  0:00:06s
epoch 11 | loss: 0.00216 | val_0_mse: 0.00038 |  0:00:07s
epoch 12 | loss: 0.0022  | val_0_mse: 0.00024 |  0:00:07s
epoch 13 | loss: 0.00183 | val_0_mse: 0.00028 |  0:00:08s
epoch 14 | loss: 0.00159 | val_0_mse: 0.00023 |  0:00:09s
epoch 15 | loss: 0.00149 | val_0_mse: 0.00037 |  0:00:09s
epoch 16 | loss: 0.00139 | val_0_mse: 0.00029 |  0:00:10s
epoch 17 | loss: 0.0013  | val_0_mse: 0.00025 |  0:00:11s
epoch 18 | loss: 0.00119 | val_0_mse: 0.00021 |  0:00:11s
epoch 19 | los

epoch 144| loss: 0.00026 | val_0_mse: 5e-05   |  0:01:29s
epoch 145| loss: 0.00027 | val_0_mse: 5e-05   |  0:01:29s
epoch 146| loss: 0.00027 | val_0_mse: 5e-05   |  0:01:30s
epoch 147| loss: 0.00027 | val_0_mse: 5e-05   |  0:01:30s
epoch 148| loss: 0.00026 | val_0_mse: 5e-05   |  0:01:31s
epoch 149| loss: 0.00026 | val_0_mse: 5e-05   |  0:01:31s
epoch 150| loss: 0.00027 | val_0_mse: 6e-05   |  0:01:32s
epoch 151| loss: 0.00027 | val_0_mse: 5e-05   |  0:01:33s
epoch 152| loss: 0.00025 | val_0_mse: 5e-05   |  0:01:33s
epoch 153| loss: 0.00026 | val_0_mse: 4e-05   |  0:01:34s
epoch 154| loss: 0.00026 | val_0_mse: 5e-05   |  0:01:34s
epoch 155| loss: 0.00026 | val_0_mse: 5e-05   |  0:01:35s
epoch 156| loss: 0.00026 | val_0_mse: 5e-05   |  0:01:36s
epoch 157| loss: 0.00026 | val_0_mse: 6e-05   |  0:01:36s
epoch 158| loss: 0.00025 | val_0_mse: 5e-05   |  0:01:37s
epoch 159| loss: 0.00025 | val_0_mse: 4e-05   |  0:01:37s
epoch 160| loss: 0.00024 | val_0_mse: 4e-05   |  0:01:38s
epoch 161| los

epoch 81 | loss: 0.00058 | val_0_mse: 0.00012 |  0:01:01s
epoch 82 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:02s
epoch 83 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:03s
epoch 84 | loss: 0.00057 | val_0_mse: 0.00015 |  0:01:04s
epoch 85 | loss: 0.00056 | val_0_mse: 0.00012 |  0:01:04s
epoch 86 | loss: 0.00056 | val_0_mse: 0.00013 |  0:01:05s
epoch 87 | loss: 0.00055 | val_0_mse: 0.00016 |  0:01:06s
epoch 88 | loss: 0.00056 | val_0_mse: 0.00015 |  0:01:07s
epoch 89 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:07s
epoch 90 | loss: 0.00054 | val_0_mse: 0.00012 |  0:01:08s
epoch 91 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:09s
epoch 92 | loss: 0.00054 | val_0_mse: 0.00013 |  0:01:10s
epoch 93 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:11s
epoch 94 | loss: 0.00052 | val_0_mse: 0.00013 |  0:01:12s
epoch 95 | loss: 0.00052 | val_0_mse: 0.00012 |  0:01:12s
epoch 96 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:13s
epoch 97 | loss: 0.00051 | val_0_mse: 0.00013 |  0:01:14s
epoch 98 | los

epoch 118| loss: 0.00037 | val_0_mse: 0.00014 |  0:01:32s
epoch 119| loss: 0.00036 | val_0_mse: 0.00014 |  0:01:33s
epoch 120| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:34s
epoch 121| loss: 0.00036 | val_0_mse: 0.00016 |  0:01:34s
epoch 122| loss: 0.00038 | val_0_mse: 0.00018 |  0:01:35s
epoch 123| loss: 0.00036 | val_0_mse: 0.00015 |  0:01:36s
epoch 124| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:37s
epoch 125| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:37s
epoch 126| loss: 0.00035 | val_0_mse: 0.00014 |  0:01:38s
epoch 127| loss: 0.00033 | val_0_mse: 0.00016 |  0:01:39s
epoch 128| loss: 0.00034 | val_0_mse: 0.00017 |  0:01:39s
epoch 129| loss: 0.00034 | val_0_mse: 0.00016 |  0:01:40s
epoch 130| loss: 0.00034 | val_0_mse: 0.00015 |  0:01:41s
epoch 131| loss: 0.00033 | val_0_mse: 0.00014 |  0:01:42s
epoch 132| loss: 0.00032 | val_0_mse: 0.0002  |  0:01:42s
epoch 133| loss: 0.00036 | val_0_mse: 0.00017 |  0:01:43s
epoch 134| loss: 0.00034 | val_0_mse: 0.00015 |  0:01:44s
epoch 135| los

epoch 55 | loss: 0.00074 | val_0_mse: 0.00014 |  0:00:42s
epoch 56 | loss: 0.00074 | val_0_mse: 0.00014 |  0:00:43s
epoch 57 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:44s
epoch 58 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:44s
epoch 59 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:45s
epoch 60 | loss: 0.0007  | val_0_mse: 0.00014 |  0:00:46s
epoch 61 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:47s
epoch 62 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:47s
epoch 63 | loss: 0.00069 | val_0_mse: 0.00013 |  0:00:48s
epoch 64 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:49s
epoch 65 | loss: 0.00068 | val_0_mse: 0.00014 |  0:00:50s
epoch 66 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:51s
epoch 67 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:51s
epoch 68 | loss: 0.00066 | val_0_mse: 0.00014 |  0:00:52s
epoch 69 | loss: 0.00066 | val_0_mse: 0.00014 |  0:00:53s
epoch 70 | loss: 0.00065 | val_0_mse: 0.00013 |  0:00:54s
epoch 71 | loss: 0.00064 | val_0_mse: 0.00014 |  0:00:55s
epoch 72 | los

epoch 92 | loss: 0.0006  | val_0_mse: 0.00014 |  0:01:11s
epoch 93 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:12s
epoch 94 | loss: 0.00059 | val_0_mse: 0.00014 |  0:01:12s
epoch 95 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:13s
epoch 96 | loss: 0.00058 | val_0_mse: 0.00014 |  0:01:14s
epoch 97 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:14s
epoch 98 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:15s
epoch 99 | loss: 0.00057 | val_0_mse: 0.00014 |  0:01:16s
epoch 100| loss: 0.00057 | val_0_mse: 0.00014 |  0:01:17s
epoch 101| loss: 0.00057 | val_0_mse: 0.00014 |  0:01:17s
epoch 102| loss: 0.00056 | val_0_mse: 0.00014 |  0:01:18s
epoch 103| loss: 0.00057 | val_0_mse: 0.00014 |  0:01:19s
epoch 104| loss: 0.00056 | val_0_mse: 0.00014 |  0:01:19s
epoch 105| loss: 0.00056 | val_0_mse: 0.00014 |  0:01:20s
epoch 106| loss: 0.00055 | val_0_mse: 0.00014 |  0:01:21s
epoch 107| loss: 0.00055 | val_0_mse: 0.00014 |  0:01:22s
epoch 108| loss: 0.00054 | val_0_mse: 0.00014 |  0:01:22s
epoch 109| los

epoch 29 | loss: 0.00082 | val_0_mse: 0.00022 |  0:00:23s
epoch 30 | loss: 0.0008  | val_0_mse: 0.00021 |  0:00:23s
epoch 31 | loss: 0.00078 | val_0_mse: 0.00021 |  0:00:24s
epoch 32 | loss: 0.00079 | val_0_mse: 0.00022 |  0:00:25s
epoch 33 | loss: 0.00078 | val_0_mse: 0.00021 |  0:00:26s
epoch 34 | loss: 0.00076 | val_0_mse: 0.00021 |  0:00:27s
epoch 35 | loss: 0.00074 | val_0_mse: 0.00022 |  0:00:27s
epoch 36 | loss: 0.00074 | val_0_mse: 0.00021 |  0:00:28s
epoch 37 | loss: 0.00073 | val_0_mse: 0.00021 |  0:00:29s
epoch 38 | loss: 0.00071 | val_0_mse: 0.00021 |  0:00:30s
epoch 39 | loss: 0.00071 | val_0_mse: 0.00021 |  0:00:30s
epoch 40 | loss: 0.0007  | val_0_mse: 0.00021 |  0:00:31s
epoch 41 | loss: 0.00069 | val_0_mse: 0.00021 |  0:00:32s
epoch 42 | loss: 0.00069 | val_0_mse: 0.00021 |  0:00:33s
epoch 43 | loss: 0.00069 | val_0_mse: 0.00021 |  0:00:33s
epoch 44 | loss: 0.00067 | val_0_mse: 0.00021 |  0:00:34s
epoch 45 | loss: 0.00066 | val_0_mse: 0.00021 |  0:00:35s
epoch 46 | los

epoch 66 | loss: 0.00069 | val_0_mse: 0.00017 |  0:00:52s
epoch 67 | loss: 0.0007  | val_0_mse: 0.00016 |  0:00:52s
epoch 68 | loss: 0.00068 | val_0_mse: 0.00016 |  0:00:53s
epoch 69 | loss: 0.00066 | val_0_mse: 0.00017 |  0:00:54s
epoch 70 | loss: 0.00066 | val_0_mse: 0.00015 |  0:00:55s
epoch 71 | loss: 0.00066 | val_0_mse: 0.00015 |  0:00:56s
epoch 72 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:56s
epoch 73 | loss: 0.00063 | val_0_mse: 0.00015 |  0:00:57s
epoch 74 | loss: 0.00065 | val_0_mse: 0.00015 |  0:00:58s
epoch 75 | loss: 0.00063 | val_0_mse: 0.00014 |  0:00:59s
epoch 76 | loss: 0.00068 | val_0_mse: 0.00015 |  0:00:59s
epoch 77 | loss: 0.00066 | val_0_mse: 0.00015 |  0:01:00s
epoch 78 | loss: 0.00064 | val_0_mse: 0.00016 |  0:01:01s
epoch 79 | loss: 0.00061 | val_0_mse: 0.00017 |  0:01:01s
epoch 80 | loss: 0.0006  | val_0_mse: 0.00015 |  0:01:02s
epoch 81 | loss: 0.00062 | val_0_mse: 0.00016 |  0:01:03s
epoch 82 | loss: 0.00063 | val_0_mse: 0.00015 |  0:01:04s
epoch 83 | los

epoch 3  | loss: 0.01696 | val_0_mse: 0.00086 |  0:00:03s
epoch 4  | loss: 0.00794 | val_0_mse: 0.00027 |  0:00:03s
epoch 5  | loss: 0.00445 | val_0_mse: 0.00027 |  0:00:04s
epoch 6  | loss: 0.00292 | val_0_mse: 0.00018 |  0:00:05s
epoch 7  | loss: 0.00217 | val_0_mse: 0.00019 |  0:00:06s
epoch 8  | loss: 0.00201 | val_0_mse: 0.00018 |  0:00:07s
epoch 9  | loss: 0.00161 | val_0_mse: 0.00019 |  0:00:08s
epoch 10 | loss: 0.00141 | val_0_mse: 0.00019 |  0:00:08s
epoch 11 | loss: 0.00158 | val_0_mse: 0.00019 |  0:00:09s
epoch 12 | loss: 0.00138 | val_0_mse: 0.0002  |  0:00:10s
epoch 13 | loss: 0.00134 | val_0_mse: 0.0002  |  0:00:11s
epoch 14 | loss: 0.00128 | val_0_mse: 0.00021 |  0:00:12s
epoch 15 | loss: 0.00116 | val_0_mse: 0.0002  |  0:00:13s
epoch 16 | loss: 0.00116 | val_0_mse: 0.0002  |  0:00:14s
epoch 17 | loss: 0.0011  | val_0_mse: 0.0002  |  0:00:14s
epoch 18 | loss: 0.0011  | val_0_mse: 0.0002  |  0:00:15s
epoch 19 | loss: 0.00109 | val_0_mse: 0.0002  |  0:00:16s
epoch 20 | los

epoch 40 | loss: 0.00088 | val_0_mse: 0.00015 |  0:00:31s
epoch 41 | loss: 0.00082 | val_0_mse: 0.00014 |  0:00:32s
epoch 42 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:33s
epoch 43 | loss: 0.00079 | val_0_mse: 0.00014 |  0:00:33s
epoch 44 | loss: 0.00077 | val_0_mse: 0.00014 |  0:00:34s
epoch 45 | loss: 0.00076 | val_0_mse: 0.00014 |  0:00:35s
epoch 46 | loss: 0.00074 | val_0_mse: 0.00014 |  0:00:35s
epoch 47 | loss: 0.00077 | val_0_mse: 0.00015 |  0:00:36s
epoch 48 | loss: 0.00073 | val_0_mse: 0.00014 |  0:00:37s
epoch 49 | loss: 0.00072 | val_0_mse: 0.00015 |  0:00:38s
epoch 50 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:39s
epoch 51 | loss: 0.00071 | val_0_mse: 0.00014 |  0:00:39s
epoch 52 | loss: 0.0007  | val_0_mse: 0.00014 |  0:00:40s
epoch 53 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:41s
epoch 54 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:42s
epoch 55 | loss: 0.00067 | val_0_mse: 0.00014 |  0:00:43s
epoch 56 | loss: 0.00065 | val_0_mse: 0.00014 |  0:00:43s
epoch 57 | los

epoch 182| loss: 0.0003  | val_0_mse: 0.00014 |  0:02:19s
epoch 183| loss: 0.00029 | val_0_mse: 0.00014 |  0:02:20s
epoch 184| loss: 0.0003  | val_0_mse: 0.00013 |  0:02:21s
epoch 185| loss: 0.00029 | val_0_mse: 0.00014 |  0:02:22s
epoch 186| loss: 0.00029 | val_0_mse: 0.00014 |  0:02:23s
epoch 187| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:23s
epoch 188| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:24s
epoch 189| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:25s
epoch 190| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:26s
epoch 191| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:26s
epoch 192| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:27s
epoch 193| loss: 0.00028 | val_0_mse: 0.00014 |  0:02:28s
epoch 194| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:29s
epoch 195| loss: 0.00028 | val_0_mse: 0.00012 |  0:02:29s
epoch 196| loss: 0.00029 | val_0_mse: 0.00013 |  0:02:30s
epoch 197| loss: 0.00028 | val_0_mse: 0.00012 |  0:02:31s
epoch 198| loss: 0.00028 | val_0_mse: 0.00012 |  0:02:32s
epoch 199| los

epoch 15 | loss: 0.00113 | val_0_mse: 0.0002  |  0:00:11s
epoch 16 | loss: 0.00122 | val_0_mse: 0.00021 |  0:00:12s
epoch 17 | loss: 0.0012  | val_0_mse: 0.00022 |  0:00:13s
epoch 18 | loss: 0.00103 | val_0_mse: 0.0002  |  0:00:14s
epoch 19 | loss: 0.00114 | val_0_mse: 0.00022 |  0:00:14s
epoch 20 | loss: 0.00107 | val_0_mse: 0.0002  |  0:00:15s
epoch 21 | loss: 0.00098 | val_0_mse: 0.00021 |  0:00:16s
epoch 22 | loss: 0.00099 | val_0_mse: 0.0002  |  0:00:17s
epoch 23 | loss: 0.00094 | val_0_mse: 0.00021 |  0:00:17s
epoch 24 | loss: 0.00095 | val_0_mse: 0.0002  |  0:00:18s
epoch 25 | loss: 0.00092 | val_0_mse: 0.0002  |  0:00:19s
epoch 26 | loss: 0.0009  | val_0_mse: 0.00021 |  0:00:20s
epoch 27 | loss: 0.00089 | val_0_mse: 0.0002  |  0:00:20s
epoch 28 | loss: 0.00088 | val_0_mse: 0.00021 |  0:00:21s
epoch 29 | loss: 0.00087 | val_0_mse: 0.0002  |  0:00:22s
epoch 30 | loss: 0.00089 | val_0_mse: 0.0002  |  0:00:23s
epoch 31 | loss: 0.00087 | val_0_mse: 0.0002  |  0:00:23s
epoch 32 | los

epoch 157| loss: 0.00028 | val_0_mse: 5e-05   |  0:02:04s
epoch 158| loss: 0.00027 | val_0_mse: 8e-05   |  0:02:05s
epoch 159| loss: 0.00029 | val_0_mse: 6e-05   |  0:02:06s
epoch 160| loss: 0.00027 | val_0_mse: 5e-05   |  0:02:07s
epoch 161| loss: 0.00026 | val_0_mse: 5e-05   |  0:02:08s
epoch 162| loss: 0.00026 | val_0_mse: 5e-05   |  0:02:09s
epoch 163| loss: 0.00026 | val_0_mse: 5e-05   |  0:02:10s
epoch 164| loss: 0.00026 | val_0_mse: 6e-05   |  0:02:11s
epoch 165| loss: 0.00025 | val_0_mse: 5e-05   |  0:02:12s
epoch 166| loss: 0.00027 | val_0_mse: 5e-05   |  0:02:12s
epoch 167| loss: 0.00026 | val_0_mse: 5e-05   |  0:02:14s
epoch 168| loss: 0.00025 | val_0_mse: 6e-05   |  0:02:15s
epoch 169| loss: 0.00025 | val_0_mse: 5e-05   |  0:02:16s
epoch 170| loss: 0.00024 | val_0_mse: 5e-05   |  0:02:17s
epoch 171| loss: 0.00025 | val_0_mse: 6e-05   |  0:02:18s
epoch 172| loss: 0.00024 | val_0_mse: 5e-05   |  0:02:19s
epoch 173| loss: 0.00024 | val_0_mse: 5e-05   |  0:02:20s
epoch 174| los

epoch 94 | loss: 0.0005  | val_0_mse: 0.00012 |  0:01:40s
epoch 95 | loss: 0.00049 | val_0_mse: 0.00013 |  0:01:41s
epoch 96 | loss: 0.00048 | val_0_mse: 0.00012 |  0:01:42s
epoch 97 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:43s
epoch 98 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:44s
epoch 99 | loss: 0.00048 | val_0_mse: 0.00013 |  0:01:45s
Stop training because you reached max_epochs = 100 with best_epoch = 87 and best_val_0_mse = 0.00012
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/roberta-high-5.zip
Device used : cuda
epoch 0  | loss: 0.47468 | val_0_mse: 0.02161 |  0:00:01s
epoch 1  | loss: 0.12551 | val_0_mse: 0.11768 |  0:00:02s
epoch 2  | loss: 0.05818 | val_0_mse: 0.00757 |  0:00:03s
epoch 3  | loss: 0.02089 | val_0_mse: 0.00337 |  0:00:04s
epoch 4  | loss: 0.00754 | val_0_mse: 0.00118 |  0:00:04s
epoch 5  | loss: 0.0046  | val_0_mse: 0.00041 |  0:00:06s
epoch 6  | loss: 0.00336 | val_0_mse: 0.00032 |  0:0

epoch 131| loss: 0.00046 | val_0_mse: 0.00013 |  0:02:12s
epoch 132| loss: 0.00045 | val_0_mse: 0.00013 |  0:02:13s
epoch 133| loss: 0.00045 | val_0_mse: 0.00013 |  0:02:14s
epoch 134| loss: 0.00045 | val_0_mse: 0.00013 |  0:02:15s
epoch 135| loss: 0.00046 | val_0_mse: 0.00013 |  0:02:16s
epoch 136| loss: 0.00044 | val_0_mse: 0.00013 |  0:02:17s
epoch 137| loss: 0.00044 | val_0_mse: 0.00012 |  0:02:17s
epoch 138| loss: 0.00044 | val_0_mse: 0.00014 |  0:02:18s
epoch 139| loss: 0.00045 | val_0_mse: 0.00014 |  0:02:19s
epoch 140| loss: 0.00044 | val_0_mse: 0.00013 |  0:02:20s
epoch 141| loss: 0.00043 | val_0_mse: 0.00013 |  0:02:21s
epoch 142| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:22s
epoch 143| loss: 0.00043 | val_0_mse: 0.00013 |  0:02:23s
epoch 144| loss: 0.00042 | val_0_mse: 0.00013 |  0:02:24s
epoch 145| loss: 0.00042 | val_0_mse: 0.00014 |  0:02:25s
epoch 146| loss: 0.00043 | val_0_mse: 0.00015 |  0:02:26s
epoch 147| loss: 0.00042 | val_0_mse: 0.00014 |  0:02:27s
epoch 148| los

epoch 68 | loss: 0.00061 | val_0_mse: 0.00013 |  0:01:07s
epoch 69 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:08s
epoch 70 | loss: 0.00058 | val_0_mse: 0.00013 |  0:01:09s
epoch 71 | loss: 0.00057 | val_0_mse: 0.00013 |  0:01:09s
epoch 72 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:10s
epoch 73 | loss: 0.00056 | val_0_mse: 0.00014 |  0:01:11s
epoch 74 | loss: 0.00056 | val_0_mse: 0.00013 |  0:01:13s
epoch 75 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:14s
epoch 76 | loss: 0.00055 | val_0_mse: 0.00014 |  0:01:15s
epoch 77 | loss: 0.00055 | val_0_mse: 0.00013 |  0:01:16s
epoch 78 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:17s
epoch 79 | loss: 0.00054 | val_0_mse: 0.00014 |  0:01:17s
epoch 80 | loss: 0.00053 | val_0_mse: 0.00014 |  0:01:18s
epoch 81 | loss: 0.00054 | val_0_mse: 0.00016 |  0:01:19s
epoch 82 | loss: 0.00054 | val_0_mse: 0.00015 |  0:01:20s
epoch 83 | loss: 0.00053 | val_0_mse: 0.00013 |  0:01:21s
epoch 84 | loss: 0.00051 | val_0_mse: 0.00014 |  0:01:22s
epoch 85 | los

epoch 105| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:49s
epoch 106| loss: 0.00039 | val_0_mse: 0.00014 |  0:01:50s
epoch 107| loss: 0.00041 | val_0_mse: 0.00014 |  0:01:51s
epoch 108| loss: 0.00042 | val_0_mse: 0.00013 |  0:01:52s
epoch 109| loss: 0.00042 | val_0_mse: 0.00014 |  0:01:53s
epoch 110| loss: 0.00041 | val_0_mse: 0.00014 |  0:01:54s
epoch 111| loss: 0.00041 | val_0_mse: 0.00014 |  0:01:56s
epoch 112| loss: 0.0004  | val_0_mse: 0.00013 |  0:01:57s
epoch 113| loss: 0.00041 | val_0_mse: 0.00014 |  0:01:58s
epoch 114| loss: 0.0004  | val_0_mse: 0.00013 |  0:01:59s
epoch 115| loss: 0.00039 | val_0_mse: 0.00014 |  0:02:00s
epoch 116| loss: 0.00039 | val_0_mse: 0.00013 |  0:02:01s
epoch 117| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:02s
epoch 118| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:03s
epoch 119| loss: 0.00038 | val_0_mse: 0.00014 |  0:02:04s
epoch 120| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:05s
epoch 121| loss: 0.00038 | val_0_mse: 0.00013 |  0:02:06s
epoch 122| los

epoch 42 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:45s
epoch 43 | loss: 0.0008  | val_0_mse: 0.00014 |  0:00:46s
epoch 44 | loss: 0.00078 | val_0_mse: 0.00014 |  0:00:47s
epoch 45 | loss: 0.00078 | val_0_mse: 0.00014 |  0:00:48s
epoch 46 | loss: 0.00078 | val_0_mse: 0.00013 |  0:00:49s
epoch 47 | loss: 0.00076 | val_0_mse: 0.00013 |  0:00:50s
epoch 48 | loss: 0.00076 | val_0_mse: 0.00013 |  0:00:51s
epoch 49 | loss: 0.00075 | val_0_mse: 0.00014 |  0:00:52s
epoch 50 | loss: 0.00074 | val_0_mse: 0.00013 |  0:00:53s
epoch 51 | loss: 0.00073 | val_0_mse: 0.00013 |  0:00:54s
epoch 52 | loss: 0.00073 | val_0_mse: 0.00013 |  0:00:55s
epoch 53 | loss: 0.00072 | val_0_mse: 0.00014 |  0:00:56s
epoch 54 | loss: 0.00071 | val_0_mse: 0.00013 |  0:00:57s
epoch 55 | loss: 0.0007  | val_0_mse: 0.00013 |  0:00:58s
epoch 56 | loss: 0.0007  | val_0_mse: 0.00014 |  0:00:59s
epoch 57 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:00s
epoch 58 | loss: 0.00069 | val_0_mse: 0.00014 |  0:01:01s
epoch 59 | los

epoch 79 | loss: 0.00046 | val_0_mse: 0.00014 |  0:01:22s
epoch 80 | loss: 0.00046 | val_0_mse: 0.00014 |  0:01:23s
epoch 81 | loss: 0.00045 | val_0_mse: 0.00014 |  0:01:24s
epoch 82 | loss: 0.00044 | val_0_mse: 0.00014 |  0:01:25s
epoch 83 | loss: 0.00043 | val_0_mse: 0.00014 |  0:01:26s
epoch 84 | loss: 0.00044 | val_0_mse: 0.00014 |  0:01:27s
epoch 85 | loss: 0.00043 | val_0_mse: 0.00014 |  0:01:28s
epoch 86 | loss: 0.00042 | val_0_mse: 0.00014 |  0:01:29s
epoch 87 | loss: 0.00042 | val_0_mse: 0.00014 |  0:01:30s
epoch 88 | loss: 0.00042 | val_0_mse: 0.00014 |  0:01:31s
epoch 89 | loss: 0.00042 | val_0_mse: 0.00014 |  0:01:32s
epoch 90 | loss: 0.00041 | val_0_mse: 0.00014 |  0:01:33s
epoch 91 | loss: 0.0004  | val_0_mse: 0.00014 |  0:01:34s
epoch 92 | loss: 0.0004  | val_0_mse: 0.00014 |  0:01:35s
epoch 93 | loss: 0.00041 | val_0_mse: 0.00014 |  0:01:37s
epoch 94 | loss: 0.0004  | val_0_mse: 0.00014 |  0:01:38s
epoch 95 | loss: 0.00039 | val_0_mse: 0.00014 |  0:01:39s
epoch 96 | los

epoch 16 | loss: 0.00126 | val_0_mse: 0.00019 |  0:00:17s
epoch 17 | loss: 0.00123 | val_0_mse: 0.00017 |  0:00:18s
epoch 18 | loss: 0.00119 | val_0_mse: 0.00016 |  0:00:19s
epoch 19 | loss: 0.00115 | val_0_mse: 0.00016 |  0:00:20s
epoch 20 | loss: 0.00111 | val_0_mse: 0.00015 |  0:00:21s
epoch 21 | loss: 0.00108 | val_0_mse: 0.00015 |  0:00:22s
epoch 22 | loss: 0.00103 | val_0_mse: 0.00015 |  0:00:22s
epoch 23 | loss: 0.00103 | val_0_mse: 0.00015 |  0:00:23s
epoch 24 | loss: 0.001   | val_0_mse: 0.00015 |  0:00:25s
epoch 25 | loss: 0.00101 | val_0_mse: 0.00015 |  0:00:26s
epoch 26 | loss: 0.00099 | val_0_mse: 0.00014 |  0:00:27s
epoch 27 | loss: 0.00097 | val_0_mse: 0.00015 |  0:00:28s
epoch 28 | loss: 0.00095 | val_0_mse: 0.00014 |  0:00:29s
epoch 29 | loss: 0.00093 | val_0_mse: 0.00014 |  0:00:30s
epoch 30 | loss: 0.00092 | val_0_mse: 0.00015 |  0:00:31s
epoch 31 | loss: 0.00089 | val_0_mse: 0.00014 |  0:00:32s
epoch 32 | loss: 0.00088 | val_0_mse: 0.00015 |  0:00:33s
epoch 33 | los

epoch 53 | loss: 0.00089 | val_0_mse: 0.00018 |  0:00:50s
epoch 54 | loss: 0.00085 | val_0_mse: 0.00015 |  0:00:51s
epoch 55 | loss: 0.00085 | val_0_mse: 0.00015 |  0:00:52s
epoch 56 | loss: 0.00083 | val_0_mse: 0.00015 |  0:00:53s
epoch 57 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:54s
epoch 58 | loss: 0.00082 | val_0_mse: 0.00014 |  0:00:55s
epoch 59 | loss: 0.00082 | val_0_mse: 0.00014 |  0:00:56s
epoch 60 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:57s
epoch 61 | loss: 0.0008  | val_0_mse: 0.00016 |  0:00:58s
epoch 62 | loss: 0.0008  | val_0_mse: 0.00015 |  0:00:58s
epoch 63 | loss: 0.00079 | val_0_mse: 0.00015 |  0:00:59s
epoch 64 | loss: 0.00078 | val_0_mse: 0.00014 |  0:01:00s
epoch 65 | loss: 0.00077 | val_0_mse: 0.00015 |  0:01:01s
epoch 66 | loss: 0.00077 | val_0_mse: 0.00014 |  0:01:02s
epoch 67 | loss: 0.00075 | val_0_mse: 0.00014 |  0:01:02s
epoch 68 | loss: 0.00075 | val_0_mse: 0.00014 |  0:01:03s
epoch 69 | loss: 0.00073 | val_0_mse: 0.00014 |  0:01:04s
epoch 70 | los

epoch 195| loss: 0.00036 | val_0_mse: 0.0001  |  0:02:52s
epoch 196| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:53s
epoch 197| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:53s
epoch 198| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:55s
epoch 199| loss: 0.00036 | val_0_mse: 0.0001  |  0:02:55s
Stop training because you reached max_epochs = 200 with best_epoch = 199 and best_val_0_mse = 0.0001
Best weights from best epoch are automatically used!
Successfully saved model at ckpts/sentiment_all_news_4hours/roberta-low-8.zip
Device used : cuda
epoch 0  | loss: 1.84678 | val_0_mse: 0.14412 |  0:00:01s
epoch 1  | loss: 0.22576 | val_0_mse: 0.0271  |  0:00:01s
epoch 2  | loss: 0.06761 | val_0_mse: 0.00218 |  0:00:02s
epoch 3  | loss: 0.02357 | val_0_mse: 0.00185 |  0:00:03s
epoch 4  | loss: 0.0098  | val_0_mse: 0.00047 |  0:00:04s
epoch 5  | loss: 0.00656 | val_0_mse: 0.00023 |  0:00:05s
epoch 6  | loss: 0.00377 | val_0_mse: 0.00022 |  0:00:06s
epoch 7  | loss: 0.00291 | val_0_mse: 0.00021 |  0:00

epoch 28 | loss: 0.00092 | val_0_mse: 0.00018 |  0:00:25s
epoch 29 | loss: 0.00094 | val_0_mse: 0.00016 |  0:00:26s
epoch 30 | loss: 0.0009  | val_0_mse: 0.00017 |  0:00:26s
epoch 31 | loss: 0.00089 | val_0_mse: 0.00017 |  0:00:27s
epoch 32 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:28s
epoch 33 | loss: 0.00086 | val_0_mse: 0.00016 |  0:00:29s
epoch 34 | loss: 0.00085 | val_0_mse: 0.00017 |  0:00:30s
epoch 35 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:31s
epoch 36 | loss: 0.00082 | val_0_mse: 0.00016 |  0:00:32s
epoch 37 | loss: 0.00083 | val_0_mse: 0.00016 |  0:00:33s
epoch 38 | loss: 0.00082 | val_0_mse: 0.00015 |  0:00:33s
epoch 39 | loss: 0.00081 | val_0_mse: 0.00016 |  0:00:34s
epoch 40 | loss: 0.00082 | val_0_mse: 0.00015 |  0:00:35s
epoch 41 | loss: 0.00081 | val_0_mse: 0.00014 |  0:00:36s
epoch 42 | loss: 0.00079 | val_0_mse: 0.00016 |  0:00:37s
epoch 43 | loss: 0.00078 | val_0_mse: 0.00015 |  0:00:38s
epoch 44 | loss: 0.00078 | val_0_mse: 0.00015 |  0:00:39s
epoch 45 | los

epoch 170| loss: 0.00037 | val_0_mse: 0.00014 |  0:02:28s
epoch 171| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:29s
epoch 172| loss: 0.00037 | val_0_mse: 0.00014 |  0:02:30s
epoch 173| loss: 0.00037 | val_0_mse: 0.00013 |  0:02:31s
epoch 174| loss: 0.00037 | val_0_mse: 0.00014 |  0:02:31s
epoch 175| loss: 0.00036 | val_0_mse: 0.00014 |  0:02:32s
epoch 176| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:33s
epoch 177| loss: 0.00036 | val_0_mse: 0.00013 |  0:02:34s
epoch 178| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:35s
epoch 179| loss: 0.00036 | val_0_mse: 0.00012 |  0:02:36s
epoch 180| loss: 0.00035 | val_0_mse: 0.00014 |  0:02:37s
epoch 181| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:38s
epoch 182| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:38s
epoch 183| loss: 0.00034 | val_0_mse: 0.00014 |  0:02:39s
epoch 184| loss: 0.00035 | val_0_mse: 0.00013 |  0:02:40s
epoch 185| loss: 0.00035 | val_0_mse: 0.00014 |  0:02:41s
epoch 186| loss: 0.00034 | val_0_mse: 0.00013 |  0:02:42s
epoch 187| los

epoch 3  | loss: 0.00519 | val_0_mse: 0.00046 |  0:00:03s
epoch 4  | loss: 0.00296 | val_0_mse: 0.00029 |  0:00:04s
epoch 5  | loss: 0.00199 | val_0_mse: 0.00027 |  0:00:05s
epoch 6  | loss: 0.00153 | val_0_mse: 0.00022 |  0:00:06s
epoch 7  | loss: 0.00139 | val_0_mse: 0.0002  |  0:00:07s
epoch 8  | loss: 0.00123 | val_0_mse: 0.00018 |  0:00:08s
epoch 9  | loss: 0.00117 | val_0_mse: 0.00018 |  0:00:09s
epoch 10 | loss: 0.00113 | val_0_mse: 0.00019 |  0:00:10s
epoch 11 | loss: 0.00111 | val_0_mse: 0.00019 |  0:00:10s
epoch 12 | loss: 0.00108 | val_0_mse: 0.0002  |  0:00:11s
epoch 13 | loss: 0.00107 | val_0_mse: 0.00018 |  0:00:12s
epoch 14 | loss: 0.00106 | val_0_mse: 0.00018 |  0:00:13s
epoch 15 | loss: 0.00102 | val_0_mse: 0.00021 |  0:00:15s
epoch 16 | loss: 0.00102 | val_0_mse: 0.00019 |  0:00:16s
epoch 17 | loss: 0.00099 | val_0_mse: 0.00018 |  0:00:16s
epoch 18 | loss: 0.001   | val_0_mse: 0.00018 |  0:00:17s
epoch 19 | loss: 0.00099 | val_0_mse: 0.00019 |  0:00:18s
epoch 20 | los

epoch 145| loss: 0.00036 | val_0_mse: 0.0001  |  0:02:15s
epoch 146| loss: 0.00035 | val_0_mse: 8e-05   |  0:02:16s
epoch 147| loss: 0.00034 | val_0_mse: 9e-05   |  0:02:17s
epoch 148| loss: 0.00035 | val_0_mse: 0.0001  |  0:02:18s
epoch 149| loss: 0.00034 | val_0_mse: 8e-05   |  0:02:19s


In [None]:
if not os.path.exists(checkpoint_save_dir):
    os.makedirs(checkpoint_save_dir, exist_ok=True)
with open(saveas, 'w') as f:
    for key in results.keys():
        f.write("%s, %s\n" % (key, results[key]))