In [47]:
import os
import pandas as pd
import numpy as np
import gc

In [48]:
# Work from the data directory so the relative CSV paths used below resolve.
# Set COMMONLIT_DATA_DIR to run on another machine; the fallback is the
# original author's local path, so existing behaviour is unchanged.
os.chdir(os.environ.get("COMMONLIT_DATA_DIR", "C:/Users/shaur/Downloads/commonlit"))
os.getcwd()

'C:\\Users\\shaur\\Downloads\\commonlit'

In [49]:
# Read the five competition CSVs from the current working directory.
prompts_train, prompts_test = (
    pd.read_csv("prompts_train.csv"),
    pd.read_csv("prompts_test.csv"),
)
summaries_train, summaries_test = (
    pd.read_csv("summaries_train.csv"),
    pd.read_csv("summaries_test.csv"),
)
sample_submission = pd.read_csv("sample_submission.csv")

# Bare last expression: the notebook renders the prompts table as cell output.
prompts_train

Unnamed: 0,prompt_id,prompt_question,prompt_title,prompt_text
0,39c16e,Summarize at least 3 elements of an ideal trag...,On Tragedy,Chapter 13 \r\nAs the sequel to what has alrea...
1,3b9047,"In complete sentences, summarize the structure...",Egyptian Social Structure,Egyptian society was structured like a pyramid...
2,814d6b,Summarize how the Third Wave developed over su...,The Third Wave,Background \r\nThe Third Wave experiment took ...
3,ebad26,Summarize the various ways the factory would u...,Excerpt from The Jungle,"With one member trimming beef in a cannery, an..."


In [50]:
# Attach the prompt columns to every summary row; the left join keeps all
# summaries even if a prompt_id has no match in the prompts table.
train = pd.merge(summaries_train, prompts_train, how="left", on="prompt_id")
test = pd.merge(summaries_test, prompts_test, how="left", on="prompt_id")

In [51]:
import warnings
import logging
import shutil
import json
import transformers
from transformers import AutoModel, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding, TrainingArguments, Trainer
from datasets import Dataset, load_dataset, load_from_disk, load_metric, disable_progress_bar
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GroupShuffleSplit, GroupKFold
import torch

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [52]:
# Keep the training output readable: ignore Python warnings, turn off log
# records at ERROR level and below, and call the `datasets` progress-bar switch.
warnings.simplefilter(action="ignore")
logging.disable(level=logging.ERROR)
disable_progress_bar()

In [53]:
def compute_metrics(eval_pred):
    """Return the RMSE between predictions and labels as ``{"rmse": value}``.

    Args:
        eval_pred: a ``(predictions, labels)`` pair of array-likes with
            matching shapes (1-D, or 2-D with one column per target).

    Returns:
        dict with a single ``"rmse"`` float. For 2-D inputs this is the RMSE
        of each column averaged uniformly, which is what
        ``mean_squared_error(..., squared=False)`` used to return.
    """
    predictions, labels = eval_pred
    predictions = np.asarray(predictions, dtype=np.float64)
    labels = np.asarray(labels, dtype=np.float64)

    # Computed with NumPy rather than `mean_squared_error(squared=False)`:
    # the `squared` argument is deprecated and removed in newer scikit-learn.
    per_column_rmse = np.sqrt(np.mean((labels - predictions) ** 2, axis=0))
    rmse = float(np.mean(per_column_rmse))
    return {"rmse": rmse}

def compute_mcrmse(eval_pred):
    """
    Calculates mean columnwise root mean squared error
    https://www.kaggle.com/competitions/commonlit-evaluate-student-summaries/overview/evaluation

    Args:
        eval_pred: a ``(preds, labels)`` pair. Each item may be an ndarray,
            a DataFrame or any array-like with two columns; column 0 is
            reported as "content" and column 1 as "wording".

    Returns:
        dict with ``content_rmse``, ``wording_rmse`` and ``mcrmse`` as plain
        Python floats (safe to pass to ``json.dump``).
    """
    preds, labels = eval_pred

    # Coerce to ndarrays first. With a DataFrame on either side the
    # subtraction yields a label-indexed result, and the positional
    # `col_rmse[0]` lookups below would depend on deprecated pandas behaviour.
    preds = np.asarray(preds, dtype=np.float64)
    labels = np.asarray(labels, dtype=np.float64)

    col_rmse = np.sqrt(np.mean((preds - labels) ** 2, axis=0))
    mcrmse = np.mean(col_rmse)

    return {
        "content_rmse": float(col_rmse[0]),
        "wording_rmse": float(col_rmse[1]),
        "mcrmse": float(mcrmse),
    }

In [54]:
def compt_score(content_true, content_pred, wording_true, wording_pred):
    """Average the RMSE of the content target and the RMSE of the wording target."""
    rmse_per_target = [
        mean_squared_error(y_true, y_pred) ** (1 / 2)
        for y_true, y_pred in ((content_true, content_pred),
                               (wording_true, wording_pred))
    ]
    content_score, wording_score = rmse_per_target
    return (content_score + wording_score) / 2

def seed_everything(seed: int):
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: integer seed applied to `random`, `numpy.random`, `torch`
            (CPU and every CUDA device) and exported as PYTHONHASHSEED.
    """
    import random  # not imported at module level in this notebook

    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which works against the determinism requested above.
    torch.backends.cudnn.benchmark = False


seed_everything(seed=42)

In [55]:
# Hold out whole prompts: rows sharing a prompt_id stay on the same side.
splitter = GroupShuffleSplit(n_splits=4, test_size=.2, random_state=42)
split = splitter.split(train, groups=train["prompt_id"])
# Only the first of the generated splits is consumed here.
train_ind, val_ind = next(split)

train_split, val_split = train.iloc[train_ind], train.iloc[val_ind]

In [56]:
# Rows per prompt on the training side of the split.
train_split["prompt_id"].value_counts()

prompt_id
39c16e    2057
ebad26    1996
814d6b    1103
Name: count, dtype: int64

In [57]:
# Rows per prompt on the validation side of the split.
val_split["prompt_id"].value_counts()

prompt_id
3b9047    2009
Name: count, dtype: int64

### Function for training and inference

In [58]:
def train_n_infer(train,
                  val,
                  test_content,
                  model_name,
                  num_layers_to_freeze,
                  learning_rate,
                  per_device_train_batch_size,
                  per_device_eval_batch_size,
                  num_train_epochs,
                  ss,
                  hidden_dropout_prob,
                  attention_probs_dropout_prob,
                  fold,
                  max_length=None):
    """Fine-tune a two-target regressor on one fold and predict val and test.

    Args:
        train: DataFrame with "text", "content" and "wording" columns.
        val: DataFrame with the same columns, used for periodic evaluation
            and for the returned validation predictions.
        test_content: DataFrame with a "text" column.
        model_name: name or path given to the `from_pretrained` loaders; also
            used to build the output directory.
        num_layers_to_freeze: number of leading entries of
            `model.named_parameters()` to freeze. This counts parameter
            tensors, not transformer layers.
        learning_rate: forwarded to `TrainingArguments`.
        per_device_train_batch_size: forwarded to `TrainingArguments`.
        per_device_eval_batch_size: forwarded to `TrainingArguments`.
        num_train_epochs: forwarded to `TrainingArguments`.
        ss: step interval used for both evaluation and checkpoint saving.
        hidden_dropout_prob: written into the model config.
        attention_probs_dropout_prob: written into the model config.
        fold: fold identifier, only used in the output directory name.
        max_length: optional token limit passed to the tokenizer; ``None``
            (default) keeps the tokenizer's own truncation length.

    Returns:
        ``(val_predictions, test_predictions)`` as returned by
        `Trainer.predict(...).predictions`, one column per target
        (content, wording).
    """
    train_content = train[["text", "content", "wording"]]
    val_content = val[["text", "content", "wording"]]
    test_ = test_content[["text"]]

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model_config = AutoConfig.from_pretrained(model_name)
    model_config.update({
        "hidden_dropout_prob": hidden_dropout_prob,
        "attention_probs_dropout_prob": attention_probs_dropout_prob,
        "num_labels": 2,
        "problem_type": "regression",
    })
    # Re-seed immediately before building the model so every fold starts
    # from the same RNG state.
    seed_everything(seed=42)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, config=model_config).to(device)

    # Freeze the first `num_layers_to_freeze` parameter tensors.
    for param_index, (_, param) in enumerate(model.named_parameters()):
        if param_index >= num_layers_to_freeze:
            break
        param.requires_grad = False

    train_dataset_content = Dataset.from_pandas(train_content, preserve_index=False)
    val_dataset_content = Dataset.from_pandas(val_content, preserve_index=False)
    test_dataset = Dataset.from_pandas(test_, preserve_index=False)

    def tokenize_function(examples):
        # Called per example (batched=False): labels is [content, wording].
        labels = [examples["content"], examples["wording"]]
        tokenized = tokenizer(examples["text"],
                              padding=False,
                              truncation=True,
                              max_length=max_length,
                              )
        return {
            **tokenized,
            "labels": labels,
            }

    def tokenize_function_test(examples):
        tokenized = tokenizer(examples["text"],
                              padding=False,
                              truncation=True,
                              max_length=max_length,
                              )
        return tokenized

    train_tokenized_datasets_content = train_dataset_content.map(tokenize_function, batched=False)
    val_tokenized_datasets_content = val_dataset_content.map(tokenize_function, batched=False)
    test_tokenized_dataset = test_dataset.map(tokenize_function_test, batched=False)

    model_dir = f"./Results/{model_name}_results/fold {fold}"
    os.makedirs(model_dir, exist_ok=True)

    # Padding is deferred to the collator (tokenizer calls use padding=False).
    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    training_args = TrainingArguments(
        output_dir = model_dir,
        load_best_model_at_end = True,
        learning_rate = learning_rate,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=per_device_eval_batch_size,
        num_train_epochs=num_train_epochs,
        save_strategy="steps",
        evaluation_strategy="steps",
        greater_is_better=False,
        metric_for_best_model="mcrmse",
        eval_steps=ss,
        save_steps=ss,
        report_to="none",
        save_total_limit=3
    )

    trainer_content = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_tokenized_datasets_content,
            eval_dataset=val_tokenized_datasets_content,
            tokenizer=tokenizer,
            compute_metrics=compute_mcrmse,
            data_collator=data_collator
        )

    trainer_content.train()

    # Use the checkpoint the Trainer recorded as best for `mcrmse`.
    # Taking the first entry of os.listdir(model_dir) returns an arbitrary
    # checkpoint, and on a re-run it can return one of the files that
    # save_pretrained() below writes into the same directory.
    best_check = trainer_content.state.best_model_checkpoint
    if best_check is None:
        # No best checkpoint was recorded (e.g. training ended before the
        # first evaluation step): fall back to the in-memory weights.
        model_content = trainer_content.model
    else:
        model_content = AutoModelForSequenceClassification.from_pretrained(best_check)
    model_content.eval()

    test_args = TrainingArguments(
        output_dir=model_dir,
        do_train = False,
        do_predict = True,
        per_device_eval_batch_size = per_device_eval_batch_size,
        dataloader_drop_last = False,
    )

    # Inference-only trainer: no train/eval datasets and no metric function.
    infer_content = Trainer(
                  model = model_content,
                  tokenizer=tokenizer,
                  data_collator=data_collator,
                  args = test_args)

    val_results_content = infer_content.predict(val_tokenized_datasets_content).predictions
    test_results_content = infer_content.predict(test_tokenized_dataset).predictions

    # Persist the selected model and tokenizer at the top of the fold directory.
    model_content.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)

    # Release GPU memory before the next fold starts.
    model.cpu()
    model_content.cpu()
    del model
    del model_content
    gc.collect()
    torch.cuda.empty_cache()

    return val_results_content, test_results_content

### GroupKFold

In [59]:
def get_oof_pred_n_test(train,
                        test,
                        model_name,
                        n_splits,
                        num_layers_to_freeze,
                        learning_rate,
                        per_device_train_batch_size,
                        per_device_eval_batch_size,
                        num_train_epochs,
                        ss,
                        hidden_dropout_prob,
                        attention_probs_dropout_prob):
    """Run GroupKFold training and collect out-of-fold and test predictions.

    Folds are grouped by `prompt_id`, so a prompt never appears on both the
    training and the validation side of a fold. One model is trained per fold
    through `train_n_infer`; test predictions are averaged over the folds.

    Args:
        train: DataFrame with "prompt_id", "text", "content" and "wording".
        test: DataFrame forwarded to `train_n_infer` as the test set.
        model_name: forwarded to `train_n_infer`; also names the output
            columns and the results directory.
        n_splits: number of GroupKFold folds.
        num_layers_to_freeze, learning_rate, per_device_train_batch_size,
        per_device_eval_batch_size, num_train_epochs, ss,
        hidden_dropout_prob, attention_probs_dropout_prob: forwarded
            unchanged to `train_n_infer`.

    Returns:
        ``(oof_train, test_pred)``: DataFrames with columns
        ``content_pred_<model_name>`` and ``wording_pred_<model_name>``.

    Side effects:
        Prints the CV metric and writes ``cv_metric.json`` and
        ``oof_train.csv`` under ``./Results/<model_name>_results/``.
    """
    kf = GroupKFold(n_splits=n_splits)
    oof_content = np.zeros((len(train), 2))
    test_pred_content = np.zeros((len(test), 2))

    for i, (train_indx, val_indx) in enumerate(kf.split(train, groups=train["prompt_id"])):
        print(f"fold {i}:")
        train_ = train.iloc[train_indx]
        val_ = train.iloc[val_indx]

        val_res_content, test_res_content = train_n_infer(train_,
                                                          val_,
                                                          test,
                                                          model_name,
                                                          num_layers_to_freeze,
                                                          learning_rate,
                                                          per_device_train_batch_size,
                                                          per_device_eval_batch_size,
                                                          num_train_epochs,
                                                          ss,
                                                          hidden_dropout_prob,
                                                          attention_probs_dropout_prob,
                                                          i)
        # val_indx are positional row numbers, matching the oof array rows.
        oof_content[val_indx] = val_res_content
        test_pred_content += test_res_content/n_splits

    pred_columns = [f"content_pred_{model_name}", f"wording_pred_{model_name}"]
    oof_train = pd.DataFrame(oof_content, columns=pred_columns)
    test_pred = pd.DataFrame(test_pred_content, columns=pred_columns)

    # Pass plain ndarrays: a DataFrame of labels makes the metric's
    # intermediate result label-indexed and breaks its positional lookups.
    targets = train[["content", "wording"]].to_numpy()
    cv_metric = {
        metric_name: float(metric_value)  # built-in floats for json.dump
        for metric_name, metric_value in compute_mcrmse((oof_content, targets)).items()
    }
    print(f"cv mcrmse: {cv_metric}")

    results_dir = f"./Results/{model_name}_results"
    # Do not rely on train_n_infer having created the directory.
    os.makedirs(results_dir, exist_ok=True)

    with open(f"{results_dir}/cv_metric.json", "w") as outfile:
        json.dump(cv_metric, outfile)

    oof_train.to_csv(f"{results_dir}/oof_train.csv", index=False)

    return oof_train, test_pred


In [60]:
class CFG:
    """Hyper-parameters and run settings for the cross-validation run."""

    # --- model / data ---
    model_name = "deberta_v3_base"
    max_length = 512
    random_seed = 42

    # --- cross-validation ---
    n_splits = 4

    # --- optimisation ---
    learning_rate = 1.5e-5
    num_train_epochs = 3
    batch_size = 12
    per_device_train_batch_size = 2
    per_device_eval_batch_size = 2
    save_steps = 100

    # --- regularisation ---
    num_layers_to_freeze = 20
    hidden_dropout_prob = 0.005
    attention_probs_dropout_prob = 0.005

In [61]:
# Run the full grouped cross-validation with the settings from CFG.
# Keyword arguments make the mapping from CFG fields to parameters explicit
# (CFG.save_steps feeds the `ss` evaluation/save interval).
oof_train, pred_test = get_oof_pred_n_test(
    train=train,
    test=test,
    model_name=CFG.model_name,
    n_splits=CFG.n_splits,
    num_layers_to_freeze=CFG.num_layers_to_freeze,
    learning_rate=CFG.learning_rate,
    per_device_train_batch_size=CFG.per_device_train_batch_size,
    per_device_eval_batch_size=CFG.per_device_eval_batch_size,
    num_train_epochs=CFG.num_train_epochs,
    ss=CFG.save_steps,
    hidden_dropout_prob=CFG.hidden_dropout_prob,
    attention_probs_dropout_prob=CFG.attention_probs_dropout_prob,
)

fold 0:


  0%|          | 7/7662 [14:07:17<15442:59:45, 7262.55s/it]
                                                  
  1%|▏         | 100/7662 [00:30<10:56, 11.52it/s] 

{'eval_loss': 0.4981825351715088, 'eval_content_rmse': 0.5610414743423462, 'eval_wording_rmse': 0.8255892395973206, 'eval_mcrmse': 0.6933153867721558, 'eval_runtime': 20.6148, 'eval_samples_per_second': 99.783, 'eval_steps_per_second': 49.916, 'epoch': 0.04}


                                                    
  3%|▎         | 200/7662 [01:03<11:08, 11.16it/s] 

{'eval_loss': 0.5217586159706116, 'eval_content_rmse': 0.719721794128418, 'eval_wording_rmse': 0.7249259352684021, 'eval_mcrmse': 0.7223238945007324, 'eval_runtime': 20.8727, 'eval_samples_per_second': 98.55, 'eval_steps_per_second': 49.299, 'epoch': 0.08}


  4%|▍         | 300/7662 [01:32<10:36, 11.57it/s]  
  4%|▍         | 300/7662 [01:37<10:36, 11.57it/s] 

{'eval_loss': 0.47021034359931946, 'eval_content_rmse': 0.5406953692436218, 'eval_wording_rmse': 0.8050273060798645, 'eval_mcrmse': 0.6728613376617432, 'eval_runtime': 22.048, 'eval_samples_per_second': 93.296, 'eval_steps_per_second': 46.671, 'epoch': 0.12}


                                                    
  5%|▌         | 400/7662 [02:15<11:24, 10.61it/s] 

{'eval_loss': 0.36454835534095764, 'eval_content_rmse': 0.48134517669677734, 'eval_wording_rmse': 0.7052686810493469, 'eval_mcrmse': 0.5933068990707397, 'eval_runtime': 24.1982, 'eval_samples_per_second': 85.006, 'eval_steps_per_second': 42.524, 'epoch': 0.16}


  7%|▋         | 500/7662 [02:26<11:09, 10.70it/s]  

{'loss': 0.57, 'learning_rate': 1.4021143304620203e-05, 'epoch': 0.2}


                                                  
  7%|▋         | 500/7662 [02:48<11:09, 10.70it/s] 

{'eval_loss': 0.43786734342575073, 'eval_content_rmse': 0.5084481239318848, 'eval_wording_rmse': 0.7856304049491882, 'eval_mcrmse': 0.6470392942428589, 'eval_runtime': 22.172, 'eval_samples_per_second': 92.775, 'eval_steps_per_second': 46.41, 'epoch': 0.2}


                                                    
  8%|▊         | 600/7662 [03:24<14:27,  8.14it/s] 

{'eval_loss': 0.2719472646713257, 'eval_content_rmse': 0.44538232684135437, 'eval_wording_rmse': 0.5878171324729919, 'eval_mcrmse': 0.516599714756012, 'eval_runtime': 23.2267, 'eval_samples_per_second': 88.562, 'eval_steps_per_second': 44.302, 'epoch': 0.23}


                                                     
  9%|▉         | 700/7662 [04:04<15:21,  7.56it/s] 

{'eval_loss': 0.3506985008716583, 'eval_content_rmse': 0.4977996349334717, 'eval_wording_rmse': 0.6734925508499146, 'eval_mcrmse': 0.5856460928916931, 'eval_runtime': 25.4649, 'eval_samples_per_second': 80.778, 'eval_steps_per_second': 40.409, 'epoch': 0.27}


                                                    
 10%|█         | 800/7662 [04:55<12:51,  8.89it/s] 

{'eval_loss': 0.3500678241252899, 'eval_content_rmse': 0.47799333930015564, 'eval_wording_rmse': 0.6867735385894775, 'eval_mcrmse': 0.5823834538459778, 'eval_runtime': 28.8593, 'eval_samples_per_second': 71.277, 'eval_steps_per_second': 35.656, 'epoch': 0.31}


                                                     
 12%|█▏        | 900/7662 [05:32<14:10,  7.95it/s] 

{'eval_loss': 0.2968102693557739, 'eval_content_rmse': 0.477495014667511, 'eval_wording_rmse': 0.604664146900177, 'eval_mcrmse': 0.541079580783844, 'eval_runtime': 24.6807, 'eval_samples_per_second': 83.344, 'eval_steps_per_second': 41.692, 'epoch': 0.35}


 13%|█▎        | 1000/7662 [05:44<11:37,  9.55it/s]  

{'loss': 0.3647, 'learning_rate': 1.3042286609240406e-05, 'epoch': 0.39}


                                                   
 13%|█▎        | 1000/7662 [06:06<11:37,  9.55it/s]

{'eval_loss': 0.46836936473846436, 'eval_content_rmse': 0.5117268562316895, 'eval_wording_rmse': 0.8215073347091675, 'eval_mcrmse': 0.6666170954704285, 'eval_runtime': 21.8698, 'eval_samples_per_second': 94.057, 'eval_steps_per_second': 47.051, 'epoch': 0.39}


                                                     
 14%|█▍        | 1100/7662 [06:49<10:52, 10.05it/s]

{'eval_loss': 0.2875727415084839, 'eval_content_rmse': 0.44729650020599365, 'eval_wording_rmse': 0.6124300360679626, 'eval_mcrmse': 0.5298632383346558, 'eval_runtime': 25.017, 'eval_samples_per_second': 82.224, 'eval_steps_per_second': 41.132, 'epoch': 0.43}


                                                     
 16%|█▌        | 1200/7662 [07:26<10:37, 10.13it/s]

{'eval_loss': 0.2974310517311096, 'eval_content_rmse': 0.4428265690803528, 'eval_wording_rmse': 0.6314799189567566, 'eval_mcrmse': 0.5371532440185547, 'eval_runtime': 24.9903, 'eval_samples_per_second': 82.312, 'eval_steps_per_second': 41.176, 'epoch': 0.47}


                                                     
 17%|█▋        | 1300/7662 [08:02<10:17, 10.30it/s]

{'eval_loss': 0.2741396427154541, 'eval_content_rmse': 0.45947444438934326, 'eval_wording_rmse': 0.5806572437286377, 'eval_mcrmse': 0.5200658440589905, 'eval_runtime': 23.4292, 'eval_samples_per_second': 87.796, 'eval_steps_per_second': 43.92, 'epoch': 0.51}


                                                     
 18%|█▊        | 1400/7662 [08:37<11:05,  9.40it/s]

{'eval_loss': 0.32949724793434143, 'eval_content_rmse': 0.4567551016807556, 'eval_wording_rmse': 0.6710960865020752, 'eval_mcrmse': 0.5639256238937378, 'eval_runtime': 22.5298, 'eval_samples_per_second': 91.301, 'eval_steps_per_second': 45.673, 'epoch': 0.55}


 20%|█▉        | 1500/7662 [08:48<09:05, 11.30it/s]  

{'loss': 0.3451, 'learning_rate': 1.2063429913860613e-05, 'epoch': 0.59}


                                                   
 20%|█▉        | 1500/7662 [09:11<09:05, 11.30it/s]

{'eval_loss': 0.44366025924682617, 'eval_content_rmse': 0.6694937348365784, 'eval_wording_rmse': 0.6626455783843994, 'eval_mcrmse': 0.6660696268081665, 'eval_runtime': 22.6726, 'eval_samples_per_second': 90.726, 'eval_steps_per_second': 45.385, 'epoch': 0.59}


                                                     
 21%|██        | 1600/7662 [09:45<10:51,  9.30it/s]

{'eval_loss': 0.2949422001838684, 'eval_content_rmse': 0.46149972081184387, 'eval_wording_rmse': 0.6139237880706787, 'eval_mcrmse': 0.5377117395401001, 'eval_runtime': 22.4797, 'eval_samples_per_second': 91.505, 'eval_steps_per_second': 45.775, 'epoch': 0.63}


                                                     
 22%|██▏       | 1700/7662 [10:18<08:47, 11.30it/s]

{'eval_loss': 0.27987444400787354, 'eval_content_rmse': 0.511982262134552, 'eval_wording_rmse': 0.5455484390258789, 'eval_mcrmse': 0.5287653207778931, 'eval_runtime': 22.088, 'eval_samples_per_second': 93.127, 'eval_steps_per_second': 46.586, 'epoch': 0.67}


                                                     
 23%|██▎       | 1800/7662 [10:53<10:21,  9.44it/s]

{'eval_loss': 0.2845980226993561, 'eval_content_rmse': 0.4272148311138153, 'eval_wording_rmse': 0.6218387484550476, 'eval_mcrmse': 0.5245267748832703, 'eval_runtime': 22.6874, 'eval_samples_per_second': 90.667, 'eval_steps_per_second': 45.356, 'epoch': 0.7}


                                                     
 25%|██▍       | 1900/7662 [11:28<09:33, 10.04it/s]

{'eval_loss': 0.26319238543510437, 'eval_content_rmse': 0.4649548828601837, 'eval_wording_rmse': 0.5569578409194946, 'eval_mcrmse': 0.510956346988678, 'eval_runtime': 22.841, 'eval_samples_per_second': 90.057, 'eval_steps_per_second': 45.051, 'epoch': 0.74}


 26%|██▌       | 2000/7662 [11:39<07:31, 12.54it/s]  

{'loss': 0.2952, 'learning_rate': 1.1084573218480815e-05, 'epoch': 0.78}


                                                   
 26%|██▌       | 2000/7662 [12:02<07:31, 12.54it/s]

{'eval_loss': 0.2685522437095642, 'eval_content_rmse': 0.4140545725822449, 'eval_wording_rmse': 0.6047012805938721, 'eval_mcrmse': 0.5093779563903809, 'eval_runtime': 22.5448, 'eval_samples_per_second': 91.24, 'eval_steps_per_second': 45.642, 'epoch': 0.78}


                                                     
 27%|██▋       | 2100/7662 [12:36<09:38,  9.62it/s]

{'eval_loss': 0.27521198987960815, 'eval_content_rmse': 0.476802259683609, 'eval_wording_rmse': 0.568404495716095, 'eval_mcrmse': 0.5226033926010132, 'eval_runtime': 22.7515, 'eval_samples_per_second': 90.412, 'eval_steps_per_second': 45.228, 'epoch': 0.82}


 29%|██▊       | 2200/7662 [13:09<07:35, 11.98it/s]  

{'eval_loss': 0.2985340654850006, 'eval_content_rmse': 0.47816044092178345, 'eval_wording_rmse': 0.6069848537445068, 'eval_mcrmse': 0.5425726175308228, 'eval_runtime': 22.0128, 'eval_samples_per_second': 93.446, 'eval_steps_per_second': 46.746, 'epoch': 0.86}


                                                     
 30%|███       | 2300/7662 [13:47<13:39,  6.54it/s]

{'eval_loss': 0.292244017124176, 'eval_content_rmse': 0.44717690348625183, 'eval_wording_rmse': 0.6200976371765137, 'eval_mcrmse': 0.533637285232544, 'eval_runtime': 21.966, 'eval_samples_per_second': 93.645, 'eval_steps_per_second': 46.845, 'epoch': 0.9}


                                                     
 31%|███▏      | 2400/7662 [14:20<09:22,  9.36it/s]

{'eval_loss': 0.24134403467178345, 'eval_content_rmse': 0.41959935426712036, 'eval_wording_rmse': 0.5537366271018982, 'eval_mcrmse': 0.4866679906845093, 'eval_runtime': 21.9906, 'eval_samples_per_second': 93.54, 'eval_steps_per_second': 46.793, 'epoch': 0.94}


 33%|███▎      | 2500/7662 [14:31<08:45,  9.83it/s]  

{'loss': 0.3069, 'learning_rate': 1.0105716523101018e-05, 'epoch': 0.98}


                                                   
 33%|███▎      | 2500/7662 [14:53<08:45,  9.83it/s]

{'eval_loss': 0.30228886008262634, 'eval_content_rmse': 0.5355848670005798, 'eval_wording_rmse': 0.5636720657348633, 'eval_mcrmse': 0.549628496170044, 'eval_runtime': 22.0899, 'eval_samples_per_second': 93.12, 'eval_steps_per_second': 46.582, 'epoch': 0.98}


                                                     
 34%|███▍      | 2600/7662 [15:27<07:49, 10.78it/s]

{'eval_loss': 0.3605421483516693, 'eval_content_rmse': 0.6439388990402222, 'eval_wording_rmse': 0.5535584092140198, 'eval_mcrmse': 0.5987486839294434, 'eval_runtime': 22.4729, 'eval_samples_per_second': 91.532, 'eval_steps_per_second': 45.788, 'epoch': 1.02}


                                                     
 35%|███▌      | 2700/7662 [16:01<09:36,  8.60it/s]

{'eval_loss': 0.2872288227081299, 'eval_content_rmse': 0.5320214629173279, 'eval_wording_rmse': 0.5398252606391907, 'eval_mcrmse': 0.5359233617782593, 'eval_runtime': 22.5192, 'eval_samples_per_second': 91.344, 'eval_steps_per_second': 45.694, 'epoch': 1.06}


                                                     
 37%|███▋      | 2800/7662 [16:35<07:39, 10.58it/s]

{'eval_loss': 0.25694549083709717, 'eval_content_rmse': 0.4405011832714081, 'eval_wording_rmse': 0.5655518174171448, 'eval_mcrmse': 0.5030264854431152, 'eval_runtime': 22.426, 'eval_samples_per_second': 91.724, 'eval_steps_per_second': 45.884, 'epoch': 1.1}


                                                     
 38%|███▊      | 2900/7662 [17:09<07:04, 11.22it/s]

{'eval_loss': 0.350976824760437, 'eval_content_rmse': 0.5342339873313904, 'eval_wording_rmse': 0.6454052925109863, 'eval_mcrmse': 0.5898196697235107, 'eval_runtime': 22.2796, 'eval_samples_per_second': 92.327, 'eval_steps_per_second': 46.186, 'epoch': 1.14}


 39%|███▉      | 3000/7662 [17:23<07:31, 10.31it/s]  

{'loss': 0.2505, 'learning_rate': 9.126859827721221e-06, 'epoch': 1.17}


                                                   
 39%|███▉      | 3000/7662 [17:45<07:31, 10.31it/s]

{'eval_loss': 0.29178571701049805, 'eval_content_rmse': 0.47062841057777405, 'eval_wording_rmse': 0.6017316579818726, 'eval_mcrmse': 0.5361800193786621, 'eval_runtime': 22.2823, 'eval_samples_per_second': 92.316, 'eval_steps_per_second': 46.18, 'epoch': 1.17}


                                                     
 40%|████      | 3100/7662 [18:22<07:57,  9.55it/s]

{'eval_loss': 0.25408974289894104, 'eval_content_rmse': 0.439502477645874, 'eval_wording_rmse': 0.5612643361091614, 'eval_mcrmse': 0.5003833770751953, 'eval_runtime': 22.2195, 'eval_samples_per_second': 92.576, 'eval_steps_per_second': 46.311, 'epoch': 1.21}


                                                     
 42%|████▏     | 3200/7662 [18:56<06:41, 11.13it/s]

{'eval_loss': 0.29536163806915283, 'eval_content_rmse': 0.5183165669441223, 'eval_wording_rmse': 0.5675135254859924, 'eval_mcrmse': 0.5429150462150574, 'eval_runtime': 22.8097, 'eval_samples_per_second': 90.181, 'eval_steps_per_second': 45.112, 'epoch': 1.25}


                                                     
 43%|████▎     | 3300/7662 [19:40<07:20,  9.90it/s]

{'eval_loss': 0.3188832700252533, 'eval_content_rmse': 0.4473669230937958, 'eval_wording_rmse': 0.6615353226661682, 'eval_mcrmse': 0.5544511079788208, 'eval_runtime': 30.5568, 'eval_samples_per_second': 67.317, 'eval_steps_per_second': 33.675, 'epoch': 1.29}


                                                     
 44%|████▍     | 3400/7662 [20:24<06:49, 10.40it/s]

{'eval_loss': 0.3047608435153961, 'eval_content_rmse': 0.5471299290657043, 'eval_wording_rmse': 0.5569295287132263, 'eval_mcrmse': 0.5520297288894653, 'eval_runtime': 30.497, 'eval_samples_per_second': 67.449, 'eval_steps_per_second': 33.741, 'epoch': 1.33}


 46%|████▌     | 3500/7662 [20:36<06:44, 10.28it/s]  

{'loss': 0.2053, 'learning_rate': 8.148003132341424e-06, 'epoch': 1.37}


                                                   
 46%|████▌     | 3500/7662 [21:07<06:44, 10.28it/s]

{'eval_loss': 0.29006242752075195, 'eval_content_rmse': 0.5040276646614075, 'eval_wording_rmse': 0.5710349082946777, 'eval_mcrmse': 0.5375312566757202, 'eval_runtime': 30.7568, 'eval_samples_per_second': 66.879, 'eval_steps_per_second': 33.456, 'epoch': 1.37}


                                                     
 47%|████▋     | 3600/7662 [21:52<07:18,  9.27it/s]

{'eval_loss': 0.2503358721733093, 'eval_content_rmse': 0.4407562017440796, 'eval_wording_rmse': 0.5535392165184021, 'eval_mcrmse': 0.49714770913124084, 'eval_runtime': 30.434, 'eval_samples_per_second': 67.589, 'eval_steps_per_second': 33.811, 'epoch': 1.41}


                                                     
 48%|████▊     | 3700/7662 [22:35<07:00,  9.43it/s]

{'eval_loss': 0.32609453797340393, 'eval_content_rmse': 0.5900822877883911, 'eval_wording_rmse': 0.5513546466827393, 'eval_mcrmse': 0.5707184672355652, 'eval_runtime': 30.6653, 'eval_samples_per_second': 67.079, 'eval_steps_per_second': 33.556, 'epoch': 1.45}


                                                     
 50%|████▉     | 3800/7662 [23:19<05:46, 11.16it/s]

{'eval_loss': 0.30490848422050476, 'eval_content_rmse': 0.512750506401062, 'eval_wording_rmse': 0.5889855623245239, 'eval_mcrmse': 0.550868034362793, 'eval_runtime': 31.8864, 'eval_samples_per_second': 64.51, 'eval_steps_per_second': 32.271, 'epoch': 1.49}


                                                     
 51%|█████     | 3900/7662 [24:06<07:45,  8.09it/s]

{'eval_loss': 0.2934841513633728, 'eval_content_rmse': 0.5091281533241272, 'eval_wording_rmse': 0.5725006461143494, 'eval_mcrmse': 0.5408143997192383, 'eval_runtime': 30.3158, 'eval_samples_per_second': 67.852, 'eval_steps_per_second': 33.943, 'epoch': 1.53}


 52%|█████▏    | 4000/7662 [24:18<05:51, 10.42it/s]  

{'loss': 0.2074, 'learning_rate': 7.169146436961629e-06, 'epoch': 1.57}


                                                   
 52%|█████▏    | 4000/7662 [24:49<05:51, 10.42it/s]

{'eval_loss': 0.3098111152648926, 'eval_content_rmse': 0.5730763673782349, 'eval_wording_rmse': 0.5396347045898438, 'eval_mcrmse': 0.5563555359840393, 'eval_runtime': 30.133, 'eval_samples_per_second': 68.264, 'eval_steps_per_second': 34.149, 'epoch': 1.57}


                                                     
 54%|█████▎    | 4100/7662 [25:30<05:21, 11.08it/s]

{'eval_loss': 0.2810317575931549, 'eval_content_rmse': 0.5227373242378235, 'eval_wording_rmse': 0.5374100208282471, 'eval_mcrmse': 0.5300736427307129, 'eval_runtime': 30.0024, 'eval_samples_per_second': 68.561, 'eval_steps_per_second': 34.297, 'epoch': 1.61}


                                                     
 55%|█████▍    | 4200/7662 [26:12<05:44, 10.04it/s]

{'eval_loss': 0.28584837913513184, 'eval_content_rmse': 0.5104499459266663, 'eval_wording_rmse': 0.5577969551086426, 'eval_mcrmse': 0.534123420715332, 'eval_runtime': 30.1382, 'eval_samples_per_second': 68.252, 'eval_steps_per_second': 34.143, 'epoch': 1.64}


                                                     
 56%|█████▌    | 4300/7662 [26:56<06:05,  9.20it/s]

{'eval_loss': 0.30784541368484497, 'eval_content_rmse': 0.524689793586731, 'eval_wording_rmse': 0.5834304094314575, 'eval_mcrmse': 0.5540601015090942, 'eval_runtime': 30.0881, 'eval_samples_per_second': 68.366, 'eval_steps_per_second': 34.2, 'epoch': 1.68}


                                                     
 57%|█████▋    | 4400/7662 [27:40<05:20, 10.17it/s]

{'eval_loss': 0.27163535356521606, 'eval_content_rmse': 0.4820598363876343, 'eval_wording_rmse': 0.5575743913650513, 'eval_mcrmse': 0.5198171138763428, 'eval_runtime': 30.9547, 'eval_samples_per_second': 66.452, 'eval_steps_per_second': 33.242, 'epoch': 1.72}


 59%|█████▊    | 4500/7662 [27:52<05:24,  9.75it/s]  

{'loss': 0.2157, 'learning_rate': 6.190289741581833e-06, 'epoch': 1.76}


                                                   
 59%|█████▊    | 4500/7662 [28:22<05:24,  9.75it/s]

{'eval_loss': 0.2955758571624756, 'eval_content_rmse': 0.4831407070159912, 'eval_wording_rmse': 0.5981025099754333, 'eval_mcrmse': 0.5406216382980347, 'eval_runtime': 30.2101, 'eval_samples_per_second': 68.09, 'eval_steps_per_second': 34.062, 'epoch': 1.76}


                                                     
 60%|██████    | 4600/7662 [29:06<05:09,  9.89it/s]

{'eval_loss': 0.27499085664749146, 'eval_content_rmse': 0.48519548773765564, 'eval_wording_rmse': 0.5608627200126648, 'eval_mcrmse': 0.523029088973999, 'eval_runtime': 29.7109, 'eval_samples_per_second': 69.234, 'eval_steps_per_second': 34.634, 'epoch': 1.8}


                                                     
 61%|██████▏   | 4700/7662 [29:48<04:33, 10.83it/s]

{'eval_loss': 0.283990740776062, 'eval_content_rmse': 0.5123474597930908, 'eval_wording_rmse': 0.5527037382125854, 'eval_mcrmse': 0.5325255990028381, 'eval_runtime': 30.4073, 'eval_samples_per_second': 67.648, 'eval_steps_per_second': 33.841, 'epoch': 1.84}


                                                     
 63%|██████▎   | 4800/7662 [30:32<05:01,  9.50it/s]

{'eval_loss': 0.3342355191707611, 'eval_content_rmse': 0.5349406003952026, 'eval_wording_rmse': 0.61831134557724, 'eval_mcrmse': 0.5766259431838989, 'eval_runtime': 29.7379, 'eval_samples_per_second': 69.171, 'eval_steps_per_second': 34.602, 'epoch': 1.88}


                                                     
 64%|██████▍   | 4900/7662 [31:13<04:32, 10.13it/s]

{'eval_loss': 0.305769681930542, 'eval_content_rmse': 0.5620089173316956, 'eval_wording_rmse': 0.543769359588623, 'eval_mcrmse': 0.5528891086578369, 'eval_runtime': 29.8586, 'eval_samples_per_second': 68.891, 'eval_steps_per_second': 34.462, 'epoch': 1.92}


 65%|██████▌   | 5000/7662 [31:24<04:38,  9.55it/s]  

{'loss': 0.2417, 'learning_rate': 5.211433046202036e-06, 'epoch': 1.96}


                                                   
 65%|██████▌   | 5000/7662 [31:54<04:38,  9.55it/s]

{'eval_loss': 0.29368361830711365, 'eval_content_rmse': 0.5191289782524109, 'eval_wording_rmse': 0.5638015270233154, 'eval_mcrmse': 0.5414652824401855, 'eval_runtime': 29.5688, 'eval_samples_per_second': 69.567, 'eval_steps_per_second': 34.8, 'epoch': 1.96}


                                                     
 67%|██████▋   | 5100/7662 [32:35<04:02, 10.56it/s]

{'eval_loss': 0.2845624089241028, 'eval_content_rmse': 0.4849781394004822, 'eval_wording_rmse': 0.5778588652610779, 'eval_mcrmse': 0.53141850233078, 'eval_runtime': 29.7068, 'eval_samples_per_second': 69.243, 'eval_steps_per_second': 34.638, 'epoch': 2.0}


                                                     
 68%|██████▊   | 5200/7662 [33:18<04:10,  9.84it/s]

{'eval_loss': 0.304912269115448, 'eval_content_rmse': 0.49807804822921753, 'eval_wording_rmse': 0.6014505624771118, 'eval_mcrmse': 0.5497642755508423, 'eval_runtime': 29.6514, 'eval_samples_per_second': 69.373, 'eval_steps_per_second': 34.703, 'epoch': 2.04}


                                                     
 69%|██████▉   | 5300/7662 [34:00<03:54, 10.09it/s]

{'eval_loss': 0.3330003023147583, 'eval_content_rmse': 0.5302033424377441, 'eval_wording_rmse': 0.6203908920288086, 'eval_mcrmse': 0.5752971172332764, 'eval_runtime': 29.7291, 'eval_samples_per_second': 69.192, 'eval_steps_per_second': 34.613, 'epoch': 2.08}


                                                     
 70%|███████   | 5400/7662 [34:42<03:28, 10.87it/s]

{'eval_loss': 0.33270758390426636, 'eval_content_rmse': 0.481952428817749, 'eval_wording_rmse': 0.6581315398216248, 'eval_mcrmse': 0.5700420141220093, 'eval_runtime': 29.6942, 'eval_samples_per_second': 69.273, 'eval_steps_per_second': 34.653, 'epoch': 2.11}


 72%|███████▏  | 5500/7662 [34:55<04:02,  8.91it/s]  

{'loss': 0.1484, 'learning_rate': 4.23257635082224e-06, 'epoch': 2.15}


                                                   
 72%|███████▏  | 5500/7662 [35:25<04:02,  8.91it/s]

{'eval_loss': 0.2911047637462616, 'eval_content_rmse': 0.49622011184692383, 'eval_wording_rmse': 0.5796332955360413, 'eval_mcrmse': 0.5379266738891602, 'eval_runtime': 29.6796, 'eval_samples_per_second': 69.307, 'eval_steps_per_second': 34.67, 'epoch': 2.15}


                                                     
 73%|███████▎  | 5600/7662 [36:07<04:11,  8.19it/s]

{'eval_loss': 0.28147947788238525, 'eval_content_rmse': 0.49866023659706116, 'eval_wording_rmse': 0.5606217980384827, 'eval_mcrmse': 0.5296410322189331, 'eval_runtime': 29.4244, 'eval_samples_per_second': 69.908, 'eval_steps_per_second': 34.971, 'epoch': 2.19}


                                                     
 74%|███████▍  | 5700/7662 [36:47<03:33,  9.20it/s]

{'eval_loss': 0.287332147359848, 'eval_content_rmse': 0.46856802701950073, 'eval_wording_rmse': 0.5959092378616333, 'eval_mcrmse': 0.5322386026382446, 'eval_runtime': 29.7099, 'eval_samples_per_second': 69.236, 'eval_steps_per_second': 34.635, 'epoch': 2.23}


                                                     
 76%|███████▌  | 5800/7662 [37:29<02:55, 10.60it/s]

{'eval_loss': 0.2935795187950134, 'eval_content_rmse': 0.5199219584465027, 'eval_wording_rmse': 0.5628858804702759, 'eval_mcrmse': 0.5414038896560669, 'eval_runtime': 29.7419, 'eval_samples_per_second': 69.162, 'eval_steps_per_second': 34.598, 'epoch': 2.27}


                                                     
 77%|███████▋  | 5900/7662 [38:10<02:34, 11.42it/s]

{'eval_loss': 0.2981850206851959, 'eval_content_rmse': 0.4858984351158142, 'eval_wording_rmse': 0.6002269387245178, 'eval_mcrmse': 0.543062686920166, 'eval_runtime': 29.8098, 'eval_samples_per_second': 69.004, 'eval_steps_per_second': 34.519, 'epoch': 2.31}


 78%|███████▊  | 6000/7662 [38:21<02:44, 10.13it/s]  

{'loss': 0.1371, 'learning_rate': 3.2537196554424435e-06, 'epoch': 2.35}


                                                   
 78%|███████▊  | 6000/7662 [38:51<02:44, 10.13it/s]

{'eval_loss': 0.2706215977668762, 'eval_content_rmse': 0.47444260120391846, 'eval_wording_rmse': 0.5622701048851013, 'eval_mcrmse': 0.5183563232421875, 'eval_runtime': 29.8173, 'eval_samples_per_second': 68.987, 'eval_steps_per_second': 34.51, 'epoch': 2.35}


                                                     
 80%|███████▉  | 6100/7662 [39:33<02:28, 10.55it/s]

{'eval_loss': 0.29040297865867615, 'eval_content_rmse': 0.4925577640533447, 'eval_wording_rmse': 0.5815433263778687, 'eval_mcrmse': 0.5370505452156067, 'eval_runtime': 29.7493, 'eval_samples_per_second': 69.144, 'eval_steps_per_second': 34.589, 'epoch': 2.39}


                                                     
 81%|████████  | 6200/7662 [40:14<02:16, 10.68it/s]

{'eval_loss': 0.32445064187049866, 'eval_content_rmse': 0.5272984504699707, 'eval_wording_rmse': 0.6089814901351929, 'eval_mcrmse': 0.5681399703025818, 'eval_runtime': 29.8808, 'eval_samples_per_second': 68.84, 'eval_steps_per_second': 34.437, 'epoch': 2.43}


                                                     
 82%|████████▏ | 6300/7662 [40:56<02:24,  9.45it/s]

{'eval_loss': 0.28871485590934753, 'eval_content_rmse': 0.5353918075561523, 'eval_wording_rmse': 0.5392451286315918, 'eval_mcrmse': 0.5373184680938721, 'eval_runtime': 30.1401, 'eval_samples_per_second': 68.248, 'eval_steps_per_second': 34.141, 'epoch': 2.47}


                                                     
 84%|████████▎ | 6400/7662 [41:44<02:10,  9.66it/s]

{'eval_loss': 0.29434868693351746, 'eval_content_rmse': 0.4832359850406647, 'eval_wording_rmse': 0.595970094203949, 'eval_mcrmse': 0.539603054523468, 'eval_runtime': 30.6832, 'eval_samples_per_second': 67.04, 'eval_steps_per_second': 33.536, 'epoch': 2.51}


 85%|████████▍ | 6500/7662 [41:58<02:02,  9.46it/s]  

{'loss': 0.1339, 'learning_rate': 2.274862960062647e-06, 'epoch': 2.55}


                                                   
 85%|████████▍ | 6500/7662 [42:31<02:02,  9.46it/s]

{'eval_loss': 0.3121512234210968, 'eval_content_rmse': 0.49527162313461304, 'eval_wording_rmse': 0.6156368255615234, 'eval_mcrmse': 0.5554542541503906, 'eval_runtime': 33.1787, 'eval_samples_per_second': 61.998, 'eval_steps_per_second': 31.014, 'epoch': 2.55}


                                                     
 86%|████████▌ | 6600/7662 [43:23<01:58,  8.95it/s]

{'eval_loss': 0.2980479896068573, 'eval_content_rmse': 0.5114173293113708, 'eval_wording_rmse': 0.5784023404121399, 'eval_mcrmse': 0.5449098348617554, 'eval_runtime': 36.1633, 'eval_samples_per_second': 56.881, 'eval_steps_per_second': 28.454, 'epoch': 2.58}


                                                     
 87%|████████▋ | 6700/7662 [44:14<01:57,  8.19it/s]

{'eval_loss': 0.29546672105789185, 'eval_content_rmse': 0.5022032856941223, 'eval_wording_rmse': 0.5820009708404541, 'eval_mcrmse': 0.5421020984649658, 'eval_runtime': 37.0469, 'eval_samples_per_second': 55.524, 'eval_steps_per_second': 27.776, 'epoch': 2.62}


                                                     
 89%|████████▊ | 6800/7662 [45:06<01:52,  7.65it/s]

{'eval_loss': 0.31540557742118835, 'eval_content_rmse': 0.5383273959159851, 'eval_wording_rmse': 0.5839647650718689, 'eval_mcrmse': 0.561146080493927, 'eval_runtime': 37.4023, 'eval_samples_per_second': 54.997, 'eval_steps_per_second': 27.512, 'epoch': 2.66}


                                                     
 90%|█████████ | 6900/7662 [45:58<01:30,  8.41it/s]

{'eval_loss': 0.29619860649108887, 'eval_content_rmse': 0.5267723798751831, 'eval_wording_rmse': 0.5611665844917297, 'eval_mcrmse': 0.5439695119857788, 'eval_runtime': 37.3914, 'eval_samples_per_second': 55.013, 'eval_steps_per_second': 27.52, 'epoch': 2.7}


 91%|█████████▏| 7000/7662 [46:17<01:55,  5.74it/s]  

{'loss': 0.12, 'learning_rate': 1.2960062646828505e-06, 'epoch': 2.74}


                                                   
 91%|█████████▏| 7000/7662 [46:55<01:55,  5.74it/s]

{'eval_loss': 0.2965767979621887, 'eval_content_rmse': 0.5096931457519531, 'eval_wording_rmse': 0.5773792862892151, 'eval_mcrmse': 0.5435361862182617, 'eval_runtime': 37.2867, 'eval_samples_per_second': 55.167, 'eval_steps_per_second': 27.597, 'epoch': 2.74}


                                                     
 93%|█████████▎| 7100/7662 [47:46<01:07,  8.30it/s]

{'eval_loss': 0.30258044600486755, 'eval_content_rmse': 0.5345327258110046, 'eval_wording_rmse': 0.5651862621307373, 'eval_mcrmse': 0.5498595237731934, 'eval_runtime': 37.2382, 'eval_samples_per_second': 55.239, 'eval_steps_per_second': 27.633, 'epoch': 2.78}


                                                     
 94%|█████████▍| 7200/7662 [48:38<00:56,  8.21it/s]

{'eval_loss': 0.3097963333129883, 'eval_content_rmse': 0.5335315465927124, 'eval_wording_rmse': 0.5787370800971985, 'eval_mcrmse': 0.5561343431472778, 'eval_runtime': 37.2656, 'eval_samples_per_second': 55.198, 'eval_steps_per_second': 27.613, 'epoch': 2.82}


                                                     
 95%|█████████▌| 7300/7662 [49:28<00:50,  7.15it/s]

{'eval_loss': 0.30348169803619385, 'eval_content_rmse': 0.5319474339485168, 'eval_wording_rmse': 0.5692059993743896, 'eval_mcrmse': 0.5505766868591309, 'eval_runtime': 36.0585, 'eval_samples_per_second': 57.046, 'eval_steps_per_second': 28.537, 'epoch': 2.86}


                                                     
 97%|█████████▋| 7400/7662 [50:11<00:30,  8.62it/s]

{'eval_loss': 0.3001742959022522, 'eval_content_rmse': 0.5298680067062378, 'eval_wording_rmse': 0.5653215646743774, 'eval_mcrmse': 0.5475947856903076, 'eval_runtime': 30.549, 'eval_samples_per_second': 67.334, 'eval_steps_per_second': 33.684, 'epoch': 2.9}


 98%|█████████▊| 7500/7662 [50:26<00:16,  9.79it/s]

{'loss': 0.1292, 'learning_rate': 3.17149569303054e-07, 'epoch': 2.94}


                                                   
 98%|█████████▊| 7500/7662 [50:56<00:16,  9.79it/s]

{'eval_loss': 0.2989133894443512, 'eval_content_rmse': 0.5078958868980408, 'eval_wording_rmse': 0.5829824805259705, 'eval_mcrmse': 0.5454391837120056, 'eval_runtime': 30.3384, 'eval_samples_per_second': 67.802, 'eval_steps_per_second': 33.917, 'epoch': 2.94}


                                                   
 99%|█████████▉| 7600/7662 [51:39<00:07,  8.37it/s]

{'eval_loss': 0.30037450790405273, 'eval_content_rmse': 0.505085825920105, 'eval_wording_rmse': 0.5879091024398804, 'eval_mcrmse': 0.5464974641799927, 'eval_runtime': 30.4488, 'eval_samples_per_second': 67.556, 'eval_steps_per_second': 33.794, 'epoch': 2.98}


100%|██████████| 7662/7662 [51:47<00:00,  2.47it/s]


{'train_runtime': 3107.6791, 'train_samples_per_second': 4.931, 'train_steps_per_second': 2.466, 'train_loss': 0.24219484959032042, 'epoch': 3.0}


100%|██████████| 1029/1029 [00:30<00:00, 33.21it/s]
100%|██████████| 2/2 [00:00<00:00, 330.94it/s]


fold 1:


                                                  
  1%|▏         | 100/7734 [00:39<11:22, 11.19it/s] 

{'eval_loss': 0.7108643651008606, 'eval_content_rmse': 0.751205563545227, 'eval_wording_rmse': 0.9259694218635559, 'eval_mcrmse': 0.8385875225067139, 'eval_runtime': 29.6183, 'eval_samples_per_second': 67.83, 'eval_steps_per_second': 33.932, 'epoch': 0.04}


                                                     
  3%|▎         | 200/7734 [01:18<10:22, 12.10it/s] 

{'eval_loss': 0.4685538113117218, 'eval_content_rmse': 0.5946324467658997, 'eval_wording_rmse': 0.7638849020004272, 'eval_mcrmse': 0.6792587041854858, 'eval_runtime': 27.2322, 'eval_samples_per_second': 73.773, 'eval_steps_per_second': 36.905, 'epoch': 0.08}


                                                    
  4%|▍         | 300/7734 [01:57<13:00,  9.53it/s] 

{'eval_loss': 0.6420978903770447, 'eval_content_rmse': 0.6436063647270203, 'eval_wording_rmse': 0.9327195286750793, 'eval_mcrmse': 0.7881629467010498, 'eval_runtime': 27.6649, 'eval_samples_per_second': 72.619, 'eval_steps_per_second': 36.328, 'epoch': 0.12}


                                                    
  5%|▌         | 400/7734 [02:38<11:12, 10.90it/s] 

{'eval_loss': 0.5289064049720764, 'eval_content_rmse': 0.6655676960945129, 'eval_wording_rmse': 0.7841131687164307, 'eval_mcrmse': 0.7248404026031494, 'eval_runtime': 28.0616, 'eval_samples_per_second': 71.593, 'eval_steps_per_second': 35.814, 'epoch': 0.16}


  6%|▋         | 500/7734 [02:49<09:51, 12.22it/s]   

{'loss': 0.4779, 'learning_rate': 1.4030256012412724e-05, 'epoch': 0.19}


                                                  
  6%|▋         | 500/7734 [03:19<09:51, 12.22it/s] 

{'eval_loss': 0.8804327249526978, 'eval_content_rmse': 0.7553996443748474, 'eval_wording_rmse': 1.090979814529419, 'eval_mcrmse': 0.9231897592544556, 'eval_runtime': 29.3519, 'eval_samples_per_second': 68.445, 'eval_steps_per_second': 34.24, 'epoch': 0.19}


                                                     
  8%|▊         | 600/7734 [03:57<10:51, 10.95it/s] 

{'eval_loss': 0.7743891477584839, 'eval_content_rmse': 0.7016832232475281, 'eval_wording_rmse': 1.0278226137161255, 'eval_mcrmse': 0.8647528886795044, 'eval_runtime': 28.2044, 'eval_samples_per_second': 71.23, 'eval_steps_per_second': 35.633, 'epoch': 0.23}


                                                    
  9%|▉         | 700/7734 [04:37<10:14, 11.44it/s] 

{'eval_loss': 0.5411725640296936, 'eval_content_rmse': 0.6075871586799622, 'eval_wording_rmse': 0.8445015549659729, 'eval_mcrmse': 0.7260443568229675, 'eval_runtime': 29.3359, 'eval_samples_per_second': 68.483, 'eval_steps_per_second': 34.258, 'epoch': 0.27}


                                                    
 10%|█         | 800/7734 [05:18<10:44, 10.76it/s] 

{'eval_loss': 0.4706610143184662, 'eval_content_rmse': 0.5209608674049377, 'eval_wording_rmse': 0.8184873461723328, 'eval_mcrmse': 0.6697241067886353, 'eval_runtime': 29.8836, 'eval_samples_per_second': 67.227, 'eval_steps_per_second': 33.63, 'epoch': 0.31}


                                                    
 12%|█▏        | 900/7734 [05:59<10:57, 10.39it/s] 

{'eval_loss': 0.5352912545204163, 'eval_content_rmse': 0.523178219795227, 'eval_wording_rmse': 0.8926741480827332, 'eval_mcrmse': 0.7079261541366577, 'eval_runtime': 29.8809, 'eval_samples_per_second': 67.234, 'eval_steps_per_second': 33.633, 'epoch': 0.35}


 13%|█▎        | 1000/7734 [06:11<14:14,  7.88it/s]  

{'loss': 0.3065, 'learning_rate': 1.3060512024825446e-05, 'epoch': 0.39}


                                                   
 13%|█▎        | 1000/7734 [06:39<14:14,  7.88it/s]

{'eval_loss': 0.6822434663772583, 'eval_content_rmse': 0.6239966154098511, 'eval_wording_rmse': 0.9874792695045471, 'eval_mcrmse': 0.8057379722595215, 'eval_runtime': 28.4962, 'eval_samples_per_second': 70.501, 'eval_steps_per_second': 35.268, 'epoch': 0.39}


                                                      
 14%|█▍        | 1100/7734 [07:18<10:07, 10.93it/s]

{'eval_loss': 0.6785814762115479, 'eval_content_rmse': 0.6985734105110168, 'eval_wording_rmse': 0.9322872161865234, 'eval_mcrmse': 0.8154302835464478, 'eval_runtime': 27.8023, 'eval_samples_per_second': 72.26, 'eval_steps_per_second': 36.148, 'epoch': 0.43}


                                                      
 16%|█▌        | 1200/7734 [08:02<14:38,  7.44it/s]

{'eval_loss': 0.5852620601654053, 'eval_content_rmse': 0.5907912850379944, 'eval_wording_rmse': 0.9063608646392822, 'eval_mcrmse': 0.7485760450363159, 'eval_runtime': 31.4004, 'eval_samples_per_second': 63.98, 'eval_steps_per_second': 32.006, 'epoch': 0.47}


                                                      
 17%|█▋        | 1300/7734 [08:48<10:16, 10.44it/s]

{'eval_loss': 0.7732252478599548, 'eval_content_rmse': 0.693731427192688, 'eval_wording_rmse': 1.0320793390274048, 'eval_mcrmse': 0.8629053831100464, 'eval_runtime': 29.5838, 'eval_samples_per_second': 67.909, 'eval_steps_per_second': 33.971, 'epoch': 0.5}


                                                      
 18%|█▊        | 1400/7734 [09:27<12:07,  8.71it/s]

{'eval_loss': 0.5114850401878357, 'eval_content_rmse': 0.5244174003601074, 'eval_wording_rmse': 0.8648448586463928, 'eval_mcrmse': 0.6946310997009277, 'eval_runtime': 26.9514, 'eval_samples_per_second': 74.542, 'eval_steps_per_second': 37.289, 'epoch': 0.54}


 19%|█▉        | 1500/7734 [09:46<27:06,  3.83it/s]   

{'loss': 0.3112, 'learning_rate': 1.2090768037238169e-05, 'epoch': 0.58}


                                                   
 19%|█▉        | 1500/7734 [10:39<27:06,  3.83it/s]

{'eval_loss': 0.6406261920928955, 'eval_content_rmse': 0.6255577206611633, 'eval_wording_rmse': 0.9433609843254089, 'eval_mcrmse': 0.7844593524932861, 'eval_runtime': 53.4823, 'eval_samples_per_second': 37.564, 'eval_steps_per_second': 18.791, 'epoch': 0.58}


                                                      
 21%|██        | 1600/7734 [11:17<08:51, 11.54it/s]

{'eval_loss': 0.599395751953125, 'eval_content_rmse': 0.5691260099411011, 'eval_wording_rmse': 0.9353536367416382, 'eval_mcrmse': 0.7522398233413696, 'eval_runtime': 25.8994, 'eval_samples_per_second': 77.569, 'eval_steps_per_second': 38.804, 'epoch': 0.62}


                                                     
 22%|██▏       | 1700/7734 [11:53<10:48,  9.31it/s]

{'eval_loss': 0.8154011368751526, 'eval_content_rmse': 0.6489852070808411, 'eval_wording_rmse': 1.0998269319534302, 'eval_mcrmse': 0.874406099319458, 'eval_runtime': 25.7165, 'eval_samples_per_second': 78.121, 'eval_steps_per_second': 39.08, 'epoch': 0.66}


                                                     
 23%|██▎       | 1800/7734 [12:29<08:08, 12.14it/s]

{'eval_loss': 0.6239291429519653, 'eval_content_rmse': 0.5980390906333923, 'eval_wording_rmse': 0.9435082077980042, 'eval_mcrmse': 0.7707736492156982, 'eval_runtime': 25.9263, 'eval_samples_per_second': 77.489, 'eval_steps_per_second': 38.764, 'epoch': 0.7}


                                                     
 25%|██▍       | 1900/7734 [13:08<08:56, 10.87it/s]

{'eval_loss': 0.5064743757247925, 'eval_content_rmse': 0.5340279936790466, 'eval_wording_rmse': 0.8530897498130798, 'eval_mcrmse': 0.6935588717460632, 'eval_runtime': 26.5224, 'eval_samples_per_second': 75.747, 'eval_steps_per_second': 37.893, 'epoch': 0.74}


 26%|██▌       | 2000/7734 [13:18<08:55, 10.70it/s]  

{'loss': 0.3004, 'learning_rate': 1.1121024049650893e-05, 'epoch': 0.78}


                                                   
 26%|██▌       | 2000/7734 [13:45<08:55, 10.70it/s]

{'eval_loss': 0.6577293872833252, 'eval_content_rmse': 0.5527005195617676, 'eval_wording_rmse': 1.0049781799316406, 'eval_mcrmse': 0.7788393497467041, 'eval_runtime': 26.6065, 'eval_samples_per_second': 75.508, 'eval_steps_per_second': 37.773, 'epoch': 0.78}


                                                     
 27%|██▋       | 2100/7734 [14:24<08:39, 10.85it/s]

{'eval_loss': 0.6210119128227234, 'eval_content_rmse': 0.5698320865631104, 'eval_wording_rmse': 0.9577656388282776, 'eval_mcrmse': 0.7637988328933716, 'eval_runtime': 27.5785, 'eval_samples_per_second': 72.847, 'eval_steps_per_second': 36.441, 'epoch': 0.81}


                                                     
 28%|██▊       | 2200/7734 [15:02<08:02, 11.47it/s]

{'eval_loss': 0.5725006461143494, 'eval_content_rmse': 0.5423539876937866, 'eval_wording_rmse': 0.9224172830581665, 'eval_mcrmse': 0.7323856353759766, 'eval_runtime': 26.7326, 'eval_samples_per_second': 75.152, 'eval_steps_per_second': 37.595, 'epoch': 0.85}


                                                     
 30%|██▉       | 2300/7734 [15:39<07:47, 11.61it/s]

{'eval_loss': 0.6444955468177795, 'eval_content_rmse': 0.5307230949401855, 'eval_wording_rmse': 1.0036550760269165, 'eval_mcrmse': 0.767189085483551, 'eval_runtime': 25.7783, 'eval_samples_per_second': 77.934, 'eval_steps_per_second': 38.986, 'epoch': 0.89}


                                                     
 31%|███       | 2400/7734 [16:16<07:57, 11.18it/s]

{'eval_loss': 0.4310758411884308, 'eval_content_rmse': 0.5111114978790283, 'eval_wording_rmse': 0.775187611579895, 'eval_mcrmse': 0.6431495547294617, 'eval_runtime': 25.8587, 'eval_samples_per_second': 77.691, 'eval_steps_per_second': 38.865, 'epoch': 0.93}


 32%|███▏      | 2500/7734 [16:27<07:28, 11.67it/s]  

{'loss': 0.2723, 'learning_rate': 1.0151280062063614e-05, 'epoch': 0.97}


                                                   
 32%|███▏      | 2500/7734 [16:53<07:28, 11.67it/s]

{'eval_loss': 0.5053865313529968, 'eval_content_rmse': 0.5454899072647095, 'eval_wording_rmse': 0.8445197343826294, 'eval_mcrmse': 0.6950048208236694, 'eval_runtime': 25.7834, 'eval_samples_per_second': 77.918, 'eval_steps_per_second': 38.979, 'epoch': 0.97}


                                                     
 34%|███▎      | 2600/7734 [17:29<07:13, 11.83it/s]

{'eval_loss': 0.437823623418808, 'eval_content_rmse': 0.5161982774734497, 'eval_wording_rmse': 0.7805033326148987, 'eval_mcrmse': 0.6483508348464966, 'eval_runtime': 25.1962, 'eval_samples_per_second': 79.734, 'eval_steps_per_second': 39.887, 'epoch': 1.01}


                                                     
 35%|███▍      | 2700/7734 [18:04<07:26, 11.28it/s]

{'eval_loss': 0.4355430006980896, 'eval_content_rmse': 0.53743577003479, 'eval_wording_rmse': 0.7630518078804016, 'eval_mcrmse': 0.6502437591552734, 'eval_runtime': 25.2673, 'eval_samples_per_second': 79.51, 'eval_steps_per_second': 39.775, 'epoch': 1.05}


                                                     
 36%|███▌      | 2800/7734 [18:41<07:59, 10.29it/s]

{'eval_loss': 0.6037685871124268, 'eval_content_rmse': 0.5560262203216553, 'eval_wording_rmse': 0.9478247165679932, 'eval_mcrmse': 0.7519254684448242, 'eval_runtime': 26.0629, 'eval_samples_per_second': 77.083, 'eval_steps_per_second': 38.561, 'epoch': 1.09}


                                                     
 37%|███▋      | 2900/7734 [19:18<07:24, 10.87it/s]

{'eval_loss': 0.4337949752807617, 'eval_content_rmse': 0.5285306572914124, 'eval_wording_rmse': 0.766972005367279, 'eval_mcrmse': 0.6477513313293457, 'eval_runtime': 25.7392, 'eval_samples_per_second': 78.052, 'eval_steps_per_second': 39.045, 'epoch': 1.12}


 39%|███▉      | 3000/7734 [19:28<06:34, 11.99it/s]  

{'loss': 0.2078, 'learning_rate': 9.181536074476338e-06, 'epoch': 1.16}


                                                   
 39%|███▉      | 3000/7734 [19:55<06:34, 11.99it/s]

{'eval_loss': 0.48746412992477417, 'eval_content_rmse': 0.5384564995765686, 'eval_wording_rmse': 0.827642560005188, 'eval_mcrmse': 0.6830495595932007, 'eval_runtime': 26.3953, 'eval_samples_per_second': 76.112, 'eval_steps_per_second': 38.075, 'epoch': 1.16}


                                                     
 40%|████      | 3100/7734 [20:31<06:33, 11.78it/s]

{'eval_loss': 0.6165713667869568, 'eval_content_rmse': 0.6368343830108643, 'eval_wording_rmse': 0.9097168445587158, 'eval_mcrmse': 0.77327561378479, 'eval_runtime': 25.4094, 'eval_samples_per_second': 79.065, 'eval_steps_per_second': 39.552, 'epoch': 1.2}


                                                     
 41%|████▏     | 3200/7734 [21:07<06:59, 10.80it/s]

{'eval_loss': 0.46064016222953796, 'eval_content_rmse': 0.515718936920166, 'eval_wording_rmse': 0.8095149993896484, 'eval_mcrmse': 0.6626169681549072, 'eval_runtime': 25.013, 'eval_samples_per_second': 80.318, 'eval_steps_per_second': 40.179, 'epoch': 1.24}


                                                     
 43%|████▎     | 3300/7734 [21:43<06:39, 11.09it/s]

{'eval_loss': 0.4632319211959839, 'eval_content_rmse': 0.5429896712303162, 'eval_wording_rmse': 0.7947489023208618, 'eval_mcrmse': 0.6688692569732666, 'eval_runtime': 24.9001, 'eval_samples_per_second': 80.683, 'eval_steps_per_second': 40.361, 'epoch': 1.28}


                                                     
 44%|████▍     | 3400/7734 [22:19<06:19, 11.42it/s]

{'eval_loss': 0.44885748624801636, 'eval_content_rmse': 0.5116379261016846, 'eval_wording_rmse': 0.7974594235420227, 'eval_mcrmse': 0.6545486450195312, 'eval_runtime': 25.0098, 'eval_samples_per_second': 80.328, 'eval_steps_per_second': 40.184, 'epoch': 1.32}


 45%|████▌     | 3500/7734 [22:32<06:51, 10.30it/s]  

{'loss': 0.2332, 'learning_rate': 8.211792086889063e-06, 'epoch': 1.36}


                                                   
 45%|████▌     | 3500/7734 [22:57<06:51, 10.30it/s]

{'eval_loss': 0.5654014945030212, 'eval_content_rmse': 0.5551959276199341, 'eval_wording_rmse': 0.9069515466690063, 'eval_mcrmse': 0.7310737371444702, 'eval_runtime': 25.2139, 'eval_samples_per_second': 79.678, 'eval_steps_per_second': 39.859, 'epoch': 1.36}


                                                     
 47%|████▋     | 3600/7734 [23:32<05:39, 12.19it/s]

{'eval_loss': 0.4649180471897125, 'eval_content_rmse': 0.5203429460525513, 'eval_wording_rmse': 0.8118371367454529, 'eval_mcrmse': 0.6660900115966797, 'eval_runtime': 25.2158, 'eval_samples_per_second': 79.672, 'eval_steps_per_second': 39.856, 'epoch': 1.4}


                                                     
 48%|████▊     | 3700/7734 [24:09<05:36, 11.98it/s]

{'eval_loss': 0.4821898937225342, 'eval_content_rmse': 0.5129781365394592, 'eval_wording_rmse': 0.8373965620994568, 'eval_mcrmse': 0.675187349319458, 'eval_runtime': 25.2241, 'eval_samples_per_second': 79.646, 'eval_steps_per_second': 39.843, 'epoch': 1.44}


                                                     
 49%|████▉     | 3800/7734 [24:44<05:09, 12.71it/s]

{'eval_loss': 0.4168055057525635, 'eval_content_rmse': 0.5124497413635254, 'eval_wording_rmse': 0.7556493282318115, 'eval_mcrmse': 0.6340495347976685, 'eval_runtime': 25.1874, 'eval_samples_per_second': 79.762, 'eval_steps_per_second': 39.901, 'epoch': 1.47}


                                                     
 50%|█████     | 3900/7734 [25:19<05:34, 11.45it/s]

{'eval_loss': 0.4450295865535736, 'eval_content_rmse': 0.510542094707489, 'eval_wording_rmse': 0.7933510541915894, 'eval_mcrmse': 0.6519465446472168, 'eval_runtime': 25.1359, 'eval_samples_per_second': 79.926, 'eval_steps_per_second': 39.983, 'epoch': 1.51}


 52%|█████▏    | 4000/7734 [25:29<05:09, 12.06it/s]  

{'loss': 0.1964, 'learning_rate': 7.242048099301785e-06, 'epoch': 1.55}


                                                   
 52%|█████▏    | 4000/7734 [25:54<05:09, 12.06it/s]

{'eval_loss': 0.42661765217781067, 'eval_content_rmse': 0.5028747916221619, 'eval_wording_rmse': 0.7748241424560547, 'eval_mcrmse': 0.6388494968414307, 'eval_runtime': 25.0553, 'eval_samples_per_second': 80.183, 'eval_steps_per_second': 40.111, 'epoch': 1.55}


                                                     
 53%|█████▎    | 4100/7734 [26:29<04:50, 12.49it/s]

{'eval_loss': 0.4603249430656433, 'eval_content_rmse': 0.524071991443634, 'eval_wording_rmse': 0.8037403225898743, 'eval_mcrmse': 0.6639061570167542, 'eval_runtime': 25.0779, 'eval_samples_per_second': 80.11, 'eval_steps_per_second': 40.075, 'epoch': 1.59}


                                                     
 54%|█████▍    | 4200/7734 [27:04<04:43, 12.47it/s]

{'eval_loss': 0.4678765535354614, 'eval_content_rmse': 0.5317925214767456, 'eval_wording_rmse': 0.8080532550811768, 'eval_mcrmse': 0.6699228882789612, 'eval_runtime': 25.066, 'eval_samples_per_second': 80.148, 'eval_steps_per_second': 40.094, 'epoch': 1.63}


                                                     
 56%|█████▌    | 4300/7734 [27:40<05:01, 11.41it/s]

{'eval_loss': 0.46053430438041687, 'eval_content_rmse': 0.5130263566970825, 'eval_wording_rmse': 0.811093807220459, 'eval_mcrmse': 0.6620600819587708, 'eval_runtime': 25.8566, 'eval_samples_per_second': 77.698, 'eval_steps_per_second': 38.868, 'epoch': 1.67}


                                                     
 57%|█████▋    | 4400/7734 [28:14<04:40, 11.87it/s]

{'eval_loss': 0.4149192273616791, 'eval_content_rmse': 0.5134129524230957, 'eval_wording_rmse': 0.752493143081665, 'eval_mcrmse': 0.6329530477523804, 'eval_runtime': 24.4499, 'eval_samples_per_second': 82.168, 'eval_steps_per_second': 41.104, 'epoch': 1.71}


 58%|█████▊    | 4500/7734 [28:25<04:30, 11.95it/s]  

{'loss': 0.1919, 'learning_rate': 6.272304111714507e-06, 'epoch': 1.75}


                                                   
 58%|█████▊    | 4500/7734 [28:50<04:30, 11.95it/s]

{'eval_loss': 0.5071992874145508, 'eval_content_rmse': 0.5217239260673523, 'eval_wording_rmse': 0.8615116477012634, 'eval_mcrmse': 0.6916177868843079, 'eval_runtime': 24.8026, 'eval_samples_per_second': 81.0, 'eval_steps_per_second': 40.52, 'epoch': 1.75}


                                                     
 59%|█████▉    | 4600/7734 [29:24<04:08, 12.62it/s]

{'eval_loss': 0.4844059944152832, 'eval_content_rmse': 0.524286150932312, 'eval_wording_rmse': 0.8330284357070923, 'eval_mcrmse': 0.6786572933197021, 'eval_runtime': 24.6333, 'eval_samples_per_second': 81.556, 'eval_steps_per_second': 40.798, 'epoch': 1.78}


                                                     
 61%|██████    | 4700/7734 [30:00<04:28, 11.31it/s]

{'eval_loss': 0.47675633430480957, 'eval_content_rmse': 0.5305411219596863, 'eval_wording_rmse': 0.819779634475708, 'eval_mcrmse': 0.6751604080200195, 'eval_runtime': 24.7098, 'eval_samples_per_second': 81.304, 'eval_steps_per_second': 40.672, 'epoch': 1.82}


                                                     
 62%|██████▏   | 4800/7734 [30:36<04:27, 10.96it/s]

{'eval_loss': 0.4916677474975586, 'eval_content_rmse': 0.5209081172943115, 'eval_wording_rmse': 0.8437952995300293, 'eval_mcrmse': 0.6823517084121704, 'eval_runtime': 24.9007, 'eval_samples_per_second': 80.68, 'eval_steps_per_second': 40.36, 'epoch': 1.86}


                                                     
 63%|██████▎   | 4900/7734 [31:13<14:50,  3.18it/s]

{'eval_loss': 0.477533221244812, 'eval_content_rmse': 0.5203092694282532, 'eval_wording_rmse': 0.8272516131401062, 'eval_mcrmse': 0.6737804412841797, 'eval_runtime': 24.5238, 'eval_samples_per_second': 81.92, 'eval_steps_per_second': 40.981, 'epoch': 1.9}


 65%|██████▍   | 5000/7734 [31:23<03:36, 12.62it/s]  

{'loss': 0.1817, 'learning_rate': 5.302560124127231e-06, 'epoch': 1.94}


                                                   
 65%|██████▍   | 5000/7734 [31:48<03:36, 12.62it/s]

{'eval_loss': 0.5215855836868286, 'eval_content_rmse': 0.5181651711463928, 'eval_wording_rmse': 0.8801563382148743, 'eval_mcrmse': 0.6991607546806335, 'eval_runtime': 24.8911, 'eval_samples_per_second': 80.712, 'eval_steps_per_second': 40.376, 'epoch': 1.94}


                                                     
 66%|██████▌   | 5100/7734 [32:23<03:50, 11.42it/s]

{'eval_loss': 0.4660875201225281, 'eval_content_rmse': 0.5232008099555969, 'eval_wording_rmse': 0.8114405274391174, 'eval_mcrmse': 0.6673206686973572, 'eval_runtime': 24.6444, 'eval_samples_per_second': 81.519, 'eval_steps_per_second': 40.78, 'epoch': 1.98}


                                                     
 67%|██████▋   | 5200/7734 [32:58<03:35, 11.75it/s]

{'eval_loss': 0.4033120572566986, 'eval_content_rmse': 0.5131906270980835, 'eval_wording_rmse': 0.7370612621307373, 'eval_mcrmse': 0.6251259446144104, 'eval_runtime': 24.7476, 'eval_samples_per_second': 81.179, 'eval_steps_per_second': 40.61, 'epoch': 2.02}


                                                     
 69%|██████▊   | 5300/7734 [33:32<03:03, 13.27it/s]

{'eval_loss': 0.47089827060699463, 'eval_content_rmse': 0.511659562587738, 'eval_wording_rmse': 0.8246214389801025, 'eval_mcrmse': 0.6681405305862427, 'eval_runtime': 24.9613, 'eval_samples_per_second': 80.484, 'eval_steps_per_second': 40.262, 'epoch': 2.06}


                                                     
 70%|██████▉   | 5400/7734 [34:07<03:40, 10.58it/s]

{'eval_loss': 0.4540746510028839, 'eval_content_rmse': 0.5151430368423462, 'eval_wording_rmse': 0.8017333745956421, 'eval_mcrmse': 0.6584382057189941, 'eval_runtime': 24.3652, 'eval_samples_per_second': 82.454, 'eval_steps_per_second': 41.247, 'epoch': 2.09}


 71%|███████   | 5500/7734 [34:17<03:23, 10.96it/s]  

{'loss': 0.1564, 'learning_rate': 4.332816136539954e-06, 'epoch': 2.13}


                                                   
 71%|███████   | 5500/7734 [34:42<03:23, 10.96it/s]

{'eval_loss': 0.40454763174057007, 'eval_content_rmse': 0.5107076168060303, 'eval_wording_rmse': 0.7404546141624451, 'eval_mcrmse': 0.6255811452865601, 'eval_runtime': 25.0084, 'eval_samples_per_second': 80.333, 'eval_steps_per_second': 40.187, 'epoch': 2.13}


                                                     
 72%|███████▏  | 5600/7734 [35:16<02:49, 12.61it/s]

{'eval_loss': 0.4251950681209564, 'eval_content_rmse': 0.5078436732292175, 'eval_wording_rmse': 0.7697304487228394, 'eval_mcrmse': 0.638787031173706, 'eval_runtime': 24.769, 'eval_samples_per_second': 81.109, 'eval_steps_per_second': 40.575, 'epoch': 2.17}


                                                     
 74%|███████▎  | 5700/7734 [35:51<02:55, 11.56it/s]

{'eval_loss': 0.45203155279159546, 'eval_content_rmse': 0.5274225473403931, 'eval_wording_rmse': 0.791130781173706, 'eval_mcrmse': 0.6592766642570496, 'eval_runtime': 24.7784, 'eval_samples_per_second': 81.079, 'eval_steps_per_second': 40.56, 'epoch': 2.21}


                                                     
 75%|███████▍  | 5800/7734 [36:27<02:41, 12.00it/s]

{'eval_loss': 0.4106655418872833, 'eval_content_rmse': 0.5048527717590332, 'eval_wording_rmse': 0.7526320219039917, 'eval_mcrmse': 0.6287423968315125, 'eval_runtime': 24.6145, 'eval_samples_per_second': 81.619, 'eval_steps_per_second': 40.83, 'epoch': 2.25}


                                                     
 76%|███████▋  | 5900/7734 [37:02<02:38, 11.58it/s]

{'eval_loss': 0.407886266708374, 'eval_content_rmse': 0.5157666206359863, 'eval_wording_rmse': 0.7414560914039612, 'eval_mcrmse': 0.6286113262176514, 'eval_runtime': 24.8102, 'eval_samples_per_second': 80.975, 'eval_steps_per_second': 40.508, 'epoch': 2.29}


 78%|███████▊  | 6000/7734 [37:13<02:10, 13.30it/s]  

{'loss': 0.1466, 'learning_rate': 3.363072148952677e-06, 'epoch': 2.33}


                                                   
 78%|███████▊  | 6000/7734 [37:38<02:10, 13.30it/s]

{'eval_loss': 0.3918156325817108, 'eval_content_rmse': 0.5174899697303772, 'eval_wording_rmse': 0.7182170152664185, 'eval_mcrmse': 0.6178535223007202, 'eval_runtime': 24.8652, 'eval_samples_per_second': 80.796, 'eval_steps_per_second': 40.418, 'epoch': 2.33}


                                                     
 79%|███████▉  | 6100/7734 [38:12<02:11, 12.39it/s]

{'eval_loss': 0.5232292413711548, 'eval_content_rmse': 0.5172993540763855, 'eval_wording_rmse': 0.8825302124023438, 'eval_mcrmse': 0.699914813041687, 'eval_runtime': 24.4569, 'eval_samples_per_second': 82.145, 'eval_steps_per_second': 41.093, 'epoch': 2.37}


                                                     
 80%|████████  | 6200/7734 [38:47<02:26, 10.46it/s]

{'eval_loss': 0.4279170334339142, 'eval_content_rmse': 0.5048834681510925, 'eval_wording_rmse': 0.7751945853233337, 'eval_mcrmse': 0.6400390267372131, 'eval_runtime': 24.9895, 'eval_samples_per_second': 80.394, 'eval_steps_per_second': 40.217, 'epoch': 2.4}


                                                     
 81%|████████▏ | 6300/7734 [39:23<02:05, 11.40it/s]

{'eval_loss': 0.4600241184234619, 'eval_content_rmse': 0.5109565258026123, 'eval_wording_rmse': 0.8117707371711731, 'eval_mcrmse': 0.6613636016845703, 'eval_runtime': 24.8033, 'eval_samples_per_second': 80.997, 'eval_steps_per_second': 40.519, 'epoch': 2.44}


                                                     
 83%|████████▎ | 6400/7734 [39:58<02:12, 10.09it/s]

{'eval_loss': 0.4738946855068207, 'eval_content_rmse': 0.5180216431617737, 'eval_wording_rmse': 0.8242834806442261, 'eval_mcrmse': 0.6711525917053223, 'eval_runtime': 24.7272, 'eval_samples_per_second': 81.247, 'eval_steps_per_second': 40.644, 'epoch': 2.48}


 84%|████████▍ | 6500/7734 [40:08<01:56, 10.59it/s]  

{'loss': 0.1296, 'learning_rate': 2.3933281613653994e-06, 'epoch': 2.52}


                                                   
 84%|████████▍ | 6500/7734 [40:33<01:56, 10.59it/s]

{'eval_loss': 0.43644312024116516, 'eval_content_rmse': 0.5153182148933411, 'eval_wording_rmse': 0.779315710067749, 'eval_mcrmse': 0.6473169326782227, 'eval_runtime': 24.6864, 'eval_samples_per_second': 81.381, 'eval_steps_per_second': 40.711, 'epoch': 2.52}


                                                     
 85%|████████▌ | 6600/7734 [41:07<01:34, 12.00it/s]

{'eval_loss': 0.462132066488266, 'eval_content_rmse': 0.5103368163108826, 'eval_wording_rmse': 0.8147512674331665, 'eval_mcrmse': 0.6625440120697021, 'eval_runtime': 24.5637, 'eval_samples_per_second': 81.787, 'eval_steps_per_second': 40.914, 'epoch': 2.56}


                                                     
 87%|████████▋ | 6700/7734 [41:42<01:34, 10.91it/s]

{'eval_loss': 0.42005234956741333, 'eval_content_rmse': 0.510430097579956, 'eval_wording_rmse': 0.7612920999526978, 'eval_mcrmse': 0.6358610987663269, 'eval_runtime': 24.8676, 'eval_samples_per_second': 80.788, 'eval_steps_per_second': 40.414, 'epoch': 2.6}


                                                     
 88%|████████▊ | 6800/7734 [42:19<01:12, 12.80it/s]

{'eval_loss': 0.4217335879802704, 'eval_content_rmse': 0.5099517703056335, 'eval_wording_rmse': 0.7638166546821594, 'eval_mcrmse': 0.6368842124938965, 'eval_runtime': 24.6452, 'eval_samples_per_second': 81.517, 'eval_steps_per_second': 40.779, 'epoch': 2.64}


                                                     
 89%|████████▉ | 6900/7734 [42:54<01:15, 11.01it/s]

{'eval_loss': 0.42974981665611267, 'eval_content_rmse': 0.5092738270759583, 'eval_wording_rmse': 0.7746867537498474, 'eval_mcrmse': 0.6419802904129028, 'eval_runtime': 24.8705, 'eval_samples_per_second': 80.778, 'eval_steps_per_second': 40.409, 'epoch': 2.68}


 91%|█████████ | 7000/7734 [43:04<01:02, 11.74it/s]

{'loss': 0.1265, 'learning_rate': 1.4235841737781227e-06, 'epoch': 2.72}


                                                   
 91%|█████████ | 7000/7734 [43:28<01:02, 11.74it/s]

{'eval_loss': 0.4052389860153198, 'eval_content_rmse': 0.5162903070449829, 'eval_wording_rmse': 0.7375110983848572, 'eval_mcrmse': 0.6269006729125977, 'eval_runtime': 24.7307, 'eval_samples_per_second': 81.235, 'eval_steps_per_second': 40.638, 'epoch': 2.72}


                                                   
 92%|█████████▏| 7100/7734 [44:03<00:58, 10.86it/s]

{'eval_loss': 0.417884886264801, 'eval_content_rmse': 0.5043699741363525, 'eval_wording_rmse': 0.7624832391738892, 'eval_mcrmse': 0.6334266066551208, 'eval_runtime': 24.6044, 'eval_samples_per_second': 81.652, 'eval_steps_per_second': 40.846, 'epoch': 2.75}


                                                   
 93%|█████████▎| 7200/7734 [44:38<00:53, 10.05it/s]

{'eval_loss': 0.40612250566482544, 'eval_content_rmse': 0.5027146935462952, 'eval_wording_rmse': 0.7480128407478333, 'eval_mcrmse': 0.6253637671470642, 'eval_runtime': 24.7613, 'eval_samples_per_second': 81.135, 'eval_steps_per_second': 40.588, 'epoch': 2.79}


                                                   
 94%|█████████▍| 7300/7734 [45:13<00:42, 10.18it/s]

{'eval_loss': 0.4006761908531189, 'eval_content_rmse': 0.5039021372795105, 'eval_wording_rmse': 0.7398889064788818, 'eval_mcrmse': 0.6218955516815186, 'eval_runtime': 24.5516, 'eval_samples_per_second': 81.828, 'eval_steps_per_second': 40.934, 'epoch': 2.83}


                                                   
 96%|█████████▌| 7400/7734 [45:48<00:30, 11.09it/s]

{'eval_loss': 0.39427366852760315, 'eval_content_rmse': 0.505711019039154, 'eval_wording_rmse': 0.7299339771270752, 'eval_mcrmse': 0.617822527885437, 'eval_runtime': 24.4819, 'eval_samples_per_second': 82.061, 'eval_steps_per_second': 41.051, 'epoch': 2.87}


 97%|█████████▋| 7500/7734 [45:58<00:22, 10.44it/s]

{'loss': 0.1268, 'learning_rate': 4.5384018619084566e-07, 'epoch': 2.91}


                                                   
 97%|█████████▋| 7500/7734 [46:23<00:22, 10.44it/s]

{'eval_loss': 0.3957735002040863, 'eval_content_rmse': 0.5021229982376099, 'eval_wording_rmse': 0.7344521284103394, 'eval_mcrmse': 0.6182875633239746, 'eval_runtime': 24.6197, 'eval_samples_per_second': 81.601, 'eval_steps_per_second': 40.821, 'epoch': 2.91}


                                                   
 98%|█████████▊| 7600/7734 [46:58<00:12, 10.96it/s]

{'eval_loss': 0.40801802277565, 'eval_content_rmse': 0.5028083324432373, 'eval_wording_rmse': 0.7504799365997314, 'eval_mcrmse': 0.6266441345214844, 'eval_runtime': 24.6957, 'eval_samples_per_second': 81.35, 'eval_steps_per_second': 40.695, 'epoch': 2.95}


                                                   
100%|█████████▉| 7700/7734 [47:32<00:02, 11.53it/s]

{'eval_loss': 0.40379592776298523, 'eval_content_rmse': 0.5045918226242065, 'eval_wording_rmse': 0.7436255812644958, 'eval_mcrmse': 0.6241086721420288, 'eval_runtime': 24.8749, 'eval_samples_per_second': 80.764, 'eval_steps_per_second': 40.402, 'epoch': 2.99}


100%|██████████| 7734/7734 [47:37<00:00,  2.71it/s]


{'train_runtime': 2857.6261, 'train_samples_per_second': 5.413, 'train_steps_per_second': 2.706, 'train_loss': 0.22142115174444696, 'epoch': 3.0}


100%|██████████| 1005/1005 [00:24<00:00, 40.54it/s]
100%|██████████| 2/2 [00:00<00:00, 117.79it/s]


fold 2:


                                                  
  1%|▏         | 100/7755 [00:31<10:33, 12.09it/s]

{'eval_loss': 0.5363470911979675, 'eval_content_rmse': 0.6948831677436829, 'eval_wording_rmse': 0.7680047154426575, 'eval_mcrmse': 0.7314439415931702, 'eval_runtime': 22.9522, 'eval_samples_per_second': 86.963, 'eval_steps_per_second': 43.482, 'epoch': 0.04}


                                                    
  3%|▎         | 200/7755 [01:04<11:21, 11.09it/s]

{'eval_loss': 0.5515784025192261, 'eval_content_rmse': 0.5692477226257324, 'eval_wording_rmse': 0.8826736807823181, 'eval_mcrmse': 0.7259607315063477, 'eval_runtime': 22.7669, 'eval_samples_per_second': 87.671, 'eval_steps_per_second': 43.836, 'epoch': 0.08}


                                                    
  4%|▍         | 300/7755 [01:39<09:31, 13.05it/s]

{'eval_loss': 0.3839608132839203, 'eval_content_rmse': 0.5674700140953064, 'eval_wording_rmse': 0.6677567362785339, 'eval_mcrmse': 0.6176133751869202, 'eval_runtime': 22.8411, 'eval_samples_per_second': 87.386, 'eval_steps_per_second': 43.693, 'epoch': 0.12}


                                                    
  5%|▌         | 400/7755 [02:14<10:35, 11.57it/s]

{'eval_loss': 0.40948185324668884, 'eval_content_rmse': 0.5166723728179932, 'eval_wording_rmse': 0.7429759502410889, 'eval_mcrmse': 0.629824161529541, 'eval_runtime': 24.8061, 'eval_samples_per_second': 80.464, 'eval_steps_per_second': 40.232, 'epoch': 0.15}


  6%|▋         | 500/7755 [02:25<10:29, 11.53it/s]  

{'loss': 0.5712, 'learning_rate': 1.4032882011605415e-05, 'epoch': 0.19}


                                                  
  6%|▋         | 500/7755 [02:48<10:29, 11.53it/s]

{'eval_loss': 0.35833463072776794, 'eval_content_rmse': 0.4738041162490845, 'eval_wording_rmse': 0.7015546560287476, 'eval_mcrmse': 0.587679386138916, 'eval_runtime': 23.3641, 'eval_samples_per_second': 85.43, 'eval_steps_per_second': 42.715, 'epoch': 0.19}


                                                    
  8%|▊         | 600/7755 [03:22<10:52, 10.97it/s]

{'eval_loss': 0.3143882155418396, 'eval_content_rmse': 0.4851861596107483, 'eval_wording_rmse': 0.6271928548812866, 'eval_mcrmse': 0.5561895370483398, 'eval_runtime': 23.382, 'eval_samples_per_second': 85.365, 'eval_steps_per_second': 42.682, 'epoch': 0.23}


                                                    
  9%|▉         | 700/7755 [03:56<11:04, 10.62it/s]

{'eval_loss': 0.34381967782974243, 'eval_content_rmse': 0.4759591221809387, 'eval_wording_rmse': 0.6790454983711243, 'eval_mcrmse': 0.5775023102760315, 'eval_runtime': 23.3478, 'eval_samples_per_second': 85.49, 'eval_steps_per_second': 42.745, 'epoch': 0.27}


                                                    
 10%|█         | 800/7755 [04:32<10:14, 11.33it/s]

{'eval_loss': 0.4138766825199127, 'eval_content_rmse': 0.480427086353302, 'eval_wording_rmse': 0.7726205587387085, 'eval_mcrmse': 0.6265238523483276, 'eval_runtime': 23.2911, 'eval_samples_per_second': 85.698, 'eval_steps_per_second': 42.849, 'epoch': 0.31}


                                                    
 12%|█▏        | 900/7755 [05:06<13:33,  8.43it/s]

{'eval_loss': 0.38287219405174255, 'eval_content_rmse': 0.5093413591384888, 'eval_wording_rmse': 0.7115585803985596, 'eval_mcrmse': 0.6104499697685242, 'eval_runtime': 23.3559, 'eval_samples_per_second': 85.46, 'eval_steps_per_second': 42.73, 'epoch': 0.35}


 13%|█▎        | 1000/7755 [05:16<09:38, 11.67it/s] 

{'loss': 0.3289, 'learning_rate': 1.3065764023210833e-05, 'epoch': 0.39}


                                                   
 13%|█▎        | 1000/7755 [05:40<09:38, 11.67it/s]

{'eval_loss': 0.3745216131210327, 'eval_content_rmse': 0.47465577721595764, 'eval_wording_rmse': 0.7237022519111633, 'eval_mcrmse': 0.5991790294647217, 'eval_runtime': 23.3863, 'eval_samples_per_second': 85.349, 'eval_steps_per_second': 42.674, 'epoch': 0.39}


                                                     
 14%|█▍        | 1100/7755 [06:13<11:44,  9.45it/s]

{'eval_loss': 0.32350045442581177, 'eval_content_rmse': 0.5038043260574341, 'eval_wording_rmse': 0.6270425915718079, 'eval_mcrmse': 0.5654234886169434, 'eval_runtime': 23.313, 'eval_samples_per_second': 85.618, 'eval_steps_per_second': 42.809, 'epoch': 0.43}


                                                     
 15%|█▌        | 1200/7755 [06:47<09:26, 11.57it/s]

{'eval_loss': 0.34598660469055176, 'eval_content_rmse': 0.5390509963035583, 'eval_wording_rmse': 0.6335592269897461, 'eval_mcrmse': 0.5863051414489746, 'eval_runtime': 23.3066, 'eval_samples_per_second': 85.641, 'eval_steps_per_second': 42.82, 'epoch': 0.46}


                                                     
 17%|█▋        | 1300/7755 [07:21<10:07, 10.63it/s]

{'eval_loss': 0.3485870957374573, 'eval_content_rmse': 0.4662374258041382, 'eval_wording_rmse': 0.692673921585083, 'eval_mcrmse': 0.5794556736946106, 'eval_runtime': 23.5475, 'eval_samples_per_second': 84.765, 'eval_steps_per_second': 42.382, 'epoch': 0.5}


                                                     
 18%|█▊        | 1400/7755 [07:57<09:26, 11.23it/s]

{'eval_loss': 0.3183518946170807, 'eval_content_rmse': 0.552138090133667, 'eval_wording_rmse': 0.5760616660118103, 'eval_mcrmse': 0.564099907875061, 'eval_runtime': 23.9966, 'eval_samples_per_second': 83.179, 'eval_steps_per_second': 41.589, 'epoch': 0.54}


 19%|█▉        | 1500/7755 [08:08<09:28, 10.99it/s]  

{'loss': 0.3178, 'learning_rate': 1.2098646034816248e-05, 'epoch': 0.58}


                                                   
 19%|█▉        | 1500/7755 [08:32<09:28, 10.99it/s]

{'eval_loss': 0.4226398169994354, 'eval_content_rmse': 0.48993968963623047, 'eval_wording_rmse': 0.7779707312583923, 'eval_mcrmse': 0.6339552402496338, 'eval_runtime': 23.7802, 'eval_samples_per_second': 83.935, 'eval_steps_per_second': 41.968, 'epoch': 0.58}


                                                     
 21%|██        | 1600/7755 [09:07<09:47, 10.48it/s]

{'eval_loss': 0.31824997067451477, 'eval_content_rmse': 0.5678912401199341, 'eval_wording_rmse': 0.560356616973877, 'eval_mcrmse': 0.5641239285469055, 'eval_runtime': 23.6973, 'eval_samples_per_second': 84.229, 'eval_steps_per_second': 42.115, 'epoch': 0.62}


                                                     
 22%|██▏       | 1700/7755 [09:41<09:43, 10.37it/s]

{'eval_loss': 0.28722450137138367, 'eval_content_rmse': 0.46609771251678467, 'eval_wording_rmse': 0.5976636409759521, 'eval_mcrmse': 0.5318806767463684, 'eval_runtime': 23.5281, 'eval_samples_per_second': 84.835, 'eval_steps_per_second': 42.417, 'epoch': 0.66}


                                                     
 23%|██▎       | 1800/7755 [10:14<10:36,  9.36it/s]

{'eval_loss': 0.3225571811199188, 'eval_content_rmse': 0.5235757827758789, 'eval_wording_rmse': 0.6090837121009827, 'eval_mcrmse': 0.5663297176361084, 'eval_runtime': 23.3067, 'eval_samples_per_second': 85.641, 'eval_steps_per_second': 42.82, 'epoch': 0.7}


                                                     
 25%|██▍       | 1900/7755 [10:48<07:51, 12.42it/s]

{'eval_loss': 0.35113075375556946, 'eval_content_rmse': 0.5714780688285828, 'eval_wording_rmse': 0.6129229068756104, 'eval_mcrmse': 0.592200517654419, 'eval_runtime': 23.5859, 'eval_samples_per_second': 84.627, 'eval_steps_per_second': 42.313, 'epoch': 0.74}


 26%|██▌       | 2000/7755 [10:59<08:28, 11.31it/s]  

{'loss': 0.326, 'learning_rate': 1.1131528046421663e-05, 'epoch': 0.77}


                                                   
 26%|██▌       | 2000/7755 [11:22<08:28, 11.31it/s]

{'eval_loss': 0.33498963713645935, 'eval_content_rmse': 0.5065584778785706, 'eval_wording_rmse': 0.6429446339607239, 'eval_mcrmse': 0.5747515559196472, 'eval_runtime': 23.4394, 'eval_samples_per_second': 85.156, 'eval_steps_per_second': 42.578, 'epoch': 0.77}


                                                     
 27%|██▋       | 2100/7755 [11:56<08:04, 11.67it/s]

{'eval_loss': 0.3382902443408966, 'eval_content_rmse': 0.5269136428833008, 'eval_wording_rmse': 0.6316190958023071, 'eval_mcrmse': 0.579266369342804, 'eval_runtime': 23.3993, 'eval_samples_per_second': 85.302, 'eval_steps_per_second': 42.651, 'epoch': 0.81}


                                                     
 28%|██▊       | 2200/7755 [12:30<07:43, 11.98it/s]

{'eval_loss': 0.3274477422237396, 'eval_content_rmse': 0.48551493883132935, 'eval_wording_rmse': 0.6474337577819824, 'eval_mcrmse': 0.5664743185043335, 'eval_runtime': 23.585, 'eval_samples_per_second': 84.63, 'eval_steps_per_second': 42.315, 'epoch': 0.85}


                                                     
 30%|██▉       | 2300/7755 [13:03<07:31, 12.07it/s]

{'eval_loss': 0.3262585699558258, 'eval_content_rmse': 0.5427315831184387, 'eval_wording_rmse': 0.5982973575592041, 'eval_mcrmse': 0.570514440536499, 'eval_runtime': 23.4545, 'eval_samples_per_second': 85.101, 'eval_steps_per_second': 42.55, 'epoch': 0.89}


                                                     
 31%|███       | 2400/7755 [13:37<07:41, 11.61it/s]

{'eval_loss': 0.3382541239261627, 'eval_content_rmse': 0.5164539217948914, 'eval_wording_rmse': 0.6401433944702148, 'eval_mcrmse': 0.5782986879348755, 'eval_runtime': 23.4988, 'eval_samples_per_second': 84.941, 'eval_steps_per_second': 42.47, 'epoch': 0.93}


 32%|███▏      | 2500/7755 [13:48<08:32, 10.26it/s]  

{'loss': 0.287, 'learning_rate': 1.0164410058027079e-05, 'epoch': 0.97}


                                                   
 32%|███▏      | 2500/7755 [14:11<08:32, 10.26it/s]

{'eval_loss': 0.3080926239490509, 'eval_content_rmse': 0.5151339173316956, 'eval_wording_rmse': 0.5923026204109192, 'eval_mcrmse': 0.5537182688713074, 'eval_runtime': 23.3308, 'eval_samples_per_second': 85.552, 'eval_steps_per_second': 42.776, 'epoch': 0.97}


                                                     
 34%|███▎      | 2600/7755 [14:48<08:40,  9.90it/s]

{'eval_loss': 0.2975481152534485, 'eval_content_rmse': 0.48858025670051575, 'eval_wording_rmse': 0.5969802141189575, 'eval_mcrmse': 0.5427802205085754, 'eval_runtime': 23.5616, 'eval_samples_per_second': 84.714, 'eval_steps_per_second': 42.357, 'epoch': 1.01}


                                                     
 35%|███▍      | 2700/7755 [15:22<07:24, 11.38it/s]

{'eval_loss': 0.32145386934280396, 'eval_content_rmse': 0.5470613837242126, 'eval_wording_rmse': 0.5862011909484863, 'eval_mcrmse': 0.5666313171386719, 'eval_runtime': 23.4401, 'eval_samples_per_second': 85.153, 'eval_steps_per_second': 42.577, 'epoch': 1.04}


                                                     
 36%|███▌      | 2800/7755 [15:56<06:38, 12.42it/s]

{'eval_loss': 0.3021019995212555, 'eval_content_rmse': 0.484557181596756, 'eval_wording_rmse': 0.6077895164489746, 'eval_mcrmse': 0.5461733341217041, 'eval_runtime': 24.0652, 'eval_samples_per_second': 82.941, 'eval_steps_per_second': 41.471, 'epoch': 1.08}


                                                     
 37%|███▋      | 2900/7755 [16:30<06:58, 11.59it/s]

{'eval_loss': 0.2927860617637634, 'eval_content_rmse': 0.47367438673973083, 'eval_wording_rmse': 0.6010029911994934, 'eval_mcrmse': 0.5373386740684509, 'eval_runtime': 23.3903, 'eval_samples_per_second': 85.335, 'eval_steps_per_second': 42.667, 'epoch': 1.12}


 39%|███▊      | 3000/7755 [16:40<07:00, 11.30it/s]  

{'loss': 0.2329, 'learning_rate': 9.197292069632496e-06, 'epoch': 1.16}


                                                   
 39%|███▊      | 3000/7755 [17:04<07:00, 11.30it/s]

{'eval_loss': 0.2951050400733948, 'eval_content_rmse': 0.46952304244041443, 'eval_wording_rmse': 0.6080768704414368, 'eval_mcrmse': 0.5387999415397644, 'eval_runtime': 23.5274, 'eval_samples_per_second': 84.837, 'eval_steps_per_second': 42.419, 'epoch': 1.16}


                                                     
 40%|███▉      | 3100/7755 [17:37<07:27, 10.40it/s]

{'eval_loss': 0.3599271774291992, 'eval_content_rmse': 0.5215446949005127, 'eval_wording_rmse': 0.6692127585411072, 'eval_mcrmse': 0.5953787565231323, 'eval_runtime': 23.4364, 'eval_samples_per_second': 85.167, 'eval_steps_per_second': 42.583, 'epoch': 1.2}


                                                     
 41%|████▏     | 3200/7755 [18:11<06:40, 11.36it/s]

{'eval_loss': 0.2910035252571106, 'eval_content_rmse': 0.4733986556529999, 'eval_wording_rmse': 0.5982480049133301, 'eval_mcrmse': 0.5358233451843262, 'eval_runtime': 23.3335, 'eval_samples_per_second': 85.542, 'eval_steps_per_second': 42.771, 'epoch': 1.24}


                                                     
 43%|████▎     | 3300/7755 [18:50<08:31,  8.72it/s]

{'eval_loss': 0.27848494052886963, 'eval_content_rmse': 0.49555492401123047, 'eval_wording_rmse': 0.5580281019210815, 'eval_mcrmse': 0.526791512966156, 'eval_runtime': 23.605, 'eval_samples_per_second': 84.558, 'eval_steps_per_second': 42.279, 'epoch': 1.28}


                                                     
 44%|████▍     | 3400/7755 [19:23<05:48, 12.51it/s]

{'eval_loss': 0.3461683988571167, 'eval_content_rmse': 0.4615556597709656, 'eval_wording_rmse': 0.6923167705535889, 'eval_mcrmse': 0.5769362449645996, 'eval_runtime': 23.7759, 'eval_samples_per_second': 83.95, 'eval_steps_per_second': 41.975, 'epoch': 1.32}


 45%|████▌     | 3500/7755 [19:34<05:41, 12.44it/s]  

{'loss': 0.2334, 'learning_rate': 8.230174081237912e-06, 'epoch': 1.35}


                                                   
 45%|████▌     | 3500/7755 [19:57<05:41, 12.44it/s]

{'eval_loss': 0.29575783014297485, 'eval_content_rmse': 0.44513922929763794, 'eval_wording_rmse': 0.6271896958351135, 'eval_mcrmse': 0.5361644625663757, 'eval_runtime': 23.3679, 'eval_samples_per_second': 85.416, 'eval_steps_per_second': 42.708, 'epoch': 1.35}


                                                     
 46%|████▋     | 3600/7755 [20:34<24:45,  2.80it/s]

{'eval_loss': 0.28493401408195496, 'eval_content_rmse': 0.46326616406440735, 'eval_wording_rmse': 0.596030056476593, 'eval_mcrmse': 0.5296481251716614, 'eval_runtime': 23.3431, 'eval_samples_per_second': 85.507, 'eval_steps_per_second': 42.754, 'epoch': 1.39}


                                                     
 48%|████▊     | 3700/7755 [21:07<05:30, 12.25it/s]

{'eval_loss': 0.3007839024066925, 'eval_content_rmse': 0.4704475998878479, 'eval_wording_rmse': 0.6166412234306335, 'eval_mcrmse': 0.5435444116592407, 'eval_runtime': 23.4175, 'eval_samples_per_second': 85.235, 'eval_steps_per_second': 42.618, 'epoch': 1.43}


                                                     
 49%|████▉     | 3800/7755 [22:01<05:45, 11.43it/s]

{'eval_loss': 0.3113081753253937, 'eval_content_rmse': 0.5079029202461243, 'eval_wording_rmse': 0.6038630604743958, 'eval_mcrmse': 0.55588299036026, 'eval_runtime': 42.5146, 'eval_samples_per_second': 46.949, 'eval_steps_per_second': 23.474, 'epoch': 1.47}


                                                     
 50%|█████     | 3900/7755 [22:35<06:03, 10.60it/s]

{'eval_loss': 0.27640214562416077, 'eval_content_rmse': 0.4415219724178314, 'eval_wording_rmse': 0.5982161164283752, 'eval_mcrmse': 0.5198690295219421, 'eval_runtime': 23.9789, 'eval_samples_per_second': 83.24, 'eval_steps_per_second': 41.62, 'epoch': 1.51}


 52%|█████▏    | 4000/7755 [22:46<06:50,  9.14it/s]  

{'loss': 0.2582, 'learning_rate': 7.2630560928433275e-06, 'epoch': 1.55}


                                                   
 52%|█████▏    | 4000/7755 [23:09<06:50,  9.14it/s]

{'eval_loss': 0.314134418964386, 'eval_content_rmse': 0.5115872621536255, 'eval_wording_rmse': 0.6054319143295288, 'eval_mcrmse': 0.5585095882415771, 'eval_runtime': 23.0215, 'eval_samples_per_second': 86.702, 'eval_steps_per_second': 43.351, 'epoch': 1.55}


                                                     
 53%|█████▎    | 4100/7755 [23:42<04:47, 12.69it/s]

{'eval_loss': 0.2549566328525543, 'eval_content_rmse': 0.43942004442214966, 'eval_wording_rmse': 0.5628705620765686, 'eval_mcrmse': 0.5011453032493591, 'eval_runtime': 23.3885, 'eval_samples_per_second': 85.341, 'eval_steps_per_second': 42.671, 'epoch': 1.59}


                                                     
 54%|█████▍    | 4200/7755 [24:16<05:23, 10.99it/s]

{'eval_loss': 0.3083244860172272, 'eval_content_rmse': 0.5009641647338867, 'eval_wording_rmse': 0.6047177314758301, 'eval_mcrmse': 0.5528409481048584, 'eval_runtime': 23.0912, 'eval_samples_per_second': 86.44, 'eval_steps_per_second': 43.22, 'epoch': 1.62}


                                                     
 55%|█████▌    | 4300/7755 [24:50<04:35, 12.55it/s]

{'eval_loss': 0.27597755193710327, 'eval_content_rmse': 0.46783745288848877, 'eval_wording_rmse': 0.5771340131759644, 'eval_mcrmse': 0.5224857330322266, 'eval_runtime': 23.0713, 'eval_samples_per_second': 86.514, 'eval_steps_per_second': 43.257, 'epoch': 1.66}


                                                     
 57%|█████▋    | 4400/7755 [25:23<04:25, 12.62it/s]

{'eval_loss': 0.2811528742313385, 'eval_content_rmse': 0.4480772912502289, 'eval_wording_rmse': 0.6012756824493408, 'eval_mcrmse': 0.524676501750946, 'eval_runtime': 22.9778, 'eval_samples_per_second': 86.866, 'eval_steps_per_second': 43.433, 'epoch': 1.7}


 58%|█████▊    | 4500/7755 [25:33<04:51, 11.16it/s]  

{'loss': 0.1903, 'learning_rate': 6.295938104448742e-06, 'epoch': 1.74}


                                                   
 58%|█████▊    | 4500/7755 [25:56<04:51, 11.16it/s]

{'eval_loss': 0.28789880871772766, 'eval_content_rmse': 0.46022793650627136, 'eval_wording_rmse': 0.6033139824867249, 'eval_mcrmse': 0.5317709445953369, 'eval_runtime': 22.85, 'eval_samples_per_second': 87.352, 'eval_steps_per_second': 43.676, 'epoch': 1.74}


                                                     
 59%|█████▉    | 4600/7755 [26:29<04:29, 11.71it/s]

{'eval_loss': 0.2849777936935425, 'eval_content_rmse': 0.49572324752807617, 'eval_wording_rmse': 0.5693982243537903, 'eval_mcrmse': 0.5325607061386108, 'eval_runtime': 23.286, 'eval_samples_per_second': 85.717, 'eval_steps_per_second': 42.858, 'epoch': 1.78}


                                                     
 61%|██████    | 4700/7755 [27:02<04:10, 12.20it/s]

{'eval_loss': 0.31485891342163086, 'eval_content_rmse': 0.4724506437778473, 'eval_wording_rmse': 0.6375800371170044, 'eval_mcrmse': 0.5550153255462646, 'eval_runtime': 23.145, 'eval_samples_per_second': 86.239, 'eval_steps_per_second': 43.12, 'epoch': 1.82}


                                                     
 62%|██████▏   | 4800/7755 [27:36<04:22, 11.24it/s]

{'eval_loss': 0.2943800985813141, 'eval_content_rmse': 0.4946984052658081, 'eval_wording_rmse': 0.5865440964698792, 'eval_mcrmse': 0.540621280670166, 'eval_runtime': 23.265, 'eval_samples_per_second': 85.794, 'eval_steps_per_second': 42.897, 'epoch': 1.86}


                                                     
 63%|██████▎   | 4900/7755 [28:09<04:03, 11.74it/s]

{'eval_loss': 0.2773447334766388, 'eval_content_rmse': 0.5082452893257141, 'eval_wording_rmse': 0.5444044470787048, 'eval_mcrmse': 0.5263248682022095, 'eval_runtime': 22.9682, 'eval_samples_per_second': 86.903, 'eval_steps_per_second': 43.451, 'epoch': 1.9}


 64%|██████▍   | 5000/7755 [28:20<04:12, 10.90it/s]  

{'loss': 0.1989, 'learning_rate': 5.328820116054159e-06, 'epoch': 1.93}


                                                   
 64%|██████▍   | 5000/7755 [28:43<04:12, 10.90it/s]

{'eval_loss': 0.2936365604400635, 'eval_content_rmse': 0.47402074933052063, 'eval_wording_rmse': 0.6021443009376526, 'eval_mcrmse': 0.5380825400352478, 'eval_runtime': 23.1047, 'eval_samples_per_second': 86.389, 'eval_steps_per_second': 43.195, 'epoch': 1.93}


                                                     
 66%|██████▌   | 5100/7755 [29:17<03:47, 11.68it/s]

{'eval_loss': 0.28676068782806396, 'eval_content_rmse': 0.4591185748577118, 'eval_wording_rmse': 0.6022716760635376, 'eval_mcrmse': 0.5306951403617859, 'eval_runtime': 22.7448, 'eval_samples_per_second': 87.756, 'eval_steps_per_second': 43.878, 'epoch': 1.97}


                                                     
 67%|██████▋   | 5200/7755 [29:50<03:40, 11.59it/s]

{'eval_loss': 0.26497361063957214, 'eval_content_rmse': 0.4559348523616791, 'eval_wording_rmse': 0.5675127506256104, 'eval_mcrmse': 0.5117238163948059, 'eval_runtime': 22.975, 'eval_samples_per_second': 86.877, 'eval_steps_per_second': 43.439, 'epoch': 2.01}


                                                     
 68%|██████▊   | 5300/7755 [30:23<03:03, 13.41it/s]

{'eval_loss': 0.25497713685035706, 'eval_content_rmse': 0.46299439668655396, 'eval_wording_rmse': 0.5436826348304749, 'eval_mcrmse': 0.5033385157585144, 'eval_runtime': 23.2347, 'eval_samples_per_second': 85.906, 'eval_steps_per_second': 42.953, 'epoch': 2.05}


                                                     
 70%|██████▉   | 5400/7755 [30:57<03:25, 11.48it/s]

{'eval_loss': 0.27640199661254883, 'eval_content_rmse': 0.44759491086006165, 'eval_wording_rmse': 0.5936857461929321, 'eval_mcrmse': 0.5206403136253357, 'eval_runtime': 22.8678, 'eval_samples_per_second': 87.284, 'eval_steps_per_second': 43.642, 'epoch': 2.09}


 71%|███████   | 5500/7755 [31:07<03:06, 12.10it/s]  

{'loss': 0.1588, 'learning_rate': 4.361702127659574e-06, 'epoch': 2.13}


                                                   
 71%|███████   | 5500/7755 [31:30<03:06, 12.10it/s]

{'eval_loss': 0.2603631019592285, 'eval_content_rmse': 0.46310028433799744, 'eval_wording_rmse': 0.5534117221832275, 'eval_mcrmse': 0.5082560181617737, 'eval_runtime': 23.1281, 'eval_samples_per_second': 86.302, 'eval_steps_per_second': 43.151, 'epoch': 2.13}


                                                     
 72%|███████▏  | 5600/7755 [32:03<02:59, 11.99it/s]

{'eval_loss': 0.2725885212421417, 'eval_content_rmse': 0.463166743516922, 'eval_wording_rmse': 0.5750246644020081, 'eval_mcrmse': 0.5190957188606262, 'eval_runtime': 23.133, 'eval_samples_per_second': 86.284, 'eval_steps_per_second': 43.142, 'epoch': 2.17}


                                                     
 74%|███████▎  | 5700/7755 [32:37<03:16, 10.48it/s]

{'eval_loss': 0.3027075529098511, 'eval_content_rmse': 0.4553431570529938, 'eval_wording_rmse': 0.6309337615966797, 'eval_mcrmse': 0.5431384444236755, 'eval_runtime': 23.0539, 'eval_samples_per_second': 86.58, 'eval_steps_per_second': 43.29, 'epoch': 2.21}


                                                     
 75%|███████▍  | 5800/7755 [33:11<02:51, 11.40it/s]

{'eval_loss': 0.265784353017807, 'eval_content_rmse': 0.4518882930278778, 'eval_wording_rmse': 0.572158694267273, 'eval_mcrmse': 0.5120235085487366, 'eval_runtime': 24.1899, 'eval_samples_per_second': 82.514, 'eval_steps_per_second': 41.257, 'epoch': 2.24}


                                                     
 76%|███████▌  | 5900/7755 [33:44<02:49, 10.97it/s]

{'eval_loss': 0.27938827872276306, 'eval_content_rmse': 0.4585976004600525, 'eval_wording_rmse': 0.5903087854385376, 'eval_mcrmse': 0.5244531631469727, 'eval_runtime': 22.72, 'eval_samples_per_second': 87.852, 'eval_steps_per_second': 43.926, 'epoch': 2.28}


 77%|███████▋  | 6000/7755 [33:57<02:34, 11.37it/s]  

{'loss': 0.1508, 'learning_rate': 3.3945841392649903e-06, 'epoch': 2.32}


                                                   
 77%|███████▋  | 6000/7755 [34:20<02:34, 11.37it/s]

{'eval_loss': 0.2467678040266037, 'eval_content_rmse': 0.44887134432792664, 'eval_wording_rmse': 0.5404165387153625, 'eval_mcrmse': 0.4946439266204834, 'eval_runtime': 22.9136, 'eval_samples_per_second': 87.11, 'eval_steps_per_second': 43.555, 'epoch': 2.32}


                                                     
 79%|███████▊  | 6100/7755 [34:54<02:26, 11.33it/s]

{'eval_loss': 0.2629767060279846, 'eval_content_rmse': 0.4702933132648468, 'eval_wording_rmse': 0.552066445350647, 'eval_mcrmse': 0.5111798644065857, 'eval_runtime': 22.9922, 'eval_samples_per_second': 86.812, 'eval_steps_per_second': 43.406, 'epoch': 2.36}


                                                     
 80%|███████▉  | 6200/7755 [35:27<02:17, 11.32it/s]

{'eval_loss': 0.29241839051246643, 'eval_content_rmse': 0.5004003643989563, 'eval_wording_rmse': 0.57830411195755, 'eval_mcrmse': 0.5393522381782532, 'eval_runtime': 22.905, 'eval_samples_per_second': 87.142, 'eval_steps_per_second': 43.571, 'epoch': 2.4}


                                                     
 81%|████████  | 6300/7755 [36:00<02:09, 11.25it/s]

{'eval_loss': 0.2980714738368988, 'eval_content_rmse': 0.4717291593551636, 'eval_wording_rmse': 0.6112400889396667, 'eval_mcrmse': 0.5414845943450928, 'eval_runtime': 23.1161, 'eval_samples_per_second': 86.347, 'eval_steps_per_second': 43.173, 'epoch': 2.44}


                                                     
 83%|████████▎ | 6400/7755 [36:34<02:03, 10.99it/s]

{'eval_loss': 0.3586016893386841, 'eval_content_rmse': 0.45213252305984497, 'eval_wording_rmse': 0.7160865664482117, 'eval_mcrmse': 0.5841095447540283, 'eval_runtime': 23.056, 'eval_samples_per_second': 86.572, 'eval_steps_per_second': 43.286, 'epoch': 2.48}


 84%|████████▍ | 6500/7755 [36:44<01:46, 11.83it/s]  

{'loss': 0.1537, 'learning_rate': 2.4274661508704063e-06, 'epoch': 2.51}


                                                   
 84%|████████▍ | 6500/7755 [37:07<01:46, 11.83it/s]

{'eval_loss': 0.2767629325389862, 'eval_content_rmse': 0.45359668135643005, 'eval_wording_rmse': 0.5897252559661865, 'eval_mcrmse': 0.5216609835624695, 'eval_runtime': 22.7221, 'eval_samples_per_second': 87.844, 'eval_steps_per_second': 43.922, 'epoch': 2.51}


                                                     
 85%|████████▌ | 6600/7755 [37:40<01:39, 11.56it/s]

{'eval_loss': 0.29116594791412354, 'eval_content_rmse': 0.4607270061969757, 'eval_wording_rmse': 0.6083275079727173, 'eval_mcrmse': 0.5345272421836853, 'eval_runtime': 22.9776, 'eval_samples_per_second': 86.867, 'eval_steps_per_second': 43.434, 'epoch': 2.55}


                                                     
 86%|████████▋ | 6700/7755 [38:14<01:40, 10.50it/s]

{'eval_loss': 0.30421456694602966, 'eval_content_rmse': 0.4677296578884125, 'eval_wording_rmse': 0.6242260932922363, 'eval_mcrmse': 0.5459778904914856, 'eval_runtime': 23.066, 'eval_samples_per_second': 86.534, 'eval_steps_per_second': 43.267, 'epoch': 2.59}


                                                     
 88%|████████▊ | 6800/7755 [38:47<01:20, 11.84it/s]

{'eval_loss': 0.2762087881565094, 'eval_content_rmse': 0.47562745213508606, 'eval_wording_rmse': 0.5711360573768616, 'eval_mcrmse': 0.523381769657135, 'eval_runtime': 23.0985, 'eval_samples_per_second': 86.413, 'eval_steps_per_second': 43.206, 'epoch': 2.63}


                                                     
 89%|████████▉ | 6900/7755 [39:21<01:16, 11.15it/s]

{'eval_loss': 0.27555930614471436, 'eval_content_rmse': 0.4918133020401001, 'eval_wording_rmse': 0.5560921430587769, 'eval_mcrmse': 0.5239527225494385, 'eval_runtime': 22.9352, 'eval_samples_per_second': 87.028, 'eval_steps_per_second': 43.514, 'epoch': 2.67}


 90%|█████████ | 7000/7755 [39:31<01:01, 12.19it/s]

{'loss': 0.1354, 'learning_rate': 1.460348162475822e-06, 'epoch': 2.71}


                                                   
 90%|█████████ | 7000/7755 [39:54<01:01, 12.19it/s]

{'eval_loss': 0.267242968082428, 'eval_content_rmse': 0.4899439215660095, 'eval_wording_rmse': 0.5426242351531982, 'eval_mcrmse': 0.5162841081619263, 'eval_runtime': 22.7236, 'eval_samples_per_second': 87.838, 'eval_steps_per_second': 43.919, 'epoch': 2.71}


                                                   
 92%|█████████▏| 7100/7755 [40:28<00:58, 11.20it/s]

{'eval_loss': 0.26803749799728394, 'eval_content_rmse': 0.45085012912750244, 'eval_wording_rmse': 0.5768961906433105, 'eval_mcrmse': 0.5138731598854065, 'eval_runtime': 23.1161, 'eval_samples_per_second': 86.347, 'eval_steps_per_second': 43.173, 'epoch': 2.75}


                                                   
 93%|█████████▎| 7200/7755 [41:01<00:49, 11.22it/s]

{'eval_loss': 0.27529820799827576, 'eval_content_rmse': 0.4624611437320709, 'eval_wording_rmse': 0.580281138420105, 'eval_mcrmse': 0.5213711261749268, 'eval_runtime': 22.941, 'eval_samples_per_second': 87.006, 'eval_steps_per_second': 43.503, 'epoch': 2.79}


                                                   
 94%|█████████▍| 7300/7755 [41:37<00:38, 11.91it/s]

{'eval_loss': 0.27996379137039185, 'eval_content_rmse': 0.48088183999061584, 'eval_wording_rmse': 0.5733065605163574, 'eval_mcrmse': 0.5270941853523254, 'eval_runtime': 22.9563, 'eval_samples_per_second': 86.948, 'eval_steps_per_second': 43.474, 'epoch': 2.82}


                                                   
 95%|█████████▌| 7400/7755 [42:10<00:28, 12.55it/s]

{'eval_loss': 0.27960512042045593, 'eval_content_rmse': 0.4810048043727875, 'eval_wording_rmse': 0.5725772976875305, 'eval_mcrmse': 0.5267910361289978, 'eval_runtime': 22.7079, 'eval_samples_per_second': 87.899, 'eval_steps_per_second': 43.949, 'epoch': 2.86}


 97%|█████████▋| 7500/7755 [42:20<00:21, 11.72it/s]

{'loss': 0.134, 'learning_rate': 4.93230174081238e-07, 'epoch': 2.9}


                                                   
 97%|█████████▋| 7500/7755 [42:43<00:21, 11.72it/s]

{'eval_loss': 0.27894482016563416, 'eval_content_rmse': 0.47494807839393616, 'eval_wording_rmse': 0.5764670372009277, 'eval_mcrmse': 0.5257075428962708, 'eval_runtime': 22.9579, 'eval_samples_per_second': 86.942, 'eval_steps_per_second': 43.471, 'epoch': 2.9}


                                                   
 98%|█████████▊| 7600/7755 [43:18<00:13, 11.80it/s]

{'eval_loss': 0.27440524101257324, 'eval_content_rmse': 0.4775269329547882, 'eval_wording_rmse': 0.5663730502128601, 'eval_mcrmse': 0.5219500064849854, 'eval_runtime': 23.7632, 'eval_samples_per_second': 83.995, 'eval_steps_per_second': 41.998, 'epoch': 2.94}


                                                   
 99%|█████████▉| 7700/7755 [43:52<00:04, 11.95it/s]

{'eval_loss': 0.2766886353492737, 'eval_content_rmse': 0.47577133774757385, 'eval_wording_rmse': 0.5718554258346558, 'eval_mcrmse': 0.5238133668899536, 'eval_runtime': 24.1704, 'eval_samples_per_second': 82.58, 'eval_steps_per_second': 41.29, 'epoch': 2.98}


100%|██████████| 7755/7755 [43:59<00:00,  2.94it/s]


{'train_runtime': 2639.7909, 'train_samples_per_second': 5.874, 'train_steps_per_second': 2.938, 'train_loss': 0.24173875042579007, 'epoch': 3.0}


100%|██████████| 998/998 [00:23<00:00, 42.49it/s]
100%|██████████| 2/2 [00:00<00:00, 126.35it/s]


fold 3:


  1%|          | 100/9093 [00:09<12:39, 11.85it/s]
  1%|          | 100/9093 [00:20<12:39, 11.85it/s]

{'eval_loss': 0.7771609425544739, 'eval_content_rmse': 0.6885316371917725, 'eval_wording_rmse': 1.0393483638763428, 'eval_mcrmse': 0.8639400005340576, 'eval_runtime': 11.5565, 'eval_samples_per_second': 95.444, 'eval_steps_per_second': 47.766, 'epoch': 0.03}


  2%|▏         | 200/9093 [00:41<11:59, 12.35it/s]  
  2%|▏         | 200/9093 [00:42<11:59, 12.35it/s]

{'eval_loss': 0.7337146401405334, 'eval_content_rmse': 0.6567062139511108, 'eval_wording_rmse': 1.0179221630096436, 'eval_mcrmse': 0.8373141884803772, 'eval_runtime': 11.5829, 'eval_samples_per_second': 95.227, 'eval_steps_per_second': 47.657, 'epoch': 0.07}


  3%|▎         | 300/9093 [00:54<12:50, 11.41it/s]  
  3%|▎         | 300/9093 [01:05<12:50, 11.41it/s]

{'eval_loss': 0.5997489094734192, 'eval_content_rmse': 0.6073934435844421, 'eval_wording_rmse': 0.9113569259643555, 'eval_mcrmse': 0.7593752145767212, 'eval_runtime': 11.6056, 'eval_samples_per_second': 95.04, 'eval_steps_per_second': 47.563, 'epoch': 0.1}


                                                    
  4%|▍         | 400/9093 [01:29<10:44, 13.48it/s]

{'eval_loss': 0.5847964882850647, 'eval_content_rmse': 0.6155611872673035, 'eval_wording_rmse': 0.8892005681991577, 'eval_mcrmse': 0.7523808479309082, 'eval_runtime': 11.517, 'eval_samples_per_second': 95.771, 'eval_steps_per_second': 47.929, 'epoch': 0.13}


  5%|▌         | 500/9093 [01:39<12:48, 11.17it/s]  

{'loss': 0.4423, 'learning_rate': 1.4175189706367537e-05, 'epoch': 0.16}


                                                  
  5%|▌         | 500/9093 [01:51<12:48, 11.17it/s]

{'eval_loss': 0.5692111253738403, 'eval_content_rmse': 0.5603337287902832, 'eval_wording_rmse': 0.907991349697113, 'eval_mcrmse': 0.7341625690460205, 'eval_runtime': 11.7314, 'eval_samples_per_second': 94.021, 'eval_steps_per_second': 47.053, 'epoch': 0.16}


                                                    
  7%|▋         | 600/9093 [02:13<11:04, 12.77it/s]

{'eval_loss': 0.7532769441604614, 'eval_content_rmse': 0.6402177810668945, 'eval_wording_rmse': 1.0472224950790405, 'eval_mcrmse': 0.8437201380729675, 'eval_runtime': 11.8682, 'eval_samples_per_second': 92.938, 'eval_steps_per_second': 46.511, 'epoch': 0.2}


                                                    
  8%|▊         | 700/9093 [02:35<13:42, 10.21it/s]

{'eval_loss': 0.5836065411567688, 'eval_content_rmse': 0.5549944639205933, 'eval_wording_rmse': 0.9269273281097412, 'eval_mcrmse': 0.7409608960151672, 'eval_runtime': 11.5901, 'eval_samples_per_second': 95.167, 'eval_steps_per_second': 47.627, 'epoch': 0.23}


                                                    
  9%|▉         | 800/9093 [02:57<11:24, 12.12it/s]

{'eval_loss': 0.9314583539962769, 'eval_content_rmse': 0.7777508497238159, 'eval_wording_rmse': 1.1216151714324951, 'eval_mcrmse': 0.9496830105781555, 'eval_runtime': 11.7321, 'eval_samples_per_second': 94.016, 'eval_steps_per_second': 47.05, 'epoch': 0.26}


                                                    
 10%|▉         | 900/9093 [03:23<11:53, 11.49it/s]

{'eval_loss': 0.4833580553531647, 'eval_content_rmse': 0.5793153047561646, 'eval_wording_rmse': 0.7944243550300598, 'eval_mcrmse': 0.6868698596954346, 'eval_runtime': 11.4735, 'eval_samples_per_second': 96.135, 'eval_steps_per_second': 48.111, 'epoch': 0.3}


 11%|█         | 1000/9093 [03:38<12:58, 10.39it/s] 

{'loss': 0.3192, 'learning_rate': 1.3350379412735072e-05, 'epoch': 0.33}


                                                   
 11%|█         | 1000/9093 [03:51<12:58, 10.39it/s]

{'eval_loss': 0.8098862767219543, 'eval_content_rmse': 0.732462465763092, 'eval_wording_rmse': 1.04080331325531, 'eval_mcrmse': 0.8866329193115234, 'eval_runtime': 12.1339, 'eval_samples_per_second': 90.902, 'eval_steps_per_second': 45.492, 'epoch': 0.33}


                                                     
 12%|█▏        | 1100/9093 [04:13<12:51, 10.37it/s]

{'eval_loss': 0.676311194896698, 'eval_content_rmse': 0.7567845582962036, 'eval_wording_rmse': 0.8831189274787903, 'eval_mcrmse': 0.8199517726898193, 'eval_runtime': 11.5823, 'eval_samples_per_second': 95.232, 'eval_steps_per_second': 47.659, 'epoch': 0.36}


                                                     
 13%|█▎        | 1200/9093 [04:34<11:35, 11.35it/s]

{'eval_loss': 0.7778690457344055, 'eval_content_rmse': 0.7930580973625183, 'eval_wording_rmse': 0.9627030491828918, 'eval_mcrmse': 0.8778805732727051, 'eval_runtime': 11.7058, 'eval_samples_per_second': 94.226, 'eval_steps_per_second': 47.156, 'epoch': 0.4}


                                                     
 14%|█▍        | 1300/9093 [05:01<11:58, 10.84it/s]

{'eval_loss': 0.5161569118499756, 'eval_content_rmse': 0.5313117504119873, 'eval_wording_rmse': 0.8660377860069275, 'eval_mcrmse': 0.6986747980117798, 'eval_runtime': 11.469, 'eval_samples_per_second': 96.172, 'eval_steps_per_second': 48.13, 'epoch': 0.43}


                                                     
 15%|█▌        | 1400/9093 [05:23<14:14,  9.00it/s]

{'eval_loss': 0.9516938924789429, 'eval_content_rmse': 0.9478767514228821, 'eval_wording_rmse': 1.0024560689926147, 'eval_mcrmse': 0.9751664400100708, 'eval_runtime': 11.3675, 'eval_samples_per_second': 97.031, 'eval_steps_per_second': 48.559, 'epoch': 0.46}


 16%|█▋        | 1500/9093 [05:33<10:02, 12.60it/s]  

{'loss': 0.2894, 'learning_rate': 1.2525569119102606e-05, 'epoch': 0.49}


                                                   
 16%|█▋        | 1500/9093 [05:44<10:02, 12.60it/s]

{'eval_loss': 0.6093422174453735, 'eval_content_rmse': 0.719394326210022, 'eval_wording_rmse': 0.8373510837554932, 'eval_mcrmse': 0.7783727049827576, 'eval_runtime': 11.5237, 'eval_samples_per_second': 95.715, 'eval_steps_per_second': 47.901, 'epoch': 0.49}


                                                     
 18%|█▊        | 1600/9093 [06:06<10:16, 12.15it/s]

{'eval_loss': 0.6482486128807068, 'eval_content_rmse': 0.7130435109138489, 'eval_wording_rmse': 0.8877309560775757, 'eval_mcrmse': 0.8003872632980347, 'eval_runtime': 11.6141, 'eval_samples_per_second': 94.971, 'eval_steps_per_second': 47.529, 'epoch': 0.53}


                                                     
 19%|█▊        | 1700/9093 [06:29<12:12, 10.10it/s]

{'eval_loss': 0.48571640253067017, 'eval_content_rmse': 0.5871345400810242, 'eval_wording_rmse': 0.7916476726531982, 'eval_mcrmse': 0.6893911361694336, 'eval_runtime': 11.4379, 'eval_samples_per_second': 96.434, 'eval_steps_per_second': 48.261, 'epoch': 0.56}


                                                     
 20%|█▉        | 1800/9093 [06:51<11:46, 10.32it/s]

{'eval_loss': 0.6178632974624634, 'eval_content_rmse': 0.7495618462562561, 'eval_wording_rmse': 0.8209043741226196, 'eval_mcrmse': 0.7852331399917603, 'eval_runtime': 11.7272, 'eval_samples_per_second': 94.055, 'eval_steps_per_second': 47.07, 'epoch': 0.59}


                                                     
 21%|██        | 1900/9093 [07:14<11:14, 10.66it/s]

{'eval_loss': 0.5182784795761108, 'eval_content_rmse': 0.6332648396492004, 'eval_wording_rmse': 0.7972029447555542, 'eval_mcrmse': 0.7152339220046997, 'eval_runtime': 11.716, 'eval_samples_per_second': 94.145, 'eval_steps_per_second': 47.115, 'epoch': 0.63}


 22%|██▏       | 2000/9093 [07:24<10:46, 10.97it/s]  

{'loss': 0.2696, 'learning_rate': 1.1700758825470141e-05, 'epoch': 0.66}


                                                   
 22%|██▏       | 2000/9093 [07:36<10:46, 10.97it/s]

{'eval_loss': 0.5446394681930542, 'eval_content_rmse': 0.6645287871360779, 'eval_wording_rmse': 0.804786205291748, 'eval_mcrmse': 0.7346575260162354, 'eval_runtime': 11.6305, 'eval_samples_per_second': 94.836, 'eval_steps_per_second': 47.461, 'epoch': 0.66}


                                                     
 23%|██▎       | 2100/9093 [07:58<11:07, 10.48it/s]

{'eval_loss': 0.6144695281982422, 'eval_content_rmse': 0.5693565011024475, 'eval_wording_rmse': 0.9511953592300415, 'eval_mcrmse': 0.7602759599685669, 'eval_runtime': 11.6425, 'eval_samples_per_second': 94.739, 'eval_steps_per_second': 47.412, 'epoch': 0.69}


                                                     
 24%|██▍       | 2200/9093 [08:20<10:46, 10.66it/s]

{'eval_loss': 0.5118482708930969, 'eval_content_rmse': 0.7308876514434814, 'eval_wording_rmse': 0.6996427178382874, 'eval_mcrmse': 0.715265154838562, 'eval_runtime': 11.4273, 'eval_samples_per_second': 96.523, 'eval_steps_per_second': 48.305, 'epoch': 0.73}


                                                     
 25%|██▌       | 2300/9093 [08:42<09:09, 12.37it/s]

{'eval_loss': 0.5697913765907288, 'eval_content_rmse': 0.691301703453064, 'eval_wording_rmse': 0.8134397268295288, 'eval_mcrmse': 0.7523707151412964, 'eval_runtime': 11.5028, 'eval_samples_per_second': 95.89, 'eval_steps_per_second': 47.988, 'epoch': 0.76}


                                                     
 26%|██▋       | 2400/9093 [09:04<08:42, 12.81it/s]

{'eval_loss': 0.539092481136322, 'eval_content_rmse': 0.628655195236206, 'eval_wording_rmse': 0.8264246582984924, 'eval_mcrmse': 0.7275398969650269, 'eval_runtime': 11.5951, 'eval_samples_per_second': 95.126, 'eval_steps_per_second': 47.606, 'epoch': 0.79}


 27%|██▋       | 2500/9093 [09:15<10:39, 10.31it/s]  

{'loss': 0.2474, 'learning_rate': 1.0875948531837678e-05, 'epoch': 0.82}


                                                   
 27%|██▋       | 2500/9093 [09:27<10:39, 10.31it/s]

{'eval_loss': 0.6034067869186401, 'eval_content_rmse': 0.7312642931938171, 'eval_wording_rmse': 0.819796621799469, 'eval_mcrmse': 0.7755304574966431, 'eval_runtime': 11.3557, 'eval_samples_per_second': 97.132, 'eval_steps_per_second': 48.61, 'epoch': 0.82}


                                                     
 29%|██▊       | 2600/9093 [09:49<09:01, 12.00it/s]

{'eval_loss': 0.600296139717102, 'eval_content_rmse': 0.7025183439254761, 'eval_wording_rmse': 0.8408681750297546, 'eval_mcrmse': 0.771693229675293, 'eval_runtime': 11.7325, 'eval_samples_per_second': 94.012, 'eval_steps_per_second': 47.049, 'epoch': 0.86}


                                                     
 30%|██▉       | 2700/9093 [10:11<10:41,  9.97it/s]

{'eval_loss': 0.5137946605682373, 'eval_content_rmse': 0.6693219542503357, 'eval_wording_rmse': 0.7613129615783691, 'eval_mcrmse': 0.7153174877166748, 'eval_runtime': 11.6339, 'eval_samples_per_second': 94.809, 'eval_steps_per_second': 47.448, 'epoch': 0.89}


                                                     
 31%|███       | 2800/9093 [10:33<09:51, 10.63it/s]

{'eval_loss': 0.43243616819381714, 'eval_content_rmse': 0.5663167834281921, 'eval_wording_rmse': 0.7376703023910522, 'eval_mcrmse': 0.6519935131072998, 'eval_runtime': 11.4516, 'eval_samples_per_second': 96.319, 'eval_steps_per_second': 48.203, 'epoch': 0.92}


                                                     
 32%|███▏      | 2900/9093 [10:55<09:50, 10.48it/s]

{'eval_loss': 0.6763200759887695, 'eval_content_rmse': 0.7857645153999329, 'eval_wording_rmse': 0.8574466109275818, 'eval_mcrmse': 0.8216055631637573, 'eval_runtime': 11.6028, 'eval_samples_per_second': 95.063, 'eval_steps_per_second': 47.575, 'epoch': 0.96}


 33%|███▎      | 3000/9093 [11:06<09:44, 10.42it/s]  

{'loss': 0.2607, 'learning_rate': 1.0051138238205214e-05, 'epoch': 0.99}


                                                   
 33%|███▎      | 3000/9093 [11:17<09:44, 10.42it/s]

{'eval_loss': 0.47830113768577576, 'eval_content_rmse': 0.6093809604644775, 'eval_wording_rmse': 0.7650210857391357, 'eval_mcrmse': 0.6872010231018066, 'eval_runtime': 11.3396, 'eval_samples_per_second': 97.27, 'eval_steps_per_second': 48.679, 'epoch': 0.99}


                                                     
 34%|███▍      | 3100/9093 [11:39<08:05, 12.35it/s]

{'eval_loss': 0.5031214952468872, 'eval_content_rmse': 0.6963313817977905, 'eval_wording_rmse': 0.7220564484596252, 'eval_mcrmse': 0.7091939449310303, 'eval_runtime': 11.4823, 'eval_samples_per_second': 96.061, 'eval_steps_per_second': 48.074, 'epoch': 1.02}


                                                     
 35%|███▌      | 3200/9093 [12:01<08:12, 11.96it/s]

{'eval_loss': 0.5643486380577087, 'eval_content_rmse': 0.6622217297554016, 'eval_wording_rmse': 0.8307585120201111, 'eval_mcrmse': 0.7464901208877563, 'eval_runtime': 11.6009, 'eval_samples_per_second': 95.079, 'eval_steps_per_second': 47.583, 'epoch': 1.06}


                                                     
 36%|███▋      | 3300/9093 [12:23<08:30, 11.35it/s]

{'eval_loss': 0.4863215982913971, 'eval_content_rmse': 0.6547120213508606, 'eval_wording_rmse': 0.7375603914260864, 'eval_mcrmse': 0.6961362361907959, 'eval_runtime': 11.4857, 'eval_samples_per_second': 96.033, 'eval_steps_per_second': 48.06, 'epoch': 1.09}


                                                     
 37%|███▋      | 3400/9093 [12:44<07:15, 13.07it/s]

{'eval_loss': 0.49898093938827515, 'eval_content_rmse': 0.6854627728462219, 'eval_wording_rmse': 0.7267070412635803, 'eval_mcrmse': 0.7060849070549011, 'eval_runtime': 11.4915, 'eval_samples_per_second': 95.984, 'eval_steps_per_second': 48.036, 'epoch': 1.12}


 38%|███▊      | 3500/9093 [12:55<08:02, 11.59it/s]  

{'loss': 0.1864, 'learning_rate': 9.226327944572747e-06, 'epoch': 1.15}


                                                   
 38%|███▊      | 3500/9093 [13:07<08:02, 11.59it/s]

{'eval_loss': 0.46388959884643555, 'eval_content_rmse': 0.64876389503479, 'eval_wording_rmse': 0.711958110332489, 'eval_mcrmse': 0.6803610324859619, 'eval_runtime': 11.4959, 'eval_samples_per_second': 95.947, 'eval_steps_per_second': 48.017, 'epoch': 1.15}


                                                     
 40%|███▉      | 3600/9093 [13:28<10:21,  8.84it/s]

{'eval_loss': 0.6544095277786255, 'eval_content_rmse': 0.7994992136955261, 'eval_wording_rmse': 0.8183032274246216, 'eval_mcrmse': 0.8089011907577515, 'eval_runtime': 11.3292, 'eval_samples_per_second': 97.359, 'eval_steps_per_second': 48.724, 'epoch': 1.19}


                                                     
 41%|████      | 3700/9093 [13:50<08:03, 11.16it/s]

{'eval_loss': 0.4315047264099121, 'eval_content_rmse': 0.602517306804657, 'eval_wording_rmse': 0.7070940732955933, 'eval_mcrmse': 0.6548056602478027, 'eval_runtime': 11.782, 'eval_samples_per_second': 93.618, 'eval_steps_per_second': 46.851, 'epoch': 1.22}


                                                     
 42%|████▏     | 3800/9093 [14:12<07:10, 12.31it/s]

{'eval_loss': 0.4172956943511963, 'eval_content_rmse': 0.5605742931365967, 'eval_wording_rmse': 0.7213516235351562, 'eval_mcrmse': 0.6409629583358765, 'eval_runtime': 11.5128, 'eval_samples_per_second': 95.806, 'eval_steps_per_second': 47.946, 'epoch': 1.25}


                                                     
 43%|████▎     | 3900/9093 [14:39<08:13, 10.53it/s]

{'eval_loss': 0.6105090975761414, 'eval_content_rmse': 0.7900243401527405, 'eval_wording_rmse': 0.7725798487663269, 'eval_mcrmse': 0.7813020944595337, 'eval_runtime': 11.5174, 'eval_samples_per_second': 95.768, 'eval_steps_per_second': 47.927, 'epoch': 1.29}


 44%|████▍     | 4000/9093 [14:50<07:20, 11.57it/s]  

{'loss': 0.194, 'learning_rate': 8.401517650940284e-06, 'epoch': 1.32}


                                                   
 44%|████▍     | 4000/9093 [15:02<07:20, 11.57it/s]

{'eval_loss': 0.41458556056022644, 'eval_content_rmse': 0.5960577130317688, 'eval_wording_rmse': 0.6883938312530518, 'eval_mcrmse': 0.6422257423400879, 'eval_runtime': 12.3092, 'eval_samples_per_second': 89.608, 'eval_steps_per_second': 44.845, 'epoch': 1.32}


                                                     
 45%|████▌     | 4100/9093 [15:24<07:24, 11.24it/s]

{'eval_loss': 0.6476739645004272, 'eval_content_rmse': 0.768568754196167, 'eval_wording_rmse': 0.8394338488578796, 'eval_mcrmse': 0.8040013313293457, 'eval_runtime': 11.5909, 'eval_samples_per_second': 95.161, 'eval_steps_per_second': 47.623, 'epoch': 1.35}


                                                     
 46%|████▌     | 4200/9093 [15:47<07:34, 10.76it/s]

{'eval_loss': 0.5059490203857422, 'eval_content_rmse': 0.6703864932060242, 'eval_wording_rmse': 0.7499869465827942, 'eval_mcrmse': 0.7101867198944092, 'eval_runtime': 11.6564, 'eval_samples_per_second': 94.626, 'eval_steps_per_second': 47.356, 'epoch': 1.39}


                                                     
 47%|████▋     | 4300/9093 [16:11<07:07, 11.21it/s]

{'eval_loss': 0.5046272277832031, 'eval_content_rmse': 0.6464970707893372, 'eval_wording_rmse': 0.7689576148986816, 'eval_mcrmse': 0.707727313041687, 'eval_runtime': 11.6041, 'eval_samples_per_second': 95.053, 'eval_steps_per_second': 47.569, 'epoch': 1.42}


                                                     
 48%|████▊     | 4400/9093 [16:33<07:03, 11.07it/s]

{'eval_loss': 0.5480673909187317, 'eval_content_rmse': 0.7526171803474426, 'eval_wording_rmse': 0.7278061509132385, 'eval_mcrmse': 0.7402116656303406, 'eval_runtime': 11.6133, 'eval_samples_per_second': 94.977, 'eval_steps_per_second': 47.532, 'epoch': 1.45}


 49%|████▉     | 4500/9093 [16:44<06:04, 12.60it/s]  

{'loss': 0.1867, 'learning_rate': 7.5767073573078195e-06, 'epoch': 1.48}


                                                   
 49%|████▉     | 4500/9093 [16:55<06:04, 12.60it/s]

{'eval_loss': 0.4773922562599182, 'eval_content_rmse': 0.6896535754203796, 'eval_wording_rmse': 0.6922155618667603, 'eval_mcrmse': 0.6909345388412476, 'eval_runtime': 11.6422, 'eval_samples_per_second': 94.742, 'eval_steps_per_second': 47.414, 'epoch': 1.48}


                                                     
 51%|█████     | 4600/9093 [17:17<06:31, 11.47it/s]

{'eval_loss': 0.4329835772514343, 'eval_content_rmse': 0.621514618396759, 'eval_wording_rmse': 0.6925943493843079, 'eval_mcrmse': 0.6570544838905334, 'eval_runtime': 11.59, 'eval_samples_per_second': 95.169, 'eval_steps_per_second': 47.627, 'epoch': 1.52}


                                                     
 52%|█████▏    | 4700/9093 [17:39<06:36, 11.07it/s]

{'eval_loss': 0.4892730712890625, 'eval_content_rmse': 0.565468966960907, 'eval_wording_rmse': 0.8116593956947327, 'eval_mcrmse': 0.6885641813278198, 'eval_runtime': 11.6434, 'eval_samples_per_second': 94.732, 'eval_steps_per_second': 47.409, 'epoch': 1.55}


                                                     
 53%|█████▎    | 4800/9093 [18:02<06:07, 11.69it/s]

{'eval_loss': 0.570477306842804, 'eval_content_rmse': 0.6976007223129272, 'eval_wording_rmse': 0.8088933229446411, 'eval_mcrmse': 0.7532470226287842, 'eval_runtime': 11.7702, 'eval_samples_per_second': 93.711, 'eval_steps_per_second': 46.898, 'epoch': 1.58}


                                                     
 54%|█████▍    | 4900/9093 [18:23<05:57, 11.73it/s]

{'eval_loss': 0.5601500272750854, 'eval_content_rmse': 0.7328913807868958, 'eval_wording_rmse': 0.7636553645133972, 'eval_mcrmse': 0.7482733726501465, 'eval_runtime': 11.6655, 'eval_samples_per_second': 94.552, 'eval_steps_per_second': 47.319, 'epoch': 1.62}


 55%|█████▍    | 5000/9093 [18:33<05:48, 11.73it/s]  

{'loss': 0.1748, 'learning_rate': 6.751897063675355e-06, 'epoch': 1.65}


                                                   
 55%|█████▍    | 5000/9093 [18:45<05:48, 11.73it/s]

{'eval_loss': 0.4353365898132324, 'eval_content_rmse': 0.6033258438110352, 'eval_wording_rmse': 0.7118082046508789, 'eval_mcrmse': 0.657567024230957, 'eval_runtime': 11.6891, 'eval_samples_per_second': 94.362, 'eval_steps_per_second': 47.224, 'epoch': 1.65}


                                                     
 56%|█████▌    | 5100/9093 [19:07<06:03, 10.99it/s]

{'eval_loss': 0.39270851016044617, 'eval_content_rmse': 0.559158980846405, 'eval_wording_rmse': 0.6875736117362976, 'eval_mcrmse': 0.6233662962913513, 'eval_runtime': 11.7233, 'eval_samples_per_second': 94.086, 'eval_steps_per_second': 47.086, 'epoch': 1.68}


                                                     
 57%|█████▋    | 5200/9093 [19:30<05:44, 11.28it/s]

{'eval_loss': 0.5389819741249084, 'eval_content_rmse': 0.6292849183082581, 'eval_wording_rmse': 0.825811505317688, 'eval_mcrmse': 0.7275482416152954, 'eval_runtime': 11.7007, 'eval_samples_per_second': 94.268, 'eval_steps_per_second': 47.177, 'epoch': 1.72}


                                                     
 58%|█████▊    | 5300/9093 [19:52<06:09, 10.26it/s]

{'eval_loss': 0.569097638130188, 'eval_content_rmse': 0.7624866962432861, 'eval_wording_rmse': 0.7461963295936584, 'eval_mcrmse': 0.7543414831161499, 'eval_runtime': 12.1427, 'eval_samples_per_second': 90.836, 'eval_steps_per_second': 45.459, 'epoch': 1.75}


                                                     
 59%|█████▉    | 5400/9093 [20:19<05:01, 12.26it/s]

{'eval_loss': 0.6360993385314941, 'eval_content_rmse': 0.8192242980003357, 'eval_wording_rmse': 0.775286853313446, 'eval_mcrmse': 0.7972555756568909, 'eval_runtime': 11.4881, 'eval_samples_per_second': 96.012, 'eval_steps_per_second': 48.05, 'epoch': 1.78}


 60%|██████    | 5500/9093 [20:29<05:18, 11.29it/s]  

{'loss': 0.187, 'learning_rate': 5.92708677004289e-06, 'epoch': 1.81}


                                                   
 60%|██████    | 5500/9093 [20:40<05:18, 11.29it/s]

{'eval_loss': 0.5382242798805237, 'eval_content_rmse': 0.714409589767456, 'eval_wording_rmse': 0.7523747086524963, 'eval_mcrmse': 0.7333921194076538, 'eval_runtime': 11.4618, 'eval_samples_per_second': 96.233, 'eval_steps_per_second': 48.16, 'epoch': 1.81}


                                                     
 62%|██████▏   | 5600/9093 [21:07<05:03, 11.50it/s]

{'eval_loss': 0.5462802648544312, 'eval_content_rmse': 0.6407303810119629, 'eval_wording_rmse': 0.8258480429649353, 'eval_mcrmse': 0.7332892417907715, 'eval_runtime': 11.4097, 'eval_samples_per_second': 96.672, 'eval_steps_per_second': 48.38, 'epoch': 1.85}


                                                     
 63%|██████▎   | 5700/9093 [21:28<04:47, 11.79it/s]

{'eval_loss': 0.5737130045890808, 'eval_content_rmse': 0.7095988988876343, 'eval_wording_rmse': 0.8024311661720276, 'eval_mcrmse': 0.7560150623321533, 'eval_runtime': 11.3691, 'eval_samples_per_second': 97.018, 'eval_steps_per_second': 48.553, 'epoch': 1.88}


                                                     
 64%|██████▍   | 5800/9093 [21:50<04:33, 12.06it/s]

{'eval_loss': 0.5317720174789429, 'eval_content_rmse': 0.6547331809997559, 'eval_wording_rmse': 0.7967861890792847, 'eval_mcrmse': 0.7257596850395203, 'eval_runtime': 11.6571, 'eval_samples_per_second': 94.62, 'eval_steps_per_second': 47.353, 'epoch': 1.91}


                                                     
 65%|██████▍   | 5900/9093 [22:12<04:53, 10.90it/s]

{'eval_loss': 0.6765733957290649, 'eval_content_rmse': 0.7639681100845337, 'eval_wording_rmse': 0.8772110342979431, 'eval_mcrmse': 0.820589542388916, 'eval_runtime': 11.4846, 'eval_samples_per_second': 96.042, 'eval_steps_per_second': 48.065, 'epoch': 1.95}


 66%|██████▌   | 6000/9093 [22:22<04:16, 12.05it/s]  

{'loss': 0.168, 'learning_rate': 5.102276476410426e-06, 'epoch': 1.98}


                                                   
 66%|██████▌   | 6000/9093 [22:34<04:16, 12.05it/s]

{'eval_loss': 0.48214685916900635, 'eval_content_rmse': 0.6524421572685242, 'eval_wording_rmse': 0.7339022159576416, 'eval_mcrmse': 0.6931722164154053, 'eval_runtime': 11.3435, 'eval_samples_per_second': 97.237, 'eval_steps_per_second': 48.662, 'epoch': 1.98}


                                                     
 67%|██████▋   | 6100/9093 [22:56<04:27, 11.17it/s]

{'eval_loss': 0.5688298344612122, 'eval_content_rmse': 0.7402284741401672, 'eval_wording_rmse': 0.7679327726364136, 'eval_mcrmse': 0.7540806531906128, 'eval_runtime': 11.766, 'eval_samples_per_second': 93.745, 'eval_steps_per_second': 46.915, 'epoch': 2.01}


                                                     
 68%|██████▊   | 6200/9093 [23:18<04:26, 10.86it/s]

{'eval_loss': 0.4924018681049347, 'eval_content_rmse': 0.6767070293426514, 'eval_wording_rmse': 0.7258589267730713, 'eval_mcrmse': 0.7012829780578613, 'eval_runtime': 11.2762, 'eval_samples_per_second': 97.817, 'eval_steps_per_second': 48.953, 'epoch': 2.05}


                                                     
 69%|██████▉   | 6300/9093 [23:44<04:05, 11.36it/s]

{'eval_loss': 0.5489266514778137, 'eval_content_rmse': 0.7433717250823975, 'eval_wording_rmse': 0.7384117245674133, 'eval_mcrmse': 0.740891695022583, 'eval_runtime': 11.5347, 'eval_samples_per_second': 95.624, 'eval_steps_per_second': 47.855, 'epoch': 2.08}


                                                     
 70%|███████   | 6400/9093 [24:06<03:56, 11.41it/s]

{'eval_loss': 0.6133775115013123, 'eval_content_rmse': 0.7902252078056335, 'eval_wording_rmse': 0.7760791778564453, 'eval_mcrmse': 0.7831522226333618, 'eval_runtime': 11.5645, 'eval_samples_per_second': 95.378, 'eval_steps_per_second': 47.732, 'epoch': 2.11}


 71%|███████▏  | 6500/9093 [24:16<03:43, 11.62it/s]  

{'loss': 0.1254, 'learning_rate': 4.277466182777961e-06, 'epoch': 2.14}


                                                   
 71%|███████▏  | 6500/9093 [24:27<03:43, 11.62it/s]

{'eval_loss': 0.4690593183040619, 'eval_content_rmse': 0.6375967860221863, 'eval_wording_rmse': 0.729101300239563, 'eval_mcrmse': 0.6833490133285522, 'eval_runtime': 11.4631, 'eval_samples_per_second': 96.222, 'eval_steps_per_second': 48.155, 'epoch': 2.14}


                                                     
 73%|███████▎  | 6600/9093 [24:49<03:35, 11.58it/s]

{'eval_loss': 0.5613123178482056, 'eval_content_rmse': 0.7903811931610107, 'eval_wording_rmse': 0.705636203289032, 'eval_mcrmse': 0.7480087280273438, 'eval_runtime': 11.5274, 'eval_samples_per_second': 95.685, 'eval_steps_per_second': 47.886, 'epoch': 2.18}


                                                     
 74%|███████▎  | 6700/9093 [25:11<03:18, 12.03it/s]

{'eval_loss': 0.587780773639679, 'eval_content_rmse': 0.7755570411682129, 'eval_wording_rmse': 0.7576759457588196, 'eval_mcrmse': 0.7666164636611938, 'eval_runtime': 11.5553, 'eval_samples_per_second': 95.454, 'eval_steps_per_second': 47.77, 'epoch': 2.21}


                                                     
 75%|███████▍  | 6800/9093 [25:32<03:08, 12.17it/s]

{'eval_loss': 0.4590378403663635, 'eval_content_rmse': 0.6194198727607727, 'eval_wording_rmse': 0.7310233116149902, 'eval_mcrmse': 0.6752215623855591, 'eval_runtime': 11.3846, 'eval_samples_per_second': 96.885, 'eval_steps_per_second': 48.487, 'epoch': 2.24}


                                                     
 76%|███████▌  | 6900/9093 [25:54<02:52, 12.69it/s]

{'eval_loss': 0.480063796043396, 'eval_content_rmse': 0.6734977960586548, 'eval_wording_rmse': 0.7117080688476562, 'eval_mcrmse': 0.6926029324531555, 'eval_runtime': 11.6855, 'eval_samples_per_second': 94.391, 'eval_steps_per_second': 47.238, 'epoch': 2.28}


 77%|███████▋  | 7000/9093 [26:10<02:56, 11.88it/s]  

{'loss': 0.1143, 'learning_rate': 3.452655889145497e-06, 'epoch': 2.31}


                                                   
 77%|███████▋  | 7000/9093 [26:21<02:56, 11.88it/s]

{'eval_loss': 0.45110297203063965, 'eval_content_rmse': 0.6593466997146606, 'eval_wording_rmse': 0.6837161779403687, 'eval_mcrmse': 0.6715314388275146, 'eval_runtime': 11.4254, 'eval_samples_per_second': 96.54, 'eval_steps_per_second': 48.314, 'epoch': 2.31}


                                                     
 78%|███████▊  | 7100/9093 [26:43<02:47, 11.88it/s]

{'eval_loss': 0.5227640867233276, 'eval_content_rmse': 0.7489913105964661, 'eval_wording_rmse': 0.696088969707489, 'eval_mcrmse': 0.7225401401519775, 'eval_runtime': 11.5261, 'eval_samples_per_second': 95.696, 'eval_steps_per_second': 47.891, 'epoch': 2.34}


                                                     
 79%|███████▉  | 7200/9093 [27:05<02:49, 11.17it/s]

{'eval_loss': 0.48595860600471497, 'eval_content_rmse': 0.6763207316398621, 'eval_wording_rmse': 0.7172918915748596, 'eval_mcrmse': 0.6968063116073608, 'eval_runtime': 11.686, 'eval_samples_per_second': 94.387, 'eval_steps_per_second': 47.236, 'epoch': 2.38}


                                                     
 80%|████████  | 7300/9093 [27:29<02:59,  9.97it/s]

{'eval_loss': 0.46059274673461914, 'eval_content_rmse': 0.650600254535675, 'eval_wording_rmse': 0.7056237459182739, 'eval_mcrmse': 0.6781120300292969, 'eval_runtime': 12.0594, 'eval_samples_per_second': 91.464, 'eval_steps_per_second': 45.773, 'epoch': 2.41}


                                                     
 81%|████████▏ | 7400/9093 [27:53<02:33, 11.05it/s]

{'eval_loss': 0.477129191160202, 'eval_content_rmse': 0.6957151293754578, 'eval_wording_rmse': 0.6857397556304932, 'eval_mcrmse': 0.6907274723052979, 'eval_runtime': 12.0523, 'eval_samples_per_second': 91.518, 'eval_steps_per_second': 45.8, 'epoch': 2.44}


 82%|████████▏ | 7500/9093 [28:04<02:34, 10.33it/s]  

{'loss': 0.1129, 'learning_rate': 2.627845595513032e-06, 'epoch': 2.47}


                                                   
 82%|████████▏ | 7500/9093 [28:16<02:34, 10.33it/s]

{'eval_loss': 0.5558547973632812, 'eval_content_rmse': 0.7651497721672058, 'eval_wording_rmse': 0.7254347205162048, 'eval_mcrmse': 0.7452922463417053, 'eval_runtime': 12.5087, 'eval_samples_per_second': 88.179, 'eval_steps_per_second': 44.129, 'epoch': 2.47}


                                                     
 84%|████████▎ | 7600/9093 [28:41<02:29,  9.98it/s]

{'eval_loss': 0.5370160341262817, 'eval_content_rmse': 0.7304589152336121, 'eval_wording_rmse': 0.7351608276367188, 'eval_mcrmse': 0.7328099012374878, 'eval_runtime': 12.7276, 'eval_samples_per_second': 86.662, 'eval_steps_per_second': 43.37, 'epoch': 2.51}


                                                   
 85%|████████▍ | 7700/9093 [29:06<02:06, 10.97it/s]

{'eval_loss': 0.5432744026184082, 'eval_content_rmse': 0.7481691837310791, 'eval_wording_rmse': 0.7258039116859436, 'eval_mcrmse': 0.736986517906189, 'eval_runtime': 13.56, 'eval_samples_per_second': 81.342, 'eval_steps_per_second': 40.708, 'epoch': 2.54}


                                                   
 86%|████████▌ | 7800/9093 [29:31<02:08, 10.09it/s]

{'eval_loss': 0.5011699199676514, 'eval_content_rmse': 0.6934045553207397, 'eval_wording_rmse': 0.7221704125404358, 'eval_mcrmse': 0.7077875137329102, 'eval_runtime': 12.7389, 'eval_samples_per_second': 86.585, 'eval_steps_per_second': 43.332, 'epoch': 2.57}


                                                   
 87%|████████▋ | 7900/9093 [29:56<01:56, 10.21it/s]

{'eval_loss': 0.5918725728988647, 'eval_content_rmse': 0.7880927920341492, 'eval_wording_rmse': 0.7501032948493958, 'eval_mcrmse': 0.7690980434417725, 'eval_runtime': 13.3138, 'eval_samples_per_second': 82.846, 'eval_steps_per_second': 41.461, 'epoch': 2.61}


 88%|████████▊ | 8000/9093 [30:09<01:56,  9.38it/s]

{'loss': 0.1043, 'learning_rate': 1.8030353018805676e-06, 'epoch': 2.64}


                                                   
 88%|████████▊ | 8000/9093 [30:24<01:56,  9.38it/s]

{'eval_loss': 0.5745278596878052, 'eval_content_rmse': 0.7798406481742859, 'eval_wording_rmse': 0.7354617714881897, 'eval_mcrmse': 0.7576512098312378, 'eval_runtime': 15.0465, 'eval_samples_per_second': 73.306, 'eval_steps_per_second': 36.686, 'epoch': 2.64}


                                                   
 89%|████████▉ | 8100/9093 [30:50<01:34, 10.49it/s]

{'eval_loss': 0.5109315514564514, 'eval_content_rmse': 0.7263309955596924, 'eval_wording_rmse': 0.7030690908432007, 'eval_mcrmse': 0.7147000432014465, 'eval_runtime': 12.8909, 'eval_samples_per_second': 85.565, 'eval_steps_per_second': 42.821, 'epoch': 2.67}


                                                   
 90%|█████████ | 8200/9093 [31:17<01:55,  7.73it/s]

{'eval_loss': 0.5328289270401001, 'eval_content_rmse': 0.7339917421340942, 'eval_wording_rmse': 0.7258882522583008, 'eval_mcrmse': 0.7299399971961975, 'eval_runtime': 12.9242, 'eval_samples_per_second': 85.344, 'eval_steps_per_second': 42.711, 'epoch': 2.71}


                                                   
 91%|█████████▏| 8300/9093 [31:47<05:09,  2.56it/s]

{'eval_loss': 0.5296362042427063, 'eval_content_rmse': 0.7200532555580139, 'eval_wording_rmse': 0.7353881001472473, 'eval_mcrmse': 0.7277206778526306, 'eval_runtime': 13.3487, 'eval_samples_per_second': 82.63, 'eval_steps_per_second': 41.352, 'epoch': 2.74}


                                                   
 92%|█████████▏| 8400/9093 [32:12<01:04, 10.71it/s]

{'eval_loss': 0.5596352219581604, 'eval_content_rmse': 0.7613449096679688, 'eval_wording_rmse': 0.7345908880233765, 'eval_mcrmse': 0.7479678988456726, 'eval_runtime': 12.5758, 'eval_samples_per_second': 87.708, 'eval_steps_per_second': 43.894, 'epoch': 2.77}


 93%|█████████▎| 8500/9093 [32:25<01:04,  9.27it/s]

{'loss': 0.1129, 'learning_rate': 9.78225008248103e-07, 'epoch': 2.8}


                                                   
 93%|█████████▎| 8500/9093 [32:37<01:04,  9.27it/s]

{'eval_loss': 0.5108921527862549, 'eval_content_rmse': 0.6887117028236389, 'eval_wording_rmse': 0.739905595779419, 'eval_mcrmse': 0.7143086194992065, 'eval_runtime': 12.3913, 'eval_samples_per_second': 89.014, 'eval_steps_per_second': 44.547, 'epoch': 2.8}


                                                   
 95%|█████████▍| 8600/9093 [33:01<00:42, 11.57it/s]

{'eval_loss': 0.5140261650085449, 'eval_content_rmse': 0.7034050822257996, 'eval_wording_rmse': 0.7302557229995728, 'eval_mcrmse': 0.7168303728103638, 'eval_runtime': 12.5871, 'eval_samples_per_second': 87.629, 'eval_steps_per_second': 43.854, 'epoch': 2.84}


                                                   
 96%|█████████▌| 8700/9093 [33:27<00:41,  9.57it/s]

{'eval_loss': 0.5235146284103394, 'eval_content_rmse': 0.7205055952072144, 'eval_wording_rmse': 0.7265679836273193, 'eval_mcrmse': 0.7235367894172668, 'eval_runtime': 13.9667, 'eval_samples_per_second': 78.973, 'eval_steps_per_second': 39.522, 'epoch': 2.87}


                                                   
 97%|█████████▋| 8800/9093 [33:51<00:25, 11.34it/s]

{'eval_loss': 0.5223122239112854, 'eval_content_rmse': 0.7171739935874939, 'eval_wording_rmse': 0.7282072901725769, 'eval_mcrmse': 0.7226906418800354, 'eval_runtime': 12.665, 'eval_samples_per_second': 87.09, 'eval_steps_per_second': 43.585, 'epoch': 2.9}


                                                   
 98%|█████████▊| 8900/9093 [34:15<00:18, 10.42it/s]

{'eval_loss': 0.5567365884780884, 'eval_content_rmse': 0.7595764398574829, 'eval_wording_rmse': 0.7324733734130859, 'eval_mcrmse': 0.7460249066352844, 'eval_runtime': 12.5079, 'eval_samples_per_second': 88.184, 'eval_steps_per_second': 44.132, 'epoch': 2.94}


 99%|█████████▉| 9000/9093 [34:27<00:10,  9.05it/s]

{'loss': 0.1004, 'learning_rate': 1.5341471461563842e-07, 'epoch': 2.97}


                                                   
 99%|█████████▉| 9000/9093 [34:39<00:10,  9.05it/s]

{'eval_loss': 0.5359641313552856, 'eval_content_rmse': 0.7341975569725037, 'eval_wording_rmse': 0.7299876809120178, 'eval_mcrmse': 0.7320926189422607, 'eval_runtime': 12.6302, 'eval_samples_per_second': 87.33, 'eval_steps_per_second': 43.705, 'epoch': 2.97}


100%|██████████| 9093/9093 [34:50<00:00,  4.35it/s]


{'train_runtime': 2090.8739, 'train_samples_per_second': 8.698, 'train_steps_per_second': 4.349, 'train_loss': 0.19879677874360505, 'epoch': 3.0}


100%|██████████| 552/552 [00:12<00:00, 45.20it/s]
100%|██████████| 2/2 [00:00<00:00, 140.95it/s]


cv mcrmse: {'content_rmse': 0.47593582945081453, 'wording_rmse': 0.6257460187358439, 'mcrmse': 0.5508409240933292}


In [64]:
# Copy the test predictions into the submission frame by position:
# array column 0 becomes "content", array column 1 becomes "wording".
# NOTE(review): assumes pred_test rows are in the same order as
# sample_submission and that its first two columns are content/wording — confirm.
pred_matrix = pred_test.values
for position, target_name in enumerate(("content", "wording")):
    sample_submission[target_name] = pred_matrix[:, position]

In [65]:
# Persist the submission frame to the working directory; index=False keeps
# the row index out of the written file.
sample_submission.to_csv(path_or_buf="submission.csv", index=False)

In [66]:
# Bare expression: renders the submission frame as this cell's output.
sample_submission

Unnamed: 0,student_id,content,wording
0,000000ffffff,-1.469326,-1.329178
1,111111eeeeee,-1.470571,-1.328479
2,222222cccccc,-1.467416,-1.322249
3,333333dddddd,-1.474524,-1.330803
