
### Logs
* 30.08: use 2048 for max_len and max_position_embeddings
* 03.09: use 512 instead
* 23.09: Start from the model pretrained with MLM on both datasets at max length 512. Fine-tune with max length 2048 and try to run inference with 512. Pretraining notebook: pretrain1_task_ver3(512_larger dataset)
* 25.09: Start from the model pretrained with MLM on both datasets at max length 512. Fine-tune with max length 1536. Pretraining notebook: pretrain1_task_ver3(512_larger dataset)

In [4]:
!pip install transformers -q
!pip install sentencepiece -q
!pip install datasets -q
!pip install accelerate -U -q



In [5]:
from transformers import AutoTokenizer
from datasets import Dataset, disable_progress_bar
import pandas as pd

# Tokenizer of the DeBERTa-v3-large checkpoint (fetched from the Hugging Face
# hub, or from the local cache when the hub cannot be reached).
tok = AutoTokenizer.from_pretrained("microsoft/deberta-v3-large")

# Competition tables: one row per prompt, and one row per student summary.
pdf = pd.read_csv("./input/prompts_train.csv")
sdf = pd.read_csv("./input/summaries_train.csv")

'(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /microsoft/deberta-v3-large/resolve/main/tokenizer_config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f81b28b5dc0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: cc86c86f-953a-42cb-8ee8-388388f38a0a)')' thrown while requesting HEAD https://huggingface.co/microsoft/deberta-v3-large/resolve/main/tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
from transformers import AutoModelForSequenceClassification
#model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-v3-large")
## use the pretrained model
# from transformers import AutoConfig
# config = AutoConfig.from_pretrained('./input/pretrain/pretrained_model/')
# model = AutoModelForSequenceClassification.from_pretrained('./input/pretrain/pretrained_model', config = config)

## Train

In [16]:
%%writefile train.py

import os
import logging
import warnings
from dataclasses import dataclass, field
from typing import Optional

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    AutoConfig,
    set_seed,
    Trainer,
    TrainingArguments,
    HfArgumentParser,
    DataCollatorWithPadding,
)
from datasets import Dataset, disable_progress_bar
import pandas as pd
import numpy as np

# Keep the console quiet: ignore Python warnings and disable log records of
# severity ERROR and below.
warnings.simplefilter("ignore")
logging.disable(logging.ERROR)

# Environment switches read by third-party libraries.
os.environ.update(
    {
        "TOKENIZERS_PARALLELISM": "false",
        "TF_CPP_MIN_LOG_LEVEL": "3",
        "WANDB_PROJECT": "kaggle-commonlit-eval-student-summaries-2509",
    }
)

# Turn off the `datasets` progress bars.
disable_progress_bar()

@dataclass
class Config:
    """Script-specific options, parsed from the command line next to ``TrainingArguments``.

    Each field carries a ``help`` string in its metadata describing the option.
    """

    # Hub name or local path used to load the tokenizer and the base model config.
    model_name_or_path: Optional[str] = field(
        default="microsoft/deberta-v3-base",
        metadata={"help": "Model name or path"},
    )

    # NOTE(review): the training code in this file reads the CSVs from
    # hard-coded ./input paths and does not currently use this value.
    data_dir: Optional[str] = field(
        default="/kaggle/input/commonlit-evaluate-student-summaries",
        metadata={"help": "Data directory"},
    )

    # Upper bound on the tokenized length of one (summary, prompt) pair.
    max_seq_length: Optional[int] = field(
        default=1536,
        metadata={"help": "Max sequence length"},
    )

    # NOTE(review): tokenize() in this file always appends the full prompt and
    # does not currently read the two add_prompt_* switches below.
    add_prompt_question: Optional[bool] = field(
        default=False,
        metadata={"help": "Add prompt question into input"},
    )

    add_prompt_text: Optional[bool] = field(
        default=False,
        metadata={"help": "Add prompt text into input"},
    )

    # Index of the fold held out for validation.
    fold: Optional[int] = field(
        default=0,
        metadata={"help": "Fold"},
    )

    # Worker processes used by ``Dataset.map`` during tokenization.
    num_proc: Optional[int] = field(
        default=4,
        metadata={"help": "Number of processes"},
    )

    # Applied to both hidden and attention dropout of the model config.
    dropout: Optional[float] = field(
        default=0.,
        metadata={"help": "Amount of dropout to apply"},
    )

    # Written into the model config under the same key.
    max_position_embeddings: Optional[int] = field(
        default=1536,
        metadata={"help": "Max position embeddings to set in the model config"},
    )


# Spell auto correction
# from spellchecker import SpellChecker

# def correct_spelling(input_text):
#     print('input_text: ', input_text)
#     # Initialize the spell checker
#     spell = SpellChecker()
#     # Split the input text into words
#     words = input_text.split()
#     # Initialize an empty list to store the corrected words
#     corrected_words = []
#     for word in words:
#         # Check if the word has any punctuation at the end
#         if word[-1].isalpha():
#             # Extract the punctuation
#             punctuation_end = ""
#         else:
#             punctuation_end = word[-1]
#             word = word[:-1]
#         # check if the word has any punctuation at the start
#         if word[0].isalpha():
#            # Extract the punctuation
#             punctuation_start = ""
#         else:
#             punctuation_start = word[0]
#             word = word[1:]
#         # Check the spelling of the word (case insensitive)
#         corrected_word = spell.correction(word.lower())
#         # Preserve the original capitalization
#         if word[0].isupper():
#             corrected_word = corrected_word.capitalize()
#         # Combine the corrected word and punctuation (if any)
#         corrected_word = punctuation_start+corrected_word+punctuation_end
#         # Append the corrected word to the list
#         corrected_words.append(corrected_word)
#     # Join the corrected words back into a single string
#     corrected_text = " ".join(corrected_words)
#     return corrected_text

def tokenize(example, tokenizer, config):
    """Encode one merged prompt/summary row as a sequence pair with its targets.

    The student summary (``example["text"]``) is the first sequence. The second
    sequence is the prompt title, prompt text and prompt question joined with
    the tokenizer's separator token. The pair is truncated to
    ``config.max_seq_length`` and left unpadded.

    Note: ``config.add_prompt_question`` / ``config.add_prompt_text`` are not
    consulted here; the full prompt is always appended.

    Args:
        example: mapping with the keys ``prompt_title``, ``prompt_text``,
            ``prompt_question``, ``text``, ``content`` and ``wording``.
        tokenizer: callable tokenizer exposing ``sep_token``.
        config: object exposing ``max_seq_length``.

    Returns:
        dict with every entry produced by the tokenizer plus ``"labels"``,
        the list ``[content, wording]``.
    """
    prompt_parts = (
        example["prompt_title"],
        example["prompt_text"],
        example["prompt_question"],
    )
    prompt = tokenizer.sep_token.join(prompt_parts)
    targets = [example["content"], example["wording"]]

    # Summary first, prompt second; truncation keeps the pair within max_length.
    encoded = tokenizer(
        example["text"],
        prompt,
        padding=False,
        truncation=True,
        max_length=config.max_seq_length,
    )

    features = {**encoded}
    features["labels"] = targets
    return features




def compute_mcrmse(eval_pred):
    """
    Mean columnwise root mean squared error (MCRMSE).
    https://www.kaggle.com/competitions/commonlit-evaluate-student-summaries/overview/evaluation

    Args:
        eval_pred: a pair ``(predictions, labels)`` of arrays with one column
            per target; column 0 is reported as content, column 1 as wording.

    Returns:
        dict with the per-column RMSE (``content_rmse``, ``wording_rmse``) and
        their mean (``mcrmse``).
    """
    predictions, targets = eval_pred

    # RMSE is taken over rows (axis 0), giving one value per target column.
    squared_error = np.square(predictions - targets)
    per_column_rmse = np.sqrt(squared_error.mean(axis=0))

    return {
        "content_rmse": per_column_rmse[0],
        "wording_rmse": per_column_rmse[1],
        "mcrmse": np.mean(per_column_rmse),
    }


def main():
    """Fine-tune a two-target regression model on one prompt-based fold.

    Steps, in order:
      1. Parse ``Config`` and ``TrainingArguments`` from the command line and
         call ``set_seed``.
      2. If W&B reporting is requested, try to log in (best effort).
      3. Build the model config (dropout, two regression labels, position
         embedding size) and load weights from the local pretrained directory.
      4. Merge prompts with summaries, assign folds by ``prompt_id`` and
         tokenize the train and validation splits.
      5. Train with ``Trainer``, then store the best metric in the saved model
         config and log it.
    """
    parser = HfArgumentParser((Config, TrainingArguments))
    config, training_args = parser.parse_args_into_dataclasses()

    set_seed(training_args.seed)

    if "wandb" in training_args.report_to:
        import wandb

        try:
            wandb.login()
        except Exception as exc:
            # Best effort: training continues without a W&B login. Catching
            # Exception (not a bare except) lets Ctrl-C / SystemExit through
            # and the reason for the failure is reported.
            print(f"Could not log in to WandB: {exc!r}")

    tokenizer = AutoTokenizer.from_pretrained(config.model_name_or_path)
    model_config = AutoConfig.from_pretrained(config.model_name_or_path)

    model_config.update({
        "hidden_dropout_prob": config.dropout,
        "attention_probs_dropout_prob": config.dropout,
        "num_labels": 2,
        "problem_type": "regression",
        "max_position_embeddings": config.max_position_embeddings,
        # Keep the script options inside the saved model config.
        "cfg": config.__dict__,
    })

    print(model_config)

    # NOTE(review): weights always come from this hard-coded directory;
    # config.model_name_or_path only selects the tokenizer and the base config.
    pretrained_dir = './input/pretrain/pretrained_model_2109/'
    print('use pretrained_model')
    print(pretrained_dir)
    model = AutoModelForSequenceClassification.from_pretrained(
        pretrained_dir, config=model_config
    )

    # NOTE(review): config.data_dir is not used; the CSVs are read from ./input.
    pdf = pd.read_csv("./input/prompts_train.csv")
    sdf = pd.read_csv("./input/summaries_train.csv")

    df = pdf.merge(sdf, on="prompt_id")

    # 4 prompt ids, 4 folds
    id2fold = {
        "814d6b": 0,
        "39c16e": 1,
        "3b9047": 2,
        "ebad26": 3,
    }
    df["fold"] = df["prompt_id"].map(id2fold)

    # The selected fold is held out for validation; all other rows are used
    # for training.
    train_ds = Dataset.from_pandas(df[df["fold"] != config.fold])
    val_ds = Dataset.from_pandas(df[df["fold"] == config.fold])

    # Both splits are tokenized row by row with identical settings.
    map_kwargs = {
        "batched": False,
        "num_proc": config.num_proc,
        "fn_kwargs": {"tokenizer": tokenizer, "config": config},
    }
    train_ds = train_ds.map(tokenize, **map_kwargs)
    val_ds = val_ds.map(tokenize, **map_kwargs)

    # The padding multiple is decided from the fp16 flag as parsed from the
    # command line, i.e. before the overrides below.
    data_collator = DataCollatorWithPadding(
        tokenizer=tokenizer,
        pad_to_multiple_of=16 if training_args.fp16 else None,
    )

    # NOTE(review): these fields are overwritten after the arguments were
    # parsed, so command-line values for them are ignored. bf16 is forced on
    # even when --fp16 was passed; confirm which precision is actually in
    # effect for the installed transformers version.
    training_args.bf16 = True
    training_args.gradient_accumulation_steps = 1
    training_args.load_best_model_at_end = True
    training_args.greater_is_better = False
    training_args.metric_for_best_model = 'mcrmse'

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=data_collator,
        tokenizer=tokenizer,
        compute_metrics=compute_mcrmse,
    )

    trainer.train()

    # Store the best validation metric in config.json inside the output dir.
    model.config.best_metric = trainer.state.best_metric
    model.config.save_pretrained(training_args.output_dir)

    trainer.log({"eval_best_mcrmse": trainer.state.best_metric})


# Run training only when the file is executed as a script (the notebook
# launches it with `python train.py ...`), not when it is imported.
if __name__ == "__main__":
    main()

Overwriting train.py


In [17]:
from pathlib import Path

seed = 42

fold = 0

output = f"output_fold{fold}_seed{seed}_2509"

!python train.py \
  --model_name_or_path "microsoft/deberta-v3-large" \
  --add_prompt_question True \
  --fold $fold \
  --data_dir "./" \
  --output_dir $output \
  --fp16 \
  --num_train_epochs 4 \
  --dataloader_num_workers 4 \
  --learning_rate 2e-6 \
  --weight_decay 0.01 \
  --warmup_ratio 0 \
  --optim "adamw_torch" \
  --per_device_train_batch_size 2 \
  --per_device_eval_batch_size 2 \
  --evaluation_strategy "steps" \
  --eval_steps 150 \
  --save_strategy "steps" \
  --save_steps 150 \
  --save_total_limit 1 \
  --report_to "wandb" \
  --metric_for_best_model "mcrmse" \
  --greater_is_better False \
  --logging_steps 10 \
  --log_level "error" \
  --disable_tqdm True \
  --ddp_find_unused_parameters False \
  --dropout 0 \
  --seed $seed


output_dir = Path.cwd() / output
# add json files
for json_file in output_dir.glob("checkpoint*/*token*.json"):
    json_file.rename(output_dir/json_file.name)

# model files
for model_file in output_dir.glob("checkpoint*/*model*"):
    model_file.rename(output_dir/model_file.name)

# remove optimizer states and other files
to_delete = str(list(output_dir.glob("checkpoint*"))[0])
!rm -r $to_delete

[34m[1mwandb[0m: Currently logged in as: [33mpeng_sun[0m. Use [1m`wandb login --relogin`[0m to force relogin
DebertaV2Config {
  "_name_or_path": "microsoft/deberta-v3-large",
  "attention_probs_dropout_prob": 0.0,
  "cfg": {
    "add_prompt_question": true,
    "add_prompt_text": false,
    "data_dir": "./",
    "dropout": 0.0,
    "fold": 0,
    "max_position_embeddings": 1536,
    "max_seq_length": 1536,
    "model_name_or_path": "microsoft/deberta-v3-large",
    "num_proc": 4
  },
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 1536,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 1024,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "po

{'loss': 0.3841, 'learning_rate': 1.8771032662487628e-06, 'epoch': 0.25}
{'eval_loss': 0.6345586776733398, 'eval_content_rmse': 0.5860022902488708, 'eval_wording_rmse': 0.9621424078941345, 'eval_mcrmse': 0.7740723490715027, 'eval_runtime': 57.6224, 'eval_samples_per_second': 19.142, 'eval_steps_per_second': 9.58, 'epoch': 0.25}
{'loss': 0.1535, 'learning_rate': 1.8754536456614978e-06, 'epoch': 0.25}
{'loss': 0.2464, 'learning_rate': 1.8738040250742327e-06, 'epoch': 0.25}
{'loss': 0.2815, 'learning_rate': 1.872154404486968e-06, 'epoch': 0.26}
{'loss': 0.3091, 'learning_rate': 1.870504783899703e-06, 'epoch': 0.26}
{'loss': 0.3534, 'learning_rate': 1.8688551633124382e-06, 'epoch': 0.26}
{'loss': 0.2734, 'learning_rate': 1.8672055427251732e-06, 'epoch': 0.27}
{'loss': 0.4121, 'learning_rate': 1.8655559221379081e-06, 'epoch': 0.27}
{'loss': 0.2051, 'learning_rate': 1.8639063015506434e-06, 'epoch': 0.27}
{'loss': 0.2821, 'learning_rate': 1.8622566809633784e-06, 'epoch': 0.28}
{'loss': 0.3061

{'loss': 0.2375, 'learning_rate': 1.726987792807654e-06, 'epoch': 0.55}
{'loss': 0.1951, 'learning_rate': 1.7253381722203893e-06, 'epoch': 0.55}
{'loss': 0.2626, 'learning_rate': 1.7236885516331243e-06, 'epoch': 0.55}
{'loss': 0.2552, 'learning_rate': 1.7220389310458595e-06, 'epoch': 0.56}
{'loss': 0.3176, 'learning_rate': 1.7203893104585945e-06, 'epoch': 0.56}
{'loss': 0.2284, 'learning_rate': 1.7187396898713295e-06, 'epoch': 0.56}
{'loss': 0.2081, 'learning_rate': 1.7170900692840647e-06, 'epoch': 0.57}
{'loss': 0.2948, 'learning_rate': 1.7154404486967997e-06, 'epoch': 0.57}
{'loss': 0.2609, 'learning_rate': 1.713790828109535e-06, 'epoch': 0.57}
{'loss': 0.2437, 'learning_rate': 1.7121412075222699e-06, 'epoch': 0.58}
{'loss': 0.2658, 'learning_rate': 1.7104915869350049e-06, 'epoch': 0.58}
{'loss': 0.3114, 'learning_rate': 1.70884196634774e-06, 'epoch': 0.58}
{'loss': 0.2397, 'learning_rate': 1.707192345760475e-06, 'epoch': 0.59}
{'loss': 0.3287, 'learning_rate': 1.70554272517321e-06, 

{'loss': 0.1816, 'learning_rate': 1.5752226987792806e-06, 'epoch': 0.85}
{'loss': 0.1133, 'learning_rate': 1.5735730781920158e-06, 'epoch': 0.85}
{'loss': 0.3828, 'learning_rate': 1.5719234576047508e-06, 'epoch': 0.86}
{'loss': 0.1572, 'learning_rate': 1.570273837017486e-06, 'epoch': 0.86}
{'loss': 0.2781, 'learning_rate': 1.568624216430221e-06, 'epoch': 0.86}
{'loss': 0.3286, 'learning_rate': 1.566974595842956e-06, 'epoch': 0.87}
{'loss': 0.2286, 'learning_rate': 1.5653249752556912e-06, 'epoch': 0.87}
{'loss': 0.2455, 'learning_rate': 1.5636753546684262e-06, 'epoch': 0.87}
{'loss': 0.238, 'learning_rate': 1.5620257340811614e-06, 'epoch': 0.88}
{'loss': 0.2429, 'learning_rate': 1.5603761134938964e-06, 'epoch': 0.88}
{'loss': 0.166, 'learning_rate': 1.5587264929066314e-06, 'epoch': 0.88}
{'loss': 0.212, 'learning_rate': 1.5570768723193666e-06, 'epoch': 0.89}
{'loss': 0.3021, 'learning_rate': 1.5554272517321016e-06, 'epoch': 0.89}
{'eval_loss': 0.6099691390991211, 'eval_content_rmse': 0.

{'loss': 0.2766, 'learning_rate': 1.4234576047509072e-06, 'epoch': 1.15}
{'loss': 0.2268, 'learning_rate': 1.4218079841636424e-06, 'epoch': 1.16}
{'loss': 0.2809, 'learning_rate': 1.4201583635763774e-06, 'epoch': 1.16}
{'loss': 0.1955, 'learning_rate': 1.4185087429891126e-06, 'epoch': 1.16}
{'loss': 0.1037, 'learning_rate': 1.4168591224018476e-06, 'epoch': 1.17}
{'loss': 0.2469, 'learning_rate': 1.4152095018145826e-06, 'epoch': 1.17}
{'loss': 0.1427, 'learning_rate': 1.4135598812273178e-06, 'epoch': 1.17}
{'loss': 0.1286, 'learning_rate': 1.4119102606400528e-06, 'epoch': 1.18}
{'loss': 0.1953, 'learning_rate': 1.410260640052788e-06, 'epoch': 1.18}
{'loss': 0.1134, 'learning_rate': 1.408611019465523e-06, 'epoch': 1.18}
{'loss': 0.324, 'learning_rate': 1.406961398878258e-06, 'epoch': 1.19}
{'eval_loss': 0.5557912588119507, 'eval_content_rmse': 0.6532548069953918, 'eval_wording_rmse': 0.827551007270813, 'eval_mcrmse': 0.7404029369354248, 'eval_runtime': 57.4154, 'eval_samples_per_second':

{'loss': 0.2864, 'learning_rate': 1.271692510722534e-06, 'epoch': 1.46}
{'loss': 0.1879, 'learning_rate': 1.270042890135269e-06, 'epoch': 1.46}
{'loss': 0.2101, 'learning_rate': 1.2683932695480039e-06, 'epoch': 1.46}
{'loss': 0.1365, 'learning_rate': 1.266743648960739e-06, 'epoch': 1.47}
{'loss': 0.1812, 'learning_rate': 1.265094028373474e-06, 'epoch': 1.47}
{'loss': 0.1322, 'learning_rate': 1.2634444077862093e-06, 'epoch': 1.47}
{'loss': 0.1737, 'learning_rate': 1.2617947871989443e-06, 'epoch': 1.48}
{'loss': 0.2037, 'learning_rate': 1.2601451666116793e-06, 'epoch': 1.48}
{'loss': 0.1781, 'learning_rate': 1.2584955460244145e-06, 'epoch': 1.48}
{'eval_loss': 0.5755925178527832, 'eval_content_rmse': 0.6235155463218689, 'eval_wording_rmse': 0.8731629848480225, 'eval_mcrmse': 0.7483392953872681, 'eval_runtime': 57.3852, 'eval_samples_per_second': 19.221, 'eval_steps_per_second': 9.619, 'epoch': 1.48}
{'loss': 0.2484, 'learning_rate': 1.2568459254371493e-06, 'epoch': 1.49}
{'loss': 0.1615,

{'loss': 0.1993, 'learning_rate': 1.1199274166941604e-06, 'epoch': 1.76}
{'loss': 0.1296, 'learning_rate': 1.1182777961068954e-06, 'epoch': 1.77}
{'loss': 0.1163, 'learning_rate': 1.1166281755196304e-06, 'epoch': 1.77}
{'loss': 0.3441, 'learning_rate': 1.1149785549323656e-06, 'epoch': 1.77}
{'loss': 0.1924, 'learning_rate': 1.1133289343451006e-06, 'epoch': 1.77}
{'loss': 0.163, 'learning_rate': 1.1116793137578358e-06, 'epoch': 1.78}
{'loss': 0.1489, 'learning_rate': 1.1100296931705708e-06, 'epoch': 1.78}
{'eval_loss': 0.5156794786453247, 'eval_content_rmse': 0.6098341941833496, 'eval_wording_rmse': 0.8120712637901306, 'eval_mcrmse': 0.7109527587890625, 'eval_runtime': 57.3613, 'eval_samples_per_second': 19.229, 'eval_steps_per_second': 9.623, 'epoch': 1.78}
{'loss': 0.2314, 'learning_rate': 1.1083800725833056e-06, 'epoch': 1.78}
{'loss': 0.1829, 'learning_rate': 1.1067304519960408e-06, 'epoch': 1.79}
{'loss': 0.1836, 'learning_rate': 1.1050808314087758e-06, 'epoch': 1.79}
{'loss': 0.19

{'loss': 0.1234, 'learning_rate': 9.681623226657868e-07, 'epoch': 2.07}
{'loss': 0.1731, 'learning_rate': 9.66512702078522e-07, 'epoch': 2.07}
{'loss': 0.1409, 'learning_rate': 9.64863081491257e-07, 'epoch': 2.07}
{'loss': 0.226, 'learning_rate': 9.63213460903992e-07, 'epoch': 2.08}
{'loss': 0.1087, 'learning_rate': 9.615638403167272e-07, 'epoch': 2.08}
{'eval_loss': 0.41054874658584595, 'eval_content_rmse': 0.5058389902114868, 'eval_wording_rmse': 0.7518141865730286, 'eval_mcrmse': 0.6288266181945801, 'eval_runtime': 57.2722, 'eval_samples_per_second': 19.259, 'eval_steps_per_second': 9.638, 'epoch': 2.08}
{'loss': 0.1026, 'learning_rate': 9.599142197294622e-07, 'epoch': 2.08}
{'loss': 0.1845, 'learning_rate': 9.582645991421974e-07, 'epoch': 2.09}
{'loss': 0.1566, 'learning_rate': 9.566149785549324e-07, 'epoch': 2.09}
{'loss': 0.168, 'learning_rate': 9.549653579676674e-07, 'epoch': 2.09}
{'loss': 0.0948, 'learning_rate': 9.533157373804025e-07, 'epoch': 2.1}
{'loss': 0.2095, 'learning_

{'loss': 0.1282, 'learning_rate': 8.14912570108875e-07, 'epoch': 2.37}
{'loss': 0.203, 'learning_rate': 8.132629495216101e-07, 'epoch': 2.38}
{'eval_loss': 0.38668930530548096, 'eval_content_rmse': 0.5007444024085999, 'eval_wording_rmse': 0.7229341864585876, 'eval_mcrmse': 0.6118392944335938, 'eval_runtime': 57.5648, 'eval_samples_per_second': 19.161, 'eval_steps_per_second': 9.589, 'epoch': 2.38}
{'loss': 0.1806, 'learning_rate': 8.11613328934345e-07, 'epoch': 2.38}
{'loss': 0.2174, 'learning_rate': 8.099637083470802e-07, 'epoch': 2.38}
{'loss': 0.0848, 'learning_rate': 8.083140877598153e-07, 'epoch': 2.39}
{'loss': 0.2492, 'learning_rate': 8.066644671725504e-07, 'epoch': 2.39}
{'loss': 0.1453, 'learning_rate': 8.050148465852854e-07, 'epoch': 2.39}
{'loss': 0.1129, 'learning_rate': 8.033652259980203e-07, 'epoch': 2.4}
{'loss': 0.126, 'learning_rate': 8.017156054107554e-07, 'epoch': 2.4}
{'loss': 0.1334, 'learning_rate': 8.000659848234905e-07, 'epoch': 2.4}
{'loss': 0.1857, 'learning_r

{'loss': 0.1777, 'learning_rate': 6.631474760805015e-07, 'epoch': 2.68}
{'loss': 0.1419, 'learning_rate': 6.614978554932366e-07, 'epoch': 2.68}
{'loss': 0.1679, 'learning_rate': 6.598482349059716e-07, 'epoch': 2.68}
{'loss': 0.233, 'learning_rate': 6.581986143187067e-07, 'epoch': 2.69}
{'loss': 0.1955, 'learning_rate': 6.565489937314418e-07, 'epoch': 2.69}
{'loss': 0.1323, 'learning_rate': 6.548993731441768e-07, 'epoch': 2.69}
{'loss': 0.1174, 'learning_rate': 6.532497525569119e-07, 'epoch': 2.7}
{'loss': 0.154, 'learning_rate': 6.516001319696469e-07, 'epoch': 2.7}
{'loss': 0.1279, 'learning_rate': 6.49950511382382e-07, 'epoch': 2.7}
{'loss': 0.204, 'learning_rate': 6.483008907951171e-07, 'epoch': 2.71}
{'loss': 0.2704, 'learning_rate': 6.466512702078522e-07, 'epoch': 2.71}
{'loss': 0.1069, 'learning_rate': 6.450016496205872e-07, 'epoch': 2.71}
{'loss': 0.1502, 'learning_rate': 6.433520290333223e-07, 'epoch': 2.72}
{'loss': 0.1449, 'learning_rate': 6.417024084460574e-07, 'epoch': 2.72}

{'loss': 0.1395, 'learning_rate': 5.098977235235895e-07, 'epoch': 2.98}
{'loss': 0.0872, 'learning_rate': 5.082481029363246e-07, 'epoch': 2.99}
{'loss': 0.1152, 'learning_rate': 5.065984823490597e-07, 'epoch': 2.99}
{'loss': 0.1162, 'learning_rate': 5.049488617617948e-07, 'epoch': 2.99}
{'loss': 0.2095, 'learning_rate': 5.032992411745298e-07, 'epoch': 3.0}
{'loss': 0.1504, 'learning_rate': 5.016496205872649e-07, 'epoch': 3.0}
{'loss': 0.1426, 'learning_rate': 5e-07, 'epoch': 3.0}
{'loss': 0.1585, 'learning_rate': 4.983503794127351e-07, 'epoch': 3.01}
{'loss': 0.1111, 'learning_rate': 4.967007588254702e-07, 'epoch': 3.01}
{'loss': 0.0895, 'learning_rate': 4.950511382382052e-07, 'epoch': 3.01}
{'loss': 0.1366, 'learning_rate': 4.934015176509403e-07, 'epoch': 3.02}
{'loss': 0.1259, 'learning_rate': 4.917518970636753e-07, 'epoch': 3.02}
{'eval_loss': 0.3869900107383728, 'eval_content_rmse': 0.4939224421977997, 'eval_wording_rmse': 0.7280250191688538, 'eval_mcrmse': 0.6109737157821655, 'eva

{'loss': 0.0919, 'learning_rate': 3.564830089079511e-07, 'epoch': 3.29}
{'loss': 0.1347, 'learning_rate': 3.548333883206862e-07, 'epoch': 3.29}
{'loss': 0.1241, 'learning_rate': 3.531837677334213e-07, 'epoch': 3.3}
{'loss': 0.0954, 'learning_rate': 3.5153414714615636e-07, 'epoch': 3.3}
{'loss': 0.1, 'learning_rate': 3.4988452655889146e-07, 'epoch': 3.3}
{'loss': 0.1872, 'learning_rate': 3.482349059716265e-07, 'epoch': 3.31}
{'loss': 0.1268, 'learning_rate': 3.465852853843616e-07, 'epoch': 3.31}
{'loss': 0.175, 'learning_rate': 3.449356647970966e-07, 'epoch': 3.31}
{'loss': 0.1235, 'learning_rate': 3.432860442098317e-07, 'epoch': 3.32}
{'eval_loss': 0.37854138016700745, 'eval_content_rmse': 0.48612335324287415, 'eval_wording_rmse': 0.7216421365737915, 'eval_mcrmse': 0.6038827300071716, 'eval_runtime': 57.7278, 'eval_samples_per_second': 19.107, 'eval_steps_per_second': 9.562, 'epoch': 3.32}
{'loss': 0.1165, 'learning_rate': 3.4163642362256676e-07, 'epoch': 3.32}
{'loss': 0.1816, 'learni

{'loss': 0.0823, 'learning_rate': 2.0488287693830418e-07, 'epoch': 3.59}
{'loss': 0.1275, 'learning_rate': 2.0323325635103923e-07, 'epoch': 3.6}
{'loss': 0.1967, 'learning_rate': 2.0158363576377433e-07, 'epoch': 3.6}
{'loss': 0.2151, 'learning_rate': 1.999340151765094e-07, 'epoch': 3.6}
{'loss': 0.2043, 'learning_rate': 1.9828439458924446e-07, 'epoch': 3.61}
{'loss': 0.1638, 'learning_rate': 1.9663477400197953e-07, 'epoch': 3.61}
{'loss': 0.1183, 'learning_rate': 1.949851534147146e-07, 'epoch': 3.61}
{'eval_loss': 0.40984901785850525, 'eval_content_rmse': 0.5337173342704773, 'eval_wording_rmse': 0.7313302159309387, 'eval_mcrmse': 0.632523775100708, 'eval_runtime': 57.504, 'eval_samples_per_second': 19.181, 'eval_steps_per_second': 9.599, 'epoch': 3.61}
{'loss': 0.1248, 'learning_rate': 1.9333553282744968e-07, 'epoch': 3.62}
{'loss': 0.0883, 'learning_rate': 1.9168591224018473e-07, 'epoch': 3.62}
{'loss': 0.0787, 'learning_rate': 1.9003629165291983e-07, 'epoch': 3.62}
{'loss': 0.0775, '

{'loss': 0.0834, 'learning_rate': 5.1468162322665786e-08, 'epoch': 3.9}
{'loss': 0.1503, 'learning_rate': 4.9818541735400854e-08, 'epoch': 3.9}
{'loss': 0.25, 'learning_rate': 4.816892114813593e-08, 'epoch': 3.91}
{'loss': 0.149, 'learning_rate': 4.6519300560871e-08, 'epoch': 3.91}
{'eval_loss': 0.4091701805591583, 'eval_content_rmse': 0.5153412222862244, 'eval_wording_rmse': 0.7434805631637573, 'eval_mcrmse': 0.6294108629226685, 'eval_runtime': 57.6854, 'eval_samples_per_second': 19.121, 'eval_steps_per_second': 9.569, 'epoch': 3.91}
{'loss': 0.1189, 'learning_rate': 4.486967997360607e-08, 'epoch': 3.91}
{'loss': 0.0762, 'learning_rate': 4.3220059386341145e-08, 'epoch': 3.92}
{'loss': 0.1256, 'learning_rate': 4.1570438799076207e-08, 'epoch': 3.92}
{'loss': 0.0853, 'learning_rate': 3.992081821181128e-08, 'epoch': 3.92}
{'loss': 0.0895, 'learning_rate': 3.827119762454635e-08, 'epoch': 3.93}
{'loss': 0.0643, 'learning_rate': 3.6621577037281423e-08, 'epoch': 3.93}
{'loss': 0.157, 'learnin

In [18]:
from pathlib import Path
import json

seed = 42
scores = []

# Read the best validation metric that training stored in each fold's
# config.json, print it, and finish with the mean over the listed folds.
for fold in range(0, 1):
    output = f"./output_fold{fold}_seed{seed}_2509"
    cfg = json.loads((Path(output) / "config.json").read_text())

    best_metric = cfg['best_metric']
    print(f"MCRMSE for fold {fold}, seed {seed}: {best_metric:.4f}")
    scores.append(best_metric)

print(sum(scores)/len(scores))

MCRMSE for fold 0, seed 42: 0.5715
0.5715427994728088


#df.loc[0, 'prompt_text']

In [18]:
# from pathlib import Path
# import json

# seed = 42
# scores = []
# for fold in range(4):
#     output = f"./output_fold{fold}_seed{seed}_3008"
#     p = Path(output) / "config.json"

#     with open(p) as fp:
#         cfg = json.load(fp)


#     print(f"MCRMSE for fold {fold}, seed {seed}: {cfg['best_metric']:.4f}")
#     scores.append(cfg['best_metric'])
# print(sum(scores)/len(scores))