In [None]:
!pip install transformers datasets stanza

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting stanza
  Downloading stanza-1.10.1-py3-none-any.whl.metadata (13 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.3.0->stanza)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>

In [None]:
!unzip lessons_final.zip -d lessons_final

Archive:  lessons_final.zip
   creating: lessons_final/lessons_final/
  inflating: lessons_final/lessons_final/v1_l1.pkl  
  inflating: lessons_final/lessons_final/v1_l10.pkl  
  inflating: lessons_final/lessons_final/v1_l100.pkl  
  inflating: lessons_final/lessons_final/v1_l101.pkl  
  inflating: lessons_final/lessons_final/v1_l102.pkl  
  inflating: lessons_final/lessons_final/v1_l103.pkl  
  inflating: lessons_final/lessons_final/v1_l104.pkl  
  inflating: lessons_final/lessons_final/v1_l105.pkl  
  inflating: lessons_final/lessons_final/v1_l106.pkl  
  inflating: lessons_final/lessons_final/v1_l107.pkl  
  inflating: lessons_final/lessons_final/v1_l108.pkl  
  inflating: lessons_final/lessons_final/v1_l109.pkl  
  inflating: lessons_final/lessons_final/v1_l11.pkl  
  inflating: lessons_final/lessons_final/v1_l110.pkl  
  inflating: lessons_final/lessons_final/v1_l111.pkl  
  inflating: lessons_final/lessons_final/v1_l112.pkl  
  inflating: lessons_final/lessons_final/v1_l113.pkl  

# Pretraining

In [None]:
import os
import pickle
import random
import torch
import stanza
import torch.nn as nn
from datasets import Dataset
from transformers import (
    T5Tokenizer, T5Config, Trainer, TrainingArguments,
    T5ForConditionalGeneration
)
from transformers import EarlyStoppingCallback

# ──────── Device ────────
# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ──────── Stanza setup ────────
# Fetch the English Stanza models, then build the annotation pipeline that
# `generate()` below uses to parse raw text into POS / dependency / NER tags.
stanza.download("en", verbose=False)
nlp = stanza.Pipeline(
    lang="en",
    processors="tokenize,pos,lemma,depparse,ner",
    # NOTE(review): presumably disables Stanza's own sentence splitting so each
    # input string is treated as pre-segmented — confirm against the Stanza docs.
    tokenize_no_ssplit=True,
    use_gpu=torch.cuda.is_available()
)

# ──────── Combo‑ID utils ────────
# Registry state shared by every call to get_combo_id().
combo_counter = 1          # next id to hand out; 0 is reserved for "unknown"
combo2id = {}              # (pos, dep, ner, morph) -> assigned integer id
combo_frequency = {}       # (pos, dep, ner, morph) -> number of times requested
MAX_COMBO_ID = 1800        # ids are handed out strictly below this bound
UNKNOWN_COMBO_ID = 0       # returned once the registry is full

def get_combo_id(pos, dep, ner, morph):
    """Return the integer id for a (pos, dep, ner, morph) combination.

    Every call increments the combination's entry in ``combo_frequency``.
    A combination seen for the first time is assigned the next free id; once
    ``combo_counter`` has reached ``MAX_COMBO_ID`` new combinations are not
    registered and ``UNKNOWN_COMBO_ID`` is returned instead.
    """
    global combo_counter
    signature = (pos, dep, ner, morph)
    combo_frequency[signature] = 1 + combo_frequency.get(signature, 0)

    existing = combo2id.get(signature)
    if existing is not None:
        return existing

    # Registry full: do not register, fall back to the shared unknown id.
    if combo_counter >= MAX_COMBO_ID:
        return UNKNOWN_COMBO_ID

    assigned = combo_counter
    combo2id[signature] = assigned
    combo_counter = assigned + 1
    return assigned

def get_char_offset_ner_map(doc):
    """Map every character offset covered by an entity to that entity's type.

    Iterates ``doc.ents`` in order and covers ``[start_char, end_char)`` for
    each entity; if two entities overlap, the later one wins.
    """
    return {
        offset: entity.type
        for entity in doc.ents
        for offset in range(entity.start_char, entity.end_char)
    }

def extract_combo_ids_from_doc(doc, tokenizer, max_len=256):
    """Build one combo id per sub-token for every word in ``doc``.

    For each word the (upos, deprel, NER type, feats) combination is turned
    into an id via ``get_combo_id``; missing fields default to "X", "dep",
    "O" and "" respectively. The id is repeated once per sub-token, where the
    sub-token count is the length of ``tokenizer(word.text).input_ids`` minus
    one. The resulting list is cut to at most ``max_len`` entries.
    """
    char_to_ner = get_char_offset_ner_map(doc)
    per_token_ids = []
    all_words = (word for sentence in doc.sentences for word in sentence.words)
    for word in all_words:
        # The entity type is looked up at the word's first character offset.
        entity_type = char_to_ner.get(word.start_char or 0, "O")
        combo_id = get_combo_id(
            word.upos or "X",
            word.deprel or "dep",
            entity_type,
            word.feats or "",
        )
        pieces = len(tokenizer(word.text).input_ids) - 1
        per_token_ids += [combo_id] * pieces
    return per_token_ids[:max_len]

# ──────── Simple span‑masking ────────
def simple_t5_mask(text):
    """Replace one random span of 1-3 whitespace-separated words with a sentinel.

    Returns ``(masked_text, target_text)``. The masked text has the span
    replaced by ``<extra_id_0>``; the target is ``<extra_id_0>`` followed by
    the removed words. Texts with fewer than four words are returned
    unchanged as ``(text, text)``.
    """
    tokens = text.strip().split()
    n_tokens = len(tokens)
    if n_tokens < 4:
        return text, text

    sentinel = "<extra_id_0>"
    # Draw the span width first, then its start, so at least one word survives.
    width = random.randint(1, min(3, n_tokens - 1))
    begin = random.randint(0, n_tokens - width)
    end = begin + width

    masked_text = " ".join([*tokens[:begin], sentinel, *tokens[end:]])
    target_text = sentinel + " " + " ".join(tokens[begin:end])
    return masked_text, target_text

# ──────── Model ────────
class SyntaxT5(T5ForConditionalGeneration):
    """T5 whose encoder input fuses token embeddings with "combo" embeddings.

    Each encoder position carries an integer combo id. The token embedding and
    the combo embedding are concatenated and projected back to ``d_model`` by
    ``fuse_proj`` before being fed to the encoder as ``inputs_embeds``.

    Combo ids can be supplied per call (``combo_ids=...``) or parked on the
    instance in ``stored_combo_ids``; when neither is available, id 0 is used
    for every position.
    """

    def __init__(self, config, combo_vocab_size=MAX_COMBO_ID):
        """Build the base T5 model and add the combo embedding and fusion layers.

        Args:
            config: T5 configuration passed to the base class.
            combo_vocab_size: number of rows in the combo embedding table.
        """
        super().__init__(config)
        # The base constructor already creates `self.shared`, wires it into the
        # encoder/decoder and ties `lm_head` to it. Re-creating the embedding
        # here would leave `lm_head` tied to the discarded tensor, i.e. silently
        # untied from the embedding that is actually trained.
        self.combo_embed = nn.Embedding(combo_vocab_size, config.d_model)
        self.fuse_proj = nn.Linear(2 * config.d_model, config.d_model)
        self.stored_combo_ids = None

    def _fuse_inputs(self, input_ids=None, inputs_embeds=None, combo_ids=None):
        """Return encoder input embeddings fused with combo embeddings.

        ``combo_ids`` is trimmed or right-padded with 0 (the unknown-combo id)
        so its length matches the token sequence; ``None`` means all zeros.
        """
        tok_emb = inputs_embeds if inputs_embeds is not None else self.shared(input_ids)
        batch_size, seq_len = tok_emb.shape[0], tok_emb.shape[1]
        if combo_ids is None:
            combo_ids = torch.zeros(
                (batch_size, seq_len), dtype=torch.long, device=tok_emb.device
            )
        else:
            combo_ids = combo_ids.to(tok_emb.device)[:, :seq_len]
            shortfall = seq_len - combo_ids.shape[1]
            if shortfall > 0:
                combo_ids = nn.functional.pad(combo_ids, (0, shortfall), value=0)
        cmb_emb = self.combo_embed(combo_ids)
        return self.fuse_proj(torch.cat([tok_emb, cmb_emb], dim=-1))

    def generate(self, inputs=None, *args, combo_ids=None, **kwargs):
        """Generate with a combo-aware encoder pass.

        The stock ``generate`` calls the encoder module directly, which would
        bypass the fusion done in ``forward``. To keep inference consistent
        with training, the encoder is run here on the fused embeddings and the
        result is handed to the base implementation as ``encoder_outputs``.

        Args:
            inputs: encoder input ids (may also be given as ``input_ids=``).
            combo_ids: optional combo ids; defaults to ``stored_combo_ids``.
            *args, **kwargs: forwarded unchanged to the base ``generate``.
        """
        input_ids = kwargs.get("input_ids", inputs)
        if kwargs.get("encoder_outputs") is None and input_ids is not None:
            if combo_ids is None:
                combo_ids = self.stored_combo_ids
            with torch.no_grad():
                fused = self._fuse_inputs(input_ids=input_ids, combo_ids=combo_ids)
                kwargs["encoder_outputs"] = self.encoder(
                    inputs_embeds=fused,
                    attention_mask=kwargs.get("attention_mask"),
                    return_dict=True,
                )
        return super().generate(inputs, *args, **kwargs)

    def prepare_inputs_for_generation(
        self, input_ids, past_key_values=None, attention_mask=None,
        use_cache=None, encoder_outputs=None, **kwargs
    ):
        """Assemble the per-step model inputs used by the generation loop.

        ``input_ids`` here are the decoder ids generated so far. When a cache
        is present only the not-yet-cached suffix is passed on, so tokens are
        not fed to the decoder twice.
        """
        if past_key_values is not None:
            if hasattr(past_key_values, "get_seq_length"):
                past_len = past_key_values.get_seq_length()
            else:
                # Legacy tuple cache: (layers) x (key, value, ...) tensors.
                past_len = past_key_values[0][0].shape[2]
            if input_ids.shape[1] > past_len:
                keep_from = past_len
            else:
                keep_from = input_ids.shape[1] - 1
            input_ids = input_ids[:, keep_from:]

        full_combo = self.stored_combo_ids
        if full_combo is None or past_key_values is None:
            combo_ids = full_combo
        else:
            combo_ids = full_combo[:, -1].unsqueeze(-1)
        return {
            "decoder_input_ids":  input_ids,
            "attention_mask":     attention_mask,
            "encoder_outputs":    encoder_outputs,
            "past_key_values":    past_key_values,
            "use_cache":          use_cache,
            # Ignored by the decoding path of forward(); kept for compatibility.
            "combo_ids":          combo_ids,
        }

    def forward(
        self, input_ids=None, inputs_embeds=None, attention_mask=None,
        decoder_input_ids=None, decoder_attention_mask=None,
        encoder_outputs=None, past_key_values=None, labels=None,
        use_cache=None, **kwargs
    ):
        """Run the model, fusing combo embeddings into the encoder input.

        When ``encoder_outputs`` is given (decoding steps) the call is passed
        straight to the base model. Otherwise the encoder input is built from
        ``input_ids``/``inputs_embeds`` plus ``kwargs["combo_ids"]`` (falling
        back to ``stored_combo_ids``). Other extra keyword arguments are
        accepted and ignored.
        """
        # generation path
        if encoder_outputs is not None:
            return super().forward(
                encoder_outputs=encoder_outputs,
                past_key_values=past_key_values,
                attention_mask=attention_mask,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                labels=labels,
                use_cache=use_cache
            )
        # training/encode path
        combo_ids = kwargs.get("combo_ids", self.stored_combo_ids)
        fused = self._fuse_inputs(
            input_ids=input_ids, inputs_embeds=inputs_embeds, combo_ids=combo_ids
        )
        return super().forward(
            inputs_embeds=fused,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask,
            labels=labels,
            use_cache=use_cache,
            past_key_values=past_key_values
        )

# ──────── Data Collator ────────
class DataCollatorWithCombo:
    """Stack pre-padded examples into a batch of ``torch.long`` tensors.

    Every example must already provide equal-length lists under the keys
    ``input_ids``, ``attention_mask``, ``labels`` and ``combo_ids``.
    """

    # Fields copied into the batch, in output order.
    _FIELDS = ("input_ids", "attention_mask", "labels", "combo_ids")

    def __init__(self, tokenizer):
        # Stored for parity with other collators; not used when collating.
        self.tokenizer = tokenizer

    def __call__(self, batch):
        """Return a dict with one (batch, length) tensor per field."""
        collated = {}
        for field in self._FIELDS:
            rows = [example[field] for example in batch]
            collated[field] = torch.tensor(rows, dtype=torch.long)
        return collated

# ──────── Preprocess ────────
def preprocess(example, tokenizer, max_source_len=256, max_target_len=64):
    """Tokenize one example and attach padded combo ids and masked labels.

    Args:
        example: mapping with "input" (masked text), "output" (target text)
            and "combo_ids" (list of ints). It is not modified.
        tokenizer: callable tokenizer exposing ``pad_token_id``.
        max_source_len: padded/truncated length of the encoder input and of
            the combo ids.
        max_target_len: padded/truncated length of the labels.

    Returns:
        The tokenizer output for the input text, extended with "combo_ids"
        (exactly ``max_source_len`` long, right-padded with
        ``UNKNOWN_COMBO_ID``) and "labels" (target ids with padding replaced
        by -100).
    """
    src = tokenizer(
        example["input"],
        padding="max_length",
        truncation=True,
        max_length=max_source_len
    )
    tgt = tokenizer(
        example["output"],
        padding="max_length",
        truncation=True,
        max_length=max_target_len
    )
    # Work on a copy: extending the caller's list in place would grow it again
    # on every call that reuses the same example.
    combo = list(example["combo_ids"])[:max_source_len]
    combo += [UNKNOWN_COMBO_ID] * (max_source_len - len(combo))
    src["combo_ids"] = combo
    # -100 marks positions the loss must ignore.
    pad_id = tokenizer.pad_token_id
    src["labels"] = [-100 if tok == pad_id else tok for tok in tgt["input_ids"]]
    return src

# ──────── Prediction Cleaning ────────
def clean_prediction(raw_pred, tokenizer):
    """Strip the tokenizer's pad and EOS markers and surrounding whitespace."""
    cleaned = raw_pred
    for marker in (tokenizer.pad_token, tokenizer.eos_token):
        cleaned = cleaned.replace(marker, "")
    return cleaned.strip()

# ──────── Evaluation ────────
def evaluate(model, tokenizer, dataset):
    """Greedy-decode every example and report exact-match accuracy.

    Args:
        model: model exposing ``eval()``, ``generate()`` and a writable
            ``stored_combo_ids`` attribute.
        tokenizer: tokenizer used to encode inputs and decode predictions.
        dataset: sized iterable of dicts with "input", "output" and
            "combo_ids".

    Returns:
        The fraction of examples whose cleaned prediction equals the stripped
        gold output; 0.0 for an empty dataset.
    """
    model.eval()
    total = len(dataset)
    if total == 0:
        # Nothing to score; avoid dividing by zero below.
        print("✅ Eval Accuracy: 0/0 (empty dataset)")
        return 0.0

    # Remember whatever combo ids were parked on the model so that evaluation
    # does not leak the last example's ids into later forward passes.
    previous_combo = getattr(model, "stored_combo_ids", None)
    correct = 0
    try:
        for ex in dataset:
            inp  = ex["input"]
            gold = ex["output"].strip()
            enc = tokenizer(
                inp, return_tensors="pt",
                padding=True, truncation=True, max_length=256
            ).to(device)
            # Match the combo ids to the actual encoded length (a single
            # example is not padded out to 256 tokens).
            seq_len = enc["input_ids"].shape[1]
            combo = (list(ex["combo_ids"]) + [UNKNOWN_COMBO_ID] * seq_len)[:seq_len]
            model.stored_combo_ids = torch.tensor([combo], device=device)
            with torch.no_grad():
                out_ids = model.generate(
                    input_ids=enc["input_ids"],
                    attention_mask=enc["attention_mask"],
                    max_new_tokens=20,
                    do_sample=False,
                    use_cache=True
                )
            raw  = tokenizer.decode(out_ids[0], skip_special_tokens=False)
            pred = clean_prediction(raw, tokenizer)
            if pred == gold:
                correct += 1
    finally:
        model.stored_combo_ids = previous_combo

    accuracy = correct / total
    print(f"✅ Eval Accuracy: {correct}/{total} = {accuracy:.2f}")
    torch.cuda.empty_cache()
    return accuracy

# ──────── Training per‐level ────────
def train_level(dataset, tokenizer, model, args, level):
    """Train ``model`` on one curriculum level.

    The level's dataset is tokenized with ``preprocess``, split 95/5 into
    train and validation sets (fixed seed 42) and trained with a ``Trainer``
    that stops early after three evaluations without improvement. The trainer
    is dropped and the CUDA cache cleared before returning.
    """
    print(f"\n🔁 Training curriculum level {level}")

    def _encode(example):
        return preprocess(example, tokenizer)

    encoded = dataset.map(
        _encode,
        remove_columns=["input", "output", "combo_ids"],
    )
    splits = encoded.train_test_split(test_size=0.05, seed=42)
    train_set, eval_set = splits["train"], splits["test"]
    print(f"{len(train_set)} train | {len(eval_set)} val")

    early_stop = EarlyStoppingCallback(
        early_stopping_patience=3,
        early_stopping_threshold=0.0,
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_set,
        eval_dataset=eval_set,
        tokenizer=tokenizer,
        callbacks=[early_stop],
        data_collator=DataCollatorWithCombo(tokenizer),
    )
    trainer.train()

    # Release the trainer (optimizer state etc.) before the next level starts.
    del trainer
    torch.cuda.empty_cache()

# ──────── Setup model/tokenizer/args ────────
# Tokenizer and architecture come from the "t5-base" checkpoint name.
# NOTE(review): the model is constructed from the config only (no
# `from_pretrained` call on the model), so it presumably starts from freshly
# initialised weights — confirm this is the intended "pretraining" setup.
tokenizer = T5Tokenizer.from_pretrained("t5-base")
config    = T5Config.from_pretrained("t5-base")
model     = SyntaxT5(config=config, combo_vocab_size=MAX_COMBO_ID).to(device)

# One TrainingArguments object is shared by every curriculum level.
args = TrainingArguments(
    output_dir="./t5_syntax_curriculum",
    eval_strategy="epoch",            # evaluate once per epoch (older Transformers releases name this `evaluation_strategy`)
    save_strategy="epoch",                   # save a checkpoint every epoch (must match eval_strategy for best-model loading)
    save_total_limit=1,                      # cap the number of checkpoints kept on disk
    load_best_model_at_end=True,             # reload the best checkpoint when training ends
    metric_for_best_model="eval_loss",       # "best" is decided by validation loss
    greater_is_better=False,                 # lower loss is better
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    logging_steps=5,
    learning_rate=1e-5,
    # weight_decay=0.01,
    # warmup_ratio=0.1,
    report_to="none",
    remove_unused_columns=False,             # keep "combo_ids", which is consumed via **kwargs in forward()
    dataloader_pin_memory=False
)

# ──────── Build curriculum from your lesson files ────────
# Folder holding the unzipped lesson pickles (one file per lesson).
pkl_folder = "./lessons_final/lessons_final"
# Filled below with (file name, Dataset) pairs in training order.
curriculum = []

import re

def extract_vol_lesson(filename):
    """Return ``(volume, lesson)`` parsed from a ``v<vol>_l<lesson>.pkl`` name.

    Names that do not start with that pattern sort last as ``(999, 999)``.
    """
    parsed = re.match(r'v(\d+)_l(\d+)\.pkl', filename)
    if parsed is None:
        return (999, 999)
    volume, lesson = parsed.groups()
    return (int(volume), int(lesson))

all_files = os.listdir(pkl_folder)
# Keep files whose names start with v<vol>_l<lesson>.pkl and order them
# numerically by (volume, lesson) rather than lexicographically.
pkl_files = sorted(
    [f for f in all_files if re.match(r'v\d+_l\d+\.pkl', f)],
    key=extract_vol_lesson
)

for fname in pkl_files:
    # The regex above is not anchored at the end, so this also drops names
    # such as "v1_l1.pkl.bak".
    if not fname.endswith(".pkl"):
        continue
    # SECURITY: pickle.load can execute arbitrary code; only load lesson files
    # from a trusted source.
    with open(os.path.join(pkl_folder, fname), "rb") as f:
        docs = pickle.load(f)  # list of stanza.Document
    examples = []
    for d in docs:
        # One randomly masked (input, target) pair per document.
        masked, target = simple_t5_mask(d.text)
        # NOTE(review): combo ids are derived from the full parsed document,
        # while "input" is the masked text in which the span is replaced by a
        # single sentinel; positions after the mask may not line up with the
        # tokenized input — confirm this is intended.
        combo_ids = extract_combo_ids_from_doc(d, tokenizer, max_len=256)
        examples.append({
            "input":     masked,
            "output":    target,
            "combo_ids": combo_ids
        })
    ds = Dataset.from_list(examples)
    curriculum.append((fname, ds))
    print(f"✅ Loaded {fname} ({len(examples)} examples)")

# ──────── Run curriculum ────────
# Train on the lessons in order. The same model, tokenizer and args objects
# are passed to every call, so each level continues from the previous one.
for i, (lesson_name, level_ds) in enumerate(curriculum, start=1):
    # Debug switch: uncomment to stop after the first two lessons.
    # if i==3: break
    print(f"▶︎ Lesson {lesson_name}")
    train_level(level_ds, tokenizer, model, args, i)

# ──────── Final quick test ────────
def generate(text):
    """Greedy-decode a completion for ``text`` with the global model.

    The text is parsed with the Stanza pipeline to obtain combo ids, which are
    padded to 256 entries and parked on ``model.stored_combo_ids`` before
    calling ``model.generate``. Returns the decoded output with pad/EOS
    markers removed.
    """
    model.eval()
    encoded = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=256,
    ).to(device)

    parsed = nlp(text)
    combo_ids = extract_combo_ids_from_doc(parsed, tokenizer, max_len=256)
    filler = [UNKNOWN_COMBO_ID] * (256 - len(combo_ids))
    model.stored_combo_ids = torch.tensor([combo_ids + filler], device=device)

    with torch.no_grad():
        generated = model.generate(
            input_ids=encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            max_new_tokens=20,
            do_sample=False,
            use_cache=True,
        )
    # Special tokens are kept while decoding so the sentinel survives.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=False)
    return clean_prediction(decoded, tokenizer)

# Smoke test: run one hand-written masked sentence through generate().
print("\n🧪 Final Test Generation:")
# NOTE(review): this prompt uses two sentinels, whereas simple_t5_mask only
# ever produces <extra_id_0> — the second one was never seen during training.
test_text = "They are <extra_id_0> the car at the <extra_id_1>."
print("INPUT :", test_text)
print("OUTPUT:", generate(test_text))


Using device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


✅ Loaded v1_l1.pkl (8 examples)
✅ Loaded v1_l2.pkl (8 examples)
✅ Loaded v1_l3.pkl (13 examples)
✅ Loaded v1_l4.pkl (12 examples)
✅ Loaded v1_l5.pkl (14 examples)
✅ Loaded v1_l6.pkl (48 examples)
✅ Loaded v1_l7.pkl (14 examples)
✅ Loaded v1_l8.pkl (25 examples)
✅ Loaded v1_l9.pkl (9 examples)
✅ Loaded v1_l10.pkl (21 examples)
✅ Loaded v1_l11.pkl (13 examples)
✅ Loaded v1_l12.pkl (18 examples)
✅ Loaded v1_l13.pkl (11 examples)
✅ Loaded v1_l14.pkl (25 examples)
✅ Loaded v1_l15.pkl (16 examples)
✅ Loaded v1_l16.pkl (9 examples)
✅ Loaded v1_l17.pkl (27 examples)
✅ Loaded v1_l18.pkl (18 examples)
✅ Loaded v1_l19.pkl (11 examples)
✅ Loaded v1_l20.pkl (31 examples)
✅ Loaded v1_l21.pkl (9 examples)
✅ Loaded v1_l22.pkl (40 examples)
✅ Loaded v1_l23.pkl (8 examples)
✅ Loaded v1_l24.pkl (28 examples)
✅ Loaded v1_l25.pkl (42 examples)
✅ Loaded v1_l26.pkl (18 examples)
✅ Loaded v1_l27.pkl (24 examples)
✅ Loaded v1_l28.pkl (8 examples)
✅ Loaded v1_l29.pkl (21 examples)
✅ Loaded v1_l30.pkl (15 exampl

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

7 train | 1 val


  trainer = Trainer(
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,No log,9.991124
2,No log,9.433549
3,No log,9.165462


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l2.pkl

🔁 Training curriculum level 2


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

7 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,8.766318
2,No log,8.469871
3,No log,8.33368


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l3.pkl

🔁 Training curriculum level 3


Map:   0%|          | 0/13 [00:00<?, ? examples/s]

12 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,8.070036
2,No log,7.982329
3,No log,7.944316


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l4.pkl

🔁 Training curriculum level 4


Map:   0%|          | 0/12 [00:00<?, ? examples/s]

11 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.679104
2,No log,4.349
3,No log,4.266916


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l5.pkl

🔁 Training curriculum level 5


Map:   0%|          | 0/14 [00:00<?, ? examples/s]

13 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.270329
2,No log,7.281817
3,No log,7.273552


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l6.pkl

🔁 Training curriculum level 6


Map:   0%|          | 0/48 [00:00<?, ? examples/s]

45 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.391801
2,6.351000,4.754725
3,6.351000,4.554489


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l7.pkl

🔁 Training curriculum level 7


Map:   0%|          | 0/14 [00:00<?, ? examples/s]

13 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.887775
2,No log,6.762506
3,No log,6.699329


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l8.pkl

🔁 Training curriculum level 8


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

23 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.358781
2,No log,6.249172
3,6.115800,6.203819


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l9.pkl

🔁 Training curriculum level 9


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

8 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.553709
2,No log,6.473855
3,No log,6.430363


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l10.pkl

🔁 Training curriculum level 10


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.537376
2,No log,4.383162
3,5.542600,4.33562


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l11.pkl

🔁 Training curriculum level 11


Map:   0%|          | 0/13 [00:00<?, ? examples/s]

12 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.467434
2,No log,4.273802
3,No log,4.190965


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l12.pkl

🔁 Training curriculum level 12


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.188636
2,No log,3.948383
3,5.420600,3.824743


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l13.pkl

🔁 Training curriculum level 13


Map:   0%|          | 0/11 [00:00<?, ? examples/s]

10 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.102031
2,No log,6.28585
3,No log,6.308338


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l14.pkl

🔁 Training curriculum level 14


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

23 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.59611
2,No log,3.238805
3,5.060900,3.095619


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l15.pkl

🔁 Training curriculum level 15


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.167048
2,No log,6.789825
3,No log,6.613706


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l16.pkl

🔁 Training curriculum level 16


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

8 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.187872
2,No log,3.046776
3,No log,2.999738


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l17.pkl

🔁 Training curriculum level 17


Map:   0%|          | 0/27 [00:00<?, ? examples/s]

25 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.709258
2,No log,6.556247
3,5.411200,6.50736


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l18.pkl

🔁 Training curriculum level 18


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.551007
2,No log,7.538733
3,5.469100,7.552305


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l19.pkl

🔁 Training curriculum level 19


Map:   0%|          | 0/11 [00:00<?, ? examples/s]

10 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.587352
2,No log,5.664672
3,No log,5.690851


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l20.pkl

🔁 Training curriculum level 20


Map:   0%|          | 0/31 [00:00<?, ? examples/s]

29 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.384992
2,No log,5.04655
3,5.359300,4.936634


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l21.pkl

🔁 Training curriculum level 21


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

8 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.156086
2,No log,6.802928
3,No log,6.661811


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l22.pkl

🔁 Training curriculum level 22


Map:   0%|          | 0/40 [00:00<?, ? examples/s]

38 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.907613
2,5.410000,4.795156
3,5.410000,4.767257


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l23.pkl

🔁 Training curriculum level 23


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

7 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.988751
2,No log,6.69829
3,No log,6.556186


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l24.pkl

🔁 Training curriculum level 24


Map:   0%|          | 0/28 [00:00<?, ? examples/s]

26 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.696348
2,No log,4.506257
3,4.860100,4.441668


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l25.pkl

🔁 Training curriculum level 25


Map:   0%|          | 0/42 [00:00<?, ? examples/s]

39 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.284567
2,4.792900,2.948128
3,4.792900,2.858136


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l26.pkl

🔁 Training curriculum level 26


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.374984
2,No log,5.046687
3,4.754100,4.92183


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l27.pkl

🔁 Training curriculum level 27


Map:   0%|          | 0/24 [00:00<?, ? examples/s]

22 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.070104
2,No log,4.9838
3,5.074600,4.99998


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l28.pkl

🔁 Training curriculum level 28


Map:   0%|          | 0/8 [00:00<?, ? examples/s]

7 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.243019
2,No log,4.265888
3,No log,4.274193


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l29.pkl

🔁 Training curriculum level 29


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.215544
2,No log,6.176157
3,6.082000,6.190151


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l30.pkl

🔁 Training curriculum level 30


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

14 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.835561
2,No log,5.881696
3,No log,5.858207


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l31.pkl

🔁 Training curriculum level 31


Map:   0%|          | 0/34 [00:00<?, ? examples/s]

32 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.956567
2,No log,5.790472
3,5.263600,5.733857


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l32.pkl

🔁 Training curriculum level 32


Map:   0%|          | 0/11 [00:00<?, ? examples/s]

10 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.907242
2,No log,3.893476
3,No log,3.903487


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l33.pkl

🔁 Training curriculum level 33


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

16 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.613088
2,No log,4.397347
3,No log,4.302556


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l34.pkl

🔁 Training curriculum level 34


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.774001
2,No log,5.798128
3,No log,5.831157


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l35.pkl

🔁 Training curriculum level 35


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

16 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.747036
2,No log,4.81209
3,No log,4.829575


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l36.pkl

🔁 Training curriculum level 36


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.199189
2,No log,5.201466
3,No log,5.175712


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l37.pkl

🔁 Training curriculum level 37


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

16 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.598525
2,No log,5.313956
3,No log,5.202744


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l38.pkl

🔁 Training curriculum level 38


Map:   0%|          | 0/11 [00:00<?, ? examples/s]

10 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.332885
2,No log,4.365878
3,No log,4.399755


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l39.pkl

🔁 Training curriculum level 39


Map:   0%|          | 0/32 [00:00<?, ? examples/s]

30 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.967001
2,No log,4.826792
3,5.061300,4.775837


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l40.pkl

🔁 Training curriculum level 40


Map:   0%|          | 0/10 [00:00<?, ? examples/s]

9 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.284198
2,No log,3.922708
3,No log,3.756578


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l41.pkl

🔁 Training curriculum level 41


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.811718
2,No log,2.805179
3,4.038400,2.808511


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l42.pkl

🔁 Training curriculum level 42


Map:   0%|          | 0/13 [00:00<?, ? examples/s]

12 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.218824
2,No log,7.24147
3,No log,7.239746


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l43.pkl

🔁 Training curriculum level 43


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.46098
2,No log,5.170878
3,5.114300,5.084831


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l44.pkl

🔁 Training curriculum level 44


Map:   0%|          | 0/14 [00:00<?, ? examples/s]

13 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.031363
2,No log,5.080306
3,No log,5.070961


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l45.pkl

🔁 Training curriculum level 45


Map:   0%|          | 0/38 [00:00<?, ? examples/s]

36 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.824254
2,5.047700,4.718675
3,5.047700,4.671156


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l46.pkl

🔁 Training curriculum level 46


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

8 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.992392
2,No log,3.044289
3,No log,3.059331


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l47.pkl

🔁 Training curriculum level 47


Map:   0%|          | 0/32 [00:00<?, ? examples/s]

30 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.799491
2,No log,3.692263
3,4.637000,3.669147


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l48.pkl

🔁 Training curriculum level 48


Map:   0%|          | 0/14 [00:00<?, ? examples/s]

13 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.328684
2,No log,4.252448
3,No log,4.215423


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l49.pkl

🔁 Training curriculum level 49


Map:   0%|          | 0/34 [00:00<?, ? examples/s]

32 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.009851
2,No log,4.841867
3,4.826100,4.780855


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l50.pkl

🔁 Training curriculum level 50


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

14 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.207878
2,No log,5.974093
3,No log,5.847844


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l51.pkl

🔁 Training curriculum level 51


Map:   0%|          | 0/74 [00:00<?, ? examples/s]

70 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.6067,5.050821
2,5.0419,4.796261
3,4.8292,4.719754


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l52.pkl

🔁 Training curriculum level 52


Map:   0%|          | 0/14 [00:00<?, ? examples/s]

13 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.488422
2,No log,4.405349
3,No log,4.386934


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l53.pkl

🔁 Training curriculum level 53


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

52 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.012752
2,5.287100,4.902167
3,4.761300,4.855028


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l54.pkl

🔁 Training curriculum level 54


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.388607
2,No log,6.367254
3,No log,6.342481


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l55.pkl

🔁 Training curriculum level 55


Map:   0%|          | 0/31 [00:00<?, ? examples/s]

29 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.231201
2,No log,4.976268
3,4.628100,4.9121


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l56.pkl

🔁 Training curriculum level 56


Map:   0%|          | 0/13 [00:00<?, ? examples/s]

12 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.790089
2,No log,4.78111
3,No log,4.74612


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l57.pkl

🔁 Training curriculum level 57


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

23 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.195446
2,No log,3.993609
3,5.058800,3.938477


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l58.pkl

🔁 Training curriculum level 58


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.971747
2,No log,5.892635
3,5.400900,5.857888


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l59.pkl

🔁 Training curriculum level 59


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.804523
2,No log,4.483285
3,4.229700,4.321891


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l60.pkl

🔁 Training curriculum level 60


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.641323
2,No log,7.663587
3,5.483100,7.716613


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l61.pkl

🔁 Training curriculum level 61


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.522086
2,5.192500,4.244359
3,4.799100,4.121978


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l62.pkl

🔁 Training curriculum level 62


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.171953
2,No log,5.135607
3,4.911800,5.135749


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l63.pkl

🔁 Training curriculum level 63


Map:   0%|          | 0/26 [00:00<?, ? examples/s]

24 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.180568
2,No log,4.841126
3,5.062300,4.727434


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l64.pkl

🔁 Training curriculum level 64


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.935244
2,No log,5.84848
3,6.070100,5.82221


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l65.pkl

🔁 Training curriculum level 65


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.295159
2,No log,5.21832
3,4.588000,5.147401


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l66.pkl

🔁 Training curriculum level 66


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.573929
2,No log,5.586727
3,No log,5.597447


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l67.pkl

🔁 Training curriculum level 67


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.103588
2,No log,2.826455
3,4.604300,2.727031


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l68.pkl

🔁 Training curriculum level 68


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

14 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.294309
2,No log,6.204519
3,No log,6.161884


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l69.pkl

🔁 Training curriculum level 69


Map:   0%|          | 0/27 [00:00<?, ? examples/s]

25 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.146948
2,No log,4.83945
3,4.847800,4.708283


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l70.pkl

🔁 Training curriculum level 70


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.029495
2,No log,1.97719
3,5.319800,1.963215


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l71.pkl

🔁 Training curriculum level 71


Map:   0%|          | 0/39 [00:00<?, ? examples/s]

37 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.203301
2,5.349100,5.153374
3,5.349100,5.064739


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l72.pkl

🔁 Training curriculum level 72


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.413705
2,No log,4.406352
3,4.153200,4.398121


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l73.pkl

🔁 Training curriculum level 73


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.717559
2,No log,7.561109
3,No log,7.499303


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l74.pkl

🔁 Training curriculum level 74


Map:   0%|          | 0/15 [00:00<?, ? examples/s]

14 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.530486
2,No log,3.534414
3,No log,3.52549


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l75.pkl

🔁 Training curriculum level 75


Map:   0%|          | 0/58 [00:00<?, ? examples/s]

55 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.375843
2,5.215600,4.309277
3,4.993900,4.326586


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l76.pkl

🔁 Training curriculum level 76


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.80573
2,No log,5.696525
3,5.408500,5.663727


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l77.pkl

🔁 Training curriculum level 77


Map:   0%|          | 0/34 [00:00<?, ? examples/s]

32 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.433268
2,No log,4.360011
3,4.963900,4.327003


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l78.pkl

🔁 Training curriculum level 78


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.625263
2,No log,4.368491
3,5.950100,4.27613


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l79.pkl

🔁 Training curriculum level 79


Map:   0%|          | 0/42 [00:00<?, ? examples/s]

39 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.436632
2,4.583200,5.0961
3,4.583200,5.0177


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l80.pkl

🔁 Training curriculum level 80


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.048161
2,No log,4.022357
3,5.556100,3.997866


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l81.pkl

🔁 Training curriculum level 81


Map:   0%|          | 0/45 [00:00<?, ? examples/s]

42 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.479883
2,3.913900,4.503926
3,3.913900,4.490074


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l82.pkl

🔁 Training curriculum level 82


Map:   0%|          | 0/25 [00:00<?, ? examples/s]

23 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.959079
2,No log,5.991883
3,5.198200,6.033425


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l83.pkl

🔁 Training curriculum level 83


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.116808
2,4.559600,3.734688
3,4.061400,3.60164


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l84.pkl

🔁 Training curriculum level 84


Map:   0%|          | 0/19 [00:00<?, ? examples/s]

18 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.305417
2,No log,3.40446
3,4.282600,3.446522


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l85.pkl

🔁 Training curriculum level 85


Map:   0%|          | 0/47 [00:00<?, ? examples/s]

44 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.750578
2,4.941700,3.69385
3,4.941700,3.670919


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l86.pkl

🔁 Training curriculum level 86


Map:   0%|          | 0/19 [00:00<?, ? examples/s]

18 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.789002
2,No log,5.711319
3,5.134700,5.75846


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l87.pkl

🔁 Training curriculum level 87


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.789196
2,4.525400,3.703505
3,4.173600,3.68445


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l88.pkl

🔁 Training curriculum level 88


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.192277
2,No log,4.136516
3,5.335600,4.055459


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l89.pkl

🔁 Training curriculum level 89


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

47 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.579217
2,4.916400,4.499461
3,4.916400,4.47893


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l90.pkl

🔁 Training curriculum level 90


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.453714
2,No log,5.332522
3,4.964700,5.283654


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l91.pkl

🔁 Training curriculum level 91


Map:   0%|          | 0/31 [00:00<?, ? examples/s]

29 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.379879
2,No log,4.277802
3,5.224200,4.314436


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l92.pkl

🔁 Training curriculum level 92


Map:   0%|          | 0/13 [00:00<?, ? examples/s]

12 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,7.927792
2,No log,7.865082
3,No log,7.851774


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l93.pkl

🔁 Training curriculum level 93


Map:   0%|          | 0/31 [00:00<?, ? examples/s]

29 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.205424
2,No log,4.076444
3,4.823400,4.031028


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l94.pkl

🔁 Training curriculum level 94


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.936215
2,No log,4.002312
3,5.684300,4.021382


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l95.pkl

🔁 Training curriculum level 95


Map:   0%|          | 0/36 [00:00<?, ? examples/s]

34 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.905789
2,4.764100,5.840535
3,4.764100,5.87024


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l96.pkl

🔁 Training curriculum level 96


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.671724
2,No log,3.690902
3,5.187300,3.71825


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l97.pkl

🔁 Training curriculum level 97


Map:   0%|          | 0/53 [00:00<?, ? examples/s]

50 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.278483
2,4.816000,4.10354
3,4.119200,4.048295


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l98.pkl

🔁 Training curriculum level 98


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.312901
2,No log,3.362116
3,5.352900,3.42433


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l99.pkl

🔁 Training curriculum level 99


Map:   0%|          | 0/41 [00:00<?, ? examples/s]

38 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.428851
2,4.626400,5.381033
3,4.626400,5.321993


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l100.pkl

🔁 Training curriculum level 100


Map:   0%|          | 0/19 [00:00<?, ? examples/s]

18 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.703917
2,No log,4.616874
3,5.184900,4.584495


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l101.pkl

🔁 Training curriculum level 101


Map:   0%|          | 0/23 [00:00<?, ? examples/s]

21 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.533463
2,No log,5.524768
3,4.223200,5.466466


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l102.pkl

🔁 Training curriculum level 102


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.26328
2,No log,5.103791
3,4.941500,5.032325


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l103.pkl

🔁 Training curriculum level 103


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

52 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.960461
2,4.509400,3.860641
3,3.961500,3.801807


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l104.pkl

🔁 Training curriculum level 104


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.345103
2,No log,5.263547
3,4.735600,5.255799


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l105.pkl

🔁 Training curriculum level 105


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.156352
2,4.947200,4.611647
3,3.886000,4.47254


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l106.pkl

🔁 Training curriculum level 106


Map:   0%|          | 0/18 [00:00<?, ? examples/s]

17 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.492156
2,No log,4.415027
3,4.892200,4.363454


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l107.pkl

🔁 Training curriculum level 107


Map:   0%|          | 0/49 [00:00<?, ? examples/s]

46 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.372595
2,4.401000,5.291708
3,4.401000,5.276629


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l108.pkl

🔁 Training curriculum level 108


Map:   0%|          | 0/24 [00:00<?, ? examples/s]

22 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.264014
2,No log,5.298323
3,4.752000,5.289476


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l109.pkl

🔁 Training curriculum level 109


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

47 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.222342
2,4.198300,3.727463
3,4.198300,3.533708


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l110.pkl

🔁 Training curriculum level 110


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.851407
2,No log,3.796233
3,4.574200,3.733425


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l111.pkl

🔁 Training curriculum level 111


Map:   0%|          | 0/34 [00:00<?, ? examples/s]

32 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.235098
2,No log,5.2525
3,4.128200,5.197269


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l112.pkl

🔁 Training curriculum level 112


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.528608
2,No log,4.388119
3,5.703400,4.343255


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l113.pkl

🔁 Training curriculum level 113


Map:   0%|          | 0/52 [00:00<?, ? examples/s]

49 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.981066
2,4.026900,4.905758
3,3.710600,4.857281


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l114.pkl

🔁 Training curriculum level 114


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.863679
2,No log,5.73563
3,4.855500,5.697384


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l115.pkl

🔁 Training curriculum level 115


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

52 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.45009
2,4.752300,4.480676
3,4.282400,4.464748


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l116.pkl

🔁 Training curriculum level 116


Map:   0%|          | 0/9 [00:00<?, ? examples/s]

8 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.379805
2,No log,3.262941
3,No log,3.263978


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l117.pkl

🔁 Training curriculum level 117


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.64806
2,4.115800,4.274247
3,3.795200,4.178574


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l118.pkl

🔁 Training curriculum level 118


Map:   0%|          | 0/17 [00:00<?, ? examples/s]

16 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.026185
2,No log,3.998458
3,No log,3.98612


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l119.pkl

🔁 Training curriculum level 119


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

54 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.588712
2,4.873300,4.522735
3,4.430900,4.493604


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l120.pkl

🔁 Training curriculum level 120


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.870421
2,No log,6.787196
3,No log,6.744329


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l121.pkl

🔁 Training curriculum level 121


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

67 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4782,4.31859
2,4.2015,4.146711
3,4.0268,4.054677


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l122.pkl

🔁 Training curriculum level 122


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.224542
2,No log,2.071781
3,4.680200,2.008236


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l123.pkl

🔁 Training curriculum level 123


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.352106
2,4.227000,4.264286
3,4.065600,4.234163


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l124.pkl

🔁 Training curriculum level 124


Map:   0%|          | 0/19 [00:00<?, ? examples/s]

18 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,1.33993
2,No log,1.389008
3,4.850000,1.418917


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l125.pkl

🔁 Training curriculum level 125


Map:   0%|          | 0/43 [00:00<?, ? examples/s]

40 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.960884
2,4.568700,3.740402
3,4.568700,3.652565


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l126.pkl

🔁 Training curriculum level 126


Map:   0%|          | 0/21 [00:00<?, ? examples/s]

19 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.125985
2,No log,6.117809
3,5.101300,6.113833


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l127.pkl

🔁 Training curriculum level 127


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.149522
2,4.174800,2.931677
3,3.861600,2.866445


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l128.pkl

🔁 Training curriculum level 128


Map:   0%|          | 0/23 [00:00<?, ? examples/s]

21 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.365716
2,No log,4.344399
3,5.567300,4.330506


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l129.pkl

🔁 Training curriculum level 129


Map:   0%|          | 0/84 [00:00<?, ? examples/s]

79 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.1583,4.065598
2,3.7566,3.817757
3,3.6376,3.774172


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l130.pkl

🔁 Training curriculum level 130


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.189881
2,No log,5.118772
3,4.874300,5.087178


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l131.pkl

🔁 Training curriculum level 131


Map:   0%|          | 0/44 [00:00<?, ? examples/s]

41 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.447127
2,4.052800,2.32363
3,4.052800,2.291393


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l132.pkl

🔁 Training curriculum level 132


Map:   0%|          | 0/20 [00:00<?, ? examples/s]

19 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.86552
2,No log,5.012977
3,4.670900,5.043087


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l133.pkl

🔁 Training curriculum level 133


Map:   0%|          | 0/24 [00:00<?, ? examples/s]

22 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.097569
2,No log,3.953561
3,4.741200,3.849168


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l134.pkl

🔁 Training curriculum level 134


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.981753
2,No log,5.027887
3,5.097000,5.032565


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l135.pkl

🔁 Training curriculum level 135


Map:   0%|          | 0/93 [00:00<?, ? examples/s]

88 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.1997,4.852912
2,4.1281,4.748581
3,3.5613,4.764081


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l136.pkl

🔁 Training curriculum level 136


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.647625
2,No log,5.661353
3,No log,5.651954


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l137.pkl

🔁 Training curriculum level 137


Map:   0%|          | 0/39 [00:00<?, ? examples/s]

37 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.701061
2,4.618800,3.71802
3,4.618800,3.682512


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l138.pkl

🔁 Training curriculum level 138


Map:   0%|          | 0/19 [00:00<?, ? examples/s]

18 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.031678
2,No log,4.18276
3,3.161200,4.225542


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l139.pkl

🔁 Training curriculum level 139


Map:   0%|          | 0/93 [00:00<?, ? examples/s]

88 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5102,3.793798
2,4.0236,3.468028
3,3.5852,3.333442


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l140.pkl

🔁 Training curriculum level 140


Map:   0%|          | 0/16 [00:00<?, ? examples/s]

15 train | 1 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.773352
2,No log,3.722538
3,No log,3.718434


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l141.pkl

🔁 Training curriculum level 141


Map:   0%|          | 0/99 [00:00<?, ? examples/s]

94 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.444,4.813148
2,4.6347,4.561361
3,5.1581,4.511467


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l142.pkl

🔁 Training curriculum level 142


Map:   0%|          | 0/27 [00:00<?, ? examples/s]

25 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.677622
2,No log,4.767074
3,4.252400,4.793379


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l143.pkl

🔁 Training curriculum level 143


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.776526
2,4.893500,5.714984
3,4.324900,5.713


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v1_l144.pkl

🔁 Training curriculum level 144


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.888448
2,4.124000,4.768965
3,3.731200,4.730197


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l1.pkl

🔁 Training curriculum level 145


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.488882
2,4.853200,5.398634
3,4.662900,5.362403


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l2.pkl

🔁 Training curriculum level 146


Map:   0%|          | 0/78 [00:00<?, ? examples/s]

74 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.3121,4.163671
2,5.0628,4.130285
3,4.8732,4.162151


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l3.pkl

🔁 Training curriculum level 147


Map:   0%|          | 0/89 [00:00<?, ? examples/s]

84 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5382,4.535699
2,4.1647,4.446155
3,3.8809,4.41477


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l4.pkl

🔁 Training curriculum level 148


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4702,4.393434
2,4.4117,4.136923
3,4.1232,4.104982


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l5.pkl

🔁 Training curriculum level 149


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.341169
2,5.140100,6.311915
3,4.431700,6.355831


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l6.pkl

🔁 Training curriculum level 150


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

47 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.908347
2,5.220600,2.954509
3,5.220600,2.984096


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l7.pkl

🔁 Training curriculum level 151


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

65 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8251,3.915725
2,4.1223,3.899793
3,3.9302,3.86909


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l8.pkl

🔁 Training curriculum level 152


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

67 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.0856,4.797251
2,4.384,4.736978
3,4.6096,4.726583


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l9.pkl

🔁 Training curriculum level 153


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

72 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7015,4.614583
2,4.4142,4.609653
3,4.2161,4.597946


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l10.pkl

🔁 Training curriculum level 154


Map:   0%|          | 0/109 [00:00<?, ? examples/s]

103 train | 6 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5603,5.071434
2,4.5394,4.961194
3,4.2618,4.93404


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l11.pkl

🔁 Training curriculum level 155


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.614151
2,4.552400,4.558386
3,4.463200,4.564522


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l12.pkl

🔁 Training curriculum level 156


Map:   0%|          | 0/78 [00:00<?, ? examples/s]

74 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6005,4.262625
2,4.2321,4.38892
3,4.0968,4.390071


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l13.pkl

🔁 Training curriculum level 157


Map:   0%|          | 0/107 [00:00<?, ? examples/s]

101 train | 6 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4244,3.596369
2,4.3338,3.510682
3,3.9948,3.475701


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l14.pkl

🔁 Training curriculum level 158


Map:   0%|          | 0/84 [00:00<?, ? examples/s]

79 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8241,4.517251
2,4.5396,4.463104
3,4.4188,4.446149


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l15.pkl

🔁 Training curriculum level 159


Map:   0%|          | 0/102 [00:00<?, ? examples/s]

96 train | 6 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8041,5.02604
2,4.5055,5.020523
3,4.3656,5.007156


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l16.pkl

🔁 Training curriculum level 160


Map:   0%|          | 0/83 [00:00<?, ? examples/s]

78 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9713,5.229792
2,4.6237,4.982216
3,4.4987,4.934243


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l17.pkl

🔁 Training curriculum level 161


Map:   0%|          | 0/82 [00:00<?, ? examples/s]

77 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6721,4.855238
2,4.3581,4.9143
3,4.1746,4.951469


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l18.pkl

🔁 Training curriculum level 162


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.2094,2.796838
2,3.9611,2.817584
3,3.7838,2.832427


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l19.pkl

🔁 Training curriculum level 163


Map:   0%|          | 0/72 [00:00<?, ? examples/s]

68 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6284,3.572792
2,4.4825,3.585066
3,4.1184,3.608695


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l20.pkl

🔁 Training curriculum level 164


Map:   0%|          | 0/95 [00:00<?, ? examples/s]

90 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7364,3.600145
2,4.6496,3.586286
3,4.3691,3.59419


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l21.pkl

🔁 Training curriculum level 165


Map:   0%|          | 0/88 [00:00<?, ? examples/s]

83 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4883,4.508824
2,4.1554,4.545084
3,4.0155,4.556993


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l22.pkl

🔁 Training curriculum level 166


Map:   0%|          | 0/90 [00:00<?, ? examples/s]

85 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6315,5.276338
2,4.5911,5.089248
3,4.3178,5.072214


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l23.pkl

🔁 Training curriculum level 167


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4503,3.707725
2,4.0252,3.730021
3,3.8504,3.764982


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l24.pkl

🔁 Training curriculum level 168


Map:   0%|          | 0/92 [00:00<?, ? examples/s]

87 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4861,4.479669
2,4.2888,4.613056
3,4.2316,4.637055


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l25.pkl

🔁 Training curriculum level 169


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.618,4.550695
2,4.3716,4.356432
3,4.2112,4.322932


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l26.pkl

🔁 Training curriculum level 170


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

72 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7666,4.346136
2,4.3913,4.242915
3,4.1877,4.261719


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l27.pkl

🔁 Training curriculum level 171


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.290885
2,4.705400,4.197331
3,4.537500,4.169433


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l28.pkl

🔁 Training curriculum level 172


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.109926
2,4.590800,4.032032
3,4.267400,4.036957


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l29.pkl

🔁 Training curriculum level 173


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.324819
2,5.078900,4.312964
3,4.639900,4.290672


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l30.pkl

🔁 Training curriculum level 174


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.245427
2,4.573000,5.173116
3,4.266800,5.214123


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l31.pkl

🔁 Training curriculum level 175


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.837638
2,4.132400,3.822941
3,4.161700,3.830027


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l32.pkl

🔁 Training curriculum level 176


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.0551,4.00277
2,4.034,3.933827
3,3.593,3.900035


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l33.pkl

🔁 Training curriculum level 177


Map:   0%|          | 0/85 [00:00<?, ? examples/s]

80 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5207,4.476449
2,4.2353,4.390335
3,4.1282,4.376487


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l34.pkl

🔁 Training curriculum level 178


Map:   0%|          | 0/88 [00:00<?, ? examples/s]

83 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4243,3.929271
2,4.1055,3.941253
3,3.8424,3.935396


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l35.pkl

🔁 Training curriculum level 179


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.752926
2,4.662400,3.759308
3,4.216400,3.805072


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l36.pkl

🔁 Training curriculum level 180


Map:   0%|          | 0/72 [00:00<?, ? examples/s]

68 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7082,3.366765
2,4.374,3.408834
3,4.0613,3.405459


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l37.pkl

🔁 Training curriculum level 181


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.204183
2,4.539300,4.084652
3,4.303300,4.059854


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l38.pkl

🔁 Training curriculum level 182


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.229664
2,4.343700,4.172001
3,4.000500,4.131919


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l39.pkl

🔁 Training curriculum level 183


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,1.900807
2,4.566600,1.796878
3,4.197600,1.780935


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l40.pkl

🔁 Training curriculum level 184


Map:   0%|          | 0/86 [00:00<?, ? examples/s]

81 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8042,5.782761
2,4.513,5.758314
3,4.8157,5.814899


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l41.pkl

🔁 Training curriculum level 185


Map:   0%|          | 0/78 [00:00<?, ? examples/s]

74 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.0408,4.314903
2,3.8516,4.335989
3,3.7211,4.327017


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l42.pkl

🔁 Training curriculum level 186


Map:   0%|          | 0/82 [00:00<?, ? examples/s]

77 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.3947,3.073763
2,4.0743,3.075728
3,3.976,3.0448


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l43.pkl

🔁 Training curriculum level 187


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.786173
2,4.732000,4.735888
3,4.490100,4.698443


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l44.pkl

🔁 Training curriculum level 188


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

72 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4828,3.394033
2,4.2246,3.395196
3,4.0659,3.387516


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l45.pkl

🔁 Training curriculum level 189


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

65 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.1592,4.493443
2,4.2471,4.349887
3,4.2756,4.331425


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l46.pkl

🔁 Training curriculum level 190


Map:   0%|          | 0/85 [00:00<?, ? examples/s]

80 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5024,4.136672
2,4.2382,4.122388
3,4.1224,4.101796


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l47.pkl

🔁 Training curriculum level 191


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6835,4.659538
2,4.4939,4.583139
3,4.5491,4.561098


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l48.pkl

🔁 Training curriculum level 192


Map:   0%|          | 0/75 [00:00<?, ? examples/s]

71 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.302,4.433999
2,3.9776,4.375052
3,3.9151,4.361323


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l49.pkl

🔁 Training curriculum level 193


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.207695
2,3.845400,4.175325
3,3.306200,4.183507


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l50.pkl

🔁 Training curriculum level 194


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

65 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8767,3.828933
2,4.3259,3.709254
3,4.4632,3.693978


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l51.pkl

🔁 Training curriculum level 195


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.334352
2,4.306300,3.273845
3,4.307900,3.288972


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l52.pkl

🔁 Training curriculum level 196


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

67 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6881,4.29614
2,4.5807,4.285637
3,4.3412,4.272048


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l53.pkl

🔁 Training curriculum level 197


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.351725
2,4.975900,4.219232
3,5.094100,4.207414


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l54.pkl

🔁 Training curriculum level 198


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.676404
2,4.409800,5.788368
3,4.147400,5.794841


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l55.pkl

🔁 Training curriculum level 199


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.919493
2,4.375400,4.887413
3,4.024500,4.838468


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l56.pkl

🔁 Training curriculum level 200


Map:   0%|          | 0/77 [00:00<?, ? examples/s]

73 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.1261,4.762336
2,3.9496,4.58653
3,3.8113,4.573364


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l57.pkl

🔁 Training curriculum level 201


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.454996
2,4.199300,2.39055
3,3.823000,2.364276


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l58.pkl

🔁 Training curriculum level 202


Map:   0%|          | 0/58 [00:00<?, ? examples/s]

55 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.723316
2,3.970200,4.676384
3,3.840400,4.694227


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l59.pkl

🔁 Training curriculum level 203


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.1577,3.515761
2,4.0805,3.42405
3,3.8329,3.409805


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l60.pkl

🔁 Training curriculum level 204


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.2665,5.132251
2,3.9667,5.158484
3,3.7057,5.163949


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l61.pkl

🔁 Training curriculum level 205


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.207924
2,4.298100,3.215344
3,3.737500,3.22326


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l62.pkl

🔁 Training curriculum level 206


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.68394
2,4.898900,5.405352
3,4.377400,5.388931


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l63.pkl

🔁 Training curriculum level 207


Map:   0%|          | 0/101 [00:00<?, ? examples/s]

95 train | 6 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5654,3.537413
2,4.3846,3.428532
3,4.1345,3.423891


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l64.pkl

🔁 Training curriculum level 208


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.971887
2,4.983000,3.955547
3,4.438000,3.957474


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l65.pkl

🔁 Training curriculum level 209


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.105736
2,4.183800,4.044981
3,3.782100,4.087803


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l66.pkl

🔁 Training curriculum level 210


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

72 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4767,3.04031
2,4.067,3.160258
3,4.006,3.203212


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l67.pkl

🔁 Training curriculum level 211


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.1328,3.364441
2,4.698,3.401935
3,4.7421,3.387845


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l68.pkl

🔁 Training curriculum level 212


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.022033
2,4.859900,3.01198
3,4.383500,3.026484


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l69.pkl

🔁 Training curriculum level 213


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.078144
2,4.449600,4.137292
3,3.891100,4.181684


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l70.pkl

🔁 Training curriculum level 214


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.852163
2,4.276000,3.899307
3,4.002300,3.901749


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l71.pkl

🔁 Training curriculum level 215


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.999601
2,4.688900,3.947741
3,4.647200,3.975813


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l72.pkl

🔁 Training curriculum level 216


Map:   0%|          | 0/35 [00:00<?, ? examples/s]

33 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.651672
2,5.740000,5.546703
3,5.740000,5.559945


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l73.pkl

🔁 Training curriculum level 217


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8039,4.279212
2,4.5599,4.360409
3,4.4443,4.321255


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l74.pkl

🔁 Training curriculum level 218


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.676474
2,4.024000,4.733568
3,3.689200,4.764598


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l75.pkl

🔁 Training curriculum level 219


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

54 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.514843
2,4.981200,4.628019
3,4.528200,4.61095


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l76.pkl

🔁 Training curriculum level 220


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.617872
2,4.654300,4.647915
3,4.525700,4.642628


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l77.pkl

🔁 Training curriculum level 221


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.2023,3.014483
2,4.0399,2.993087
3,3.8146,3.020001


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l78.pkl

🔁 Training curriculum level 222


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.291536
2,4.208300,5.177911
3,4.180100,5.185604


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l79.pkl

🔁 Training curriculum level 223


Map:   0%|          | 0/76 [00:00<?, ? examples/s]

72 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4608,3.572124
2,4.1736,3.553022
3,4.0294,3.541897


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l80.pkl

🔁 Training curriculum level 224


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7637,6.047644
2,4.5824,5.904723
3,4.284,5.930406


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l81.pkl

🔁 Training curriculum level 225


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.982082
2,4.055300,2.823496
3,3.682100,2.827813


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l82.pkl

🔁 Training curriculum level 226


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.028996
2,4.573900,3.980429
3,4.478400,3.982081


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l83.pkl

🔁 Training curriculum level 227


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.551051
2,4.355600,4.746304
3,4.288000,4.750592


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l84.pkl

🔁 Training curriculum level 228


Map:   0%|          | 0/49 [00:00<?, ? examples/s]

46 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.472479
2,4.431700,3.396201
3,4.431700,3.3793


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l85.pkl

🔁 Training curriculum level 229


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.866924
2,4.413100,4.728978
3,3.891400,4.703746


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l86.pkl

🔁 Training curriculum level 230


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.08467
2,4.684400,4.161942
3,4.111800,4.097467


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l87.pkl

🔁 Training curriculum level 231


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.566524
2,4.169100,4.655229
3,4.193400,4.602504


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l88.pkl

🔁 Training curriculum level 232


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.040969
2,4.698900,3.038382
3,4.235400,3.007837


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l89.pkl

🔁 Training curriculum level 233


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4313,3.862693
2,4.3422,3.76923
3,4.5934,3.797573


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l90.pkl

🔁 Training curriculum level 234


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.543627
2,3.890900,4.49909
3,3.671300,4.482279


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l91.pkl

🔁 Training curriculum level 235


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

65 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4706,4.596576
2,4.0479,4.718495
3,4.2187,4.709664


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l92.pkl

🔁 Training curriculum level 236


Map:   0%|          | 0/52 [00:00<?, ? examples/s]

49 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.693735
2,4.111300,5.723686
3,4.562500,5.739374


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l93.pkl

🔁 Training curriculum level 237


Map:   0%|          | 0/85 [00:00<?, ? examples/s]

80 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5179,3.656964
2,4.2311,3.713522
3,4.0536,3.778479


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l94.pkl

🔁 Training curriculum level 238


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.3782,5.891001
2,4.417,5.788576
3,4.1636,5.621134


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v2_l95.pkl

🔁 Training curriculum level 239


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.1279,3.489908
2,3.7752,3.500908
3,3.8843,3.564384


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l1.pkl

🔁 Training curriculum level 240


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.475037
2,4.692300,4.577467
3,4.501400,4.589304


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l2.pkl

🔁 Training curriculum level 241


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.36533
2,4.827800,5.355017
3,4.511100,5.370508


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l3.pkl

🔁 Training curriculum level 242


Map:   0%|          | 0/48 [00:00<?, ? examples/s]

45 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.413414
2,5.215700,4.512426
3,5.215700,4.551099


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l4.pkl

🔁 Training curriculum level 243


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.108792
2,4.852400,4.243382
3,4.230300,4.21457


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l5.pkl

🔁 Training curriculum level 244


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.597675
2,4.470400,4.533224
3,4.318500,4.540791


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l6.pkl

🔁 Training curriculum level 245


Map:   0%|          | 0/52 [00:00<?, ? examples/s]

49 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.017143
2,4.777500,3.977801
3,3.752600,3.948413


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l7.pkl

🔁 Training curriculum level 246


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.513592
2,4.409900,3.481652
3,4.446700,3.470505


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l8.pkl

🔁 Training curriculum level 247


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.586948
2,4.566100,5.536712
3,4.193600,5.508115


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l9.pkl

🔁 Training curriculum level 248


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.342592
2,4.804800,4.356138
3,4.454900,4.338359


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l10.pkl

🔁 Training curriculum level 249


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.641437
2,4.464600,2.736404
3,4.347100,2.71816


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l11.pkl

🔁 Training curriculum level 250


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.403575
2,4.113300,3.388619
3,3.931900,3.39552


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l12.pkl

🔁 Training curriculum level 251


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

67 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.2133,4.371041
2,3.9054,4.302289
3,3.8105,4.249356


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l13.pkl

🔁 Training curriculum level 252


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.526041
2,4.384800,3.531569
3,4.002700,3.541564


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l14.pkl

🔁 Training curriculum level 253


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.462515
2,4.534100,5.5626
3,4.349900,5.681519


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l15.pkl

🔁 Training curriculum level 254


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.864341
2,4.488800,3.861128
3,4.004400,3.85003


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l16.pkl

🔁 Training curriculum level 255


Map:   0%|          | 0/65 [00:00<?, ? examples/s]

61 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.457561
2,4.584200,4.455536
3,4.313300,4.477789


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l17.pkl

🔁 Training curriculum level 256


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.548345
2,4.604600,6.474495
3,4.778200,6.464622


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l18.pkl

🔁 Training curriculum level 257


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

47 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.03171
2,4.621800,4.957805
3,4.621800,4.964204


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l19.pkl

🔁 Training curriculum level 258


Map:   0%|          | 0/52 [00:00<?, ? examples/s]

49 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.172625
2,5.009200,5.190913
3,4.471700,5.178836


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l20.pkl

🔁 Training curriculum level 259


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.427818
2,4.547700,4.448007
3,4.321700,4.442205


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l21.pkl

🔁 Training curriculum level 260


Map:   0%|          | 0/73 [00:00<?, ? examples/s]

69 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9301,3.945236
2,4.5377,4.099855
3,4.5346,4.069522


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l22.pkl

🔁 Training curriculum level 261


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.50583
2,4.361000,4.536016
3,4.477200,4.561766


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l23.pkl

🔁 Training curriculum level 262


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.13832
2,4.563200,4.108615
3,4.047100,4.098621


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l24.pkl

🔁 Training curriculum level 263


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

54 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,6.044209
2,4.819100,6.034197
3,4.202400,6.055734


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l25.pkl

🔁 Training curriculum level 264


Map:   0%|          | 0/67 [00:00<?, ? examples/s]

63 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.634248
2,4.743900,3.623852
3,4.604300,3.618662


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l26.pkl

🔁 Training curriculum level 265


Map:   0%|          | 0/49 [00:00<?, ? examples/s]

46 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.693821
2,4.702200,2.774177
3,4.702200,2.77614


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l27.pkl

🔁 Training curriculum level 266


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.352293
2,4.246800,5.238416
3,3.658200,5.229761


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l28.pkl

🔁 Training curriculum level 267


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.483244
2,4.530600,4.562819
3,4.277000,4.570652


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l29.pkl

🔁 Training curriculum level 268


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.071169
2,4.620500,4.107625
3,4.497400,4.127888


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l30.pkl

🔁 Training curriculum level 269


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.589303
2,4.205800,3.604355
3,3.971400,3.607094


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l31.pkl

🔁 Training curriculum level 270


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.751502
2,4.567200,4.791093
3,4.242500,4.818622


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l32.pkl

🔁 Training curriculum level 271


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.551109
2,4.757300,4.486619
3,4.329600,4.487215


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l33.pkl

🔁 Training curriculum level 272


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.098,6.771813
2,4.381,6.690054
3,3.6673,6.631604


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l34.pkl

🔁 Training curriculum level 273


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.052644
2,4.492400,4.106485
3,4.289700,4.12078


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l35.pkl

🔁 Training curriculum level 274


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.306512
2,4.880100,4.345732
3,4.653900,4.362723


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l36.pkl

🔁 Training curriculum level 275


Map:   0%|          | 0/53 [00:00<?, ? examples/s]

50 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.556135
2,4.865900,2.727613
3,4.357000,2.710065


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l37.pkl

🔁 Training curriculum level 276


Map:   0%|          | 0/74 [00:00<?, ? examples/s]

70 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4336,4.226645
2,4.4592,4.13008
3,4.4067,4.131293


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l38.pkl

🔁 Training curriculum level 277


Map:   0%|          | 0/42 [00:00<?, ? examples/s]

39 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,2.695369
2,4.314700,2.63623
3,4.314700,2.599261


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l39.pkl

🔁 Training curriculum level 278


Map:   0%|          | 0/69 [00:00<?, ? examples/s]

65 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,3.7334,5.877854
2,4.1324,5.941839
3,4.5288,5.94634


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l40.pkl

🔁 Training curriculum level 279


Map:   0%|          | 0/59 [00:00<?, ? examples/s]

56 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.255769
2,4.677700,4.258955
3,4.615200,4.25947


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l41.pkl

🔁 Training curriculum level 280


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

52 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.014729
2,4.610500,4.993921
3,4.318700,4.965718


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l42.pkl

🔁 Training curriculum level 281


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.800939
2,5.044800,4.812343
3,4.896500,4.817818


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l43.pkl

🔁 Training curriculum level 282


Map:   0%|          | 0/60 [00:00<?, ? examples/s]

57 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.434153
2,4.378200,3.3794
3,4.120400,3.341359


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l44.pkl

🔁 Training curriculum level 283


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.29431
2,4.716600,4.241672
3,4.517800,4.263757


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l45.pkl

🔁 Training curriculum level 284


Map:   0%|          | 0/58 [00:00<?, ? examples/s]

55 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.023765
2,4.233900,3.949386
3,3.936000,3.926103


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l46.pkl

🔁 Training curriculum level 285


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.2927,4.783801
2,4.4128,4.931613
3,4.2087,4.922235


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l47.pkl

🔁 Training curriculum level 286


Map:   0%|          | 0/55 [00:00<?, ? examples/s]

52 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.64726
2,4.460000,4.661069
3,4.413200,4.668983


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l48.pkl

🔁 Training curriculum level 287


Map:   0%|          | 0/68 [00:00<?, ? examples/s]

64 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.652394
2,4.752600,3.711241
3,4.401200,3.71572


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l49.pkl

🔁 Training curriculum level 288


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

47 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.184239
2,5.256700,4.265529
3,5.256700,4.255884


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l50.pkl

🔁 Training curriculum level 289


Map:   0%|          | 0/58 [00:00<?, ? examples/s]

55 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.199811
2,4.414000,3.213968
3,4.057300,3.223925


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l51.pkl

🔁 Training curriculum level 290


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.844681
2,5.166200,4.82915
3,4.622200,4.843304


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l52.pkl

🔁 Training curriculum level 291


Map:   0%|          | 0/65 [00:00<?, ? examples/s]

61 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.260351
2,4.421800,4.310714
3,4.120100,4.314866


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l53.pkl

🔁 Training curriculum level 292


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.753652
2,4.432500,4.625366
3,4.259000,4.601536


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l54.pkl

🔁 Training curriculum level 293


Map:   0%|          | 0/62 [00:00<?, ? examples/s]

58 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.531569
2,4.724300,4.610625
3,4.454900,4.673069


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l55.pkl

🔁 Training curriculum level 294


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.133503
2,4.718500,5.194634
3,4.219100,5.185153


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l56.pkl

🔁 Training curriculum level 295


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

54 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.993405
2,4.629200,4.999436
3,4.318800,5.001651


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l57.pkl

🔁 Training curriculum level 296


Map:   0%|          | 0/61 [00:00<?, ? examples/s]

57 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.657394
2,4.454500,3.57595
3,4.149400,3.552082


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l58.pkl

🔁 Training curriculum level 297


Map:   0%|          | 0/49 [00:00<?, ? examples/s]

46 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.445506
2,4.785700,3.469832
3,4.785700,3.489638


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l59.pkl

🔁 Training curriculum level 298


Map:   0%|          | 0/54 [00:00<?, ? examples/s]

51 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.866166
2,4.738500,4.805264
3,4.250100,4.763215


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v3_l60.pkl

🔁 Training curriculum level 299


Map:   0%|          | 0/65 [00:00<?, ? examples/s]

61 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.860065
2,4.796400,4.746289
3,4.476200,4.752383


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l1.pkl

🔁 Training curriculum level 300


Map:   0%|          | 0/71 [00:00<?, ? examples/s]

67 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9509,3.862144
2,4.621,3.833725
3,4.4853,3.811231


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l2.pkl

🔁 Training curriculum level 301


Map:   0%|          | 0/63 [00:00<?, ? examples/s]

59 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.104351
2,4.201900,5.058704
3,4.029900,5.057975


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l3.pkl

🔁 Training curriculum level 302


Map:   0%|          | 0/57 [00:00<?, ? examples/s]

54 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.052114
2,4.758700,5.040701
3,4.640700,5.049638


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l4.pkl

🔁 Training curriculum level 303


Map:   0%|          | 0/64 [00:00<?, ? examples/s]

60 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.170422
2,4.631400,3.13025
3,4.388100,3.121397


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l5.pkl

🔁 Training curriculum level 304


Map:   0%|          | 0/86 [00:00<?, ? examples/s]

81 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.3431,4.427357
2,4.1769,4.441517
3,4.2616,4.414291


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l6.pkl

🔁 Training curriculum level 305


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.741958
2,4.472600,4.698087
3,3.844300,4.699646


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l7.pkl

🔁 Training curriculum level 306


Map:   0%|          | 0/56 [00:00<?, ? examples/s]

53 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.4475
2,4.446100,3.475984
3,4.083900,3.446435


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l8.pkl

🔁 Training curriculum level 307


Map:   0%|          | 0/34 [00:00<?, ? examples/s]

32 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.050011
2,No log,4.264862
3,4.959300,4.304603


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l9.pkl

🔁 Training curriculum level 308


Map:   0%|          | 0/74 [00:00<?, ? examples/s]

70 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7295,5.831873
2,4.4926,5.746584
3,4.4494,5.723054


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l10.pkl

🔁 Training curriculum level 309


Map:   0%|          | 0/47 [00:00<?, ? examples/s]

44 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.072845
2,5.011800,4.111171
3,5.011800,4.101469


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l11.pkl

🔁 Training curriculum level 310


Map:   0%|          | 0/35 [00:00<?, ? examples/s]

33 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.809694
2,4.152500,4.861584
3,4.152500,4.888121


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l12.pkl

🔁 Training curriculum level 311


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6304,3.056569
2,4.2886,3.112748
3,4.1436,3.105702


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l13.pkl

🔁 Training curriculum level 312


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

76 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,3.9351,2.49455
2,3.7366,2.455834
3,3.6299,2.493828


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l14.pkl

🔁 Training curriculum level 313


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

62 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.402408
2,4.664000,3.222855
3,4.714200,3.213009


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l15.pkl

🔁 Training curriculum level 314


Map:   0%|          | 0/58 [00:00<?, ? examples/s]

55 train | 3 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,3.549356
2,4.423600,3.563701
3,4.093700,3.570246


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l16.pkl

🔁 Training curriculum level 315


Map:   0%|          | 0/72 [00:00<?, ? examples/s]

68 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7139,4.513974
2,4.12,4.618409
3,3.8783,4.642042


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l17.pkl

🔁 Training curriculum level 316


Map:   0%|          | 0/88 [00:00<?, ? examples/s]

83 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.3832,7.051634
2,3.6076,7.016658
3,3.7983,7.048532


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l18.pkl

🔁 Training curriculum level 317


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

95 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.0884,5.439578
2,5.0104,5.358461
3,4.6089,5.366102


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l19.pkl

🔁 Training curriculum level 318


Map:   0%|          | 0/83 [00:00<?, ? examples/s]

78 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7106,4.873305
2,4.4716,4.738266
3,4.4036,4.700072


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l20.pkl

🔁 Training curriculum level 319


Map:   0%|          | 0/38 [00:00<?, ? examples/s]

36 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.555634
2,4.374500,5.500821
3,4.374500,5.452185


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l21.pkl

🔁 Training curriculum level 320


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.523,3.300492
2,4.264,3.31044
3,4.1926,3.354047


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l22.pkl

🔁 Training curriculum level 321


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6351,4.617883
2,4.3237,4.595663
3,4.2595,4.590376


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l23.pkl

🔁 Training curriculum level 322


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

76 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9493,3.40831
2,4.6858,3.342417
3,4.5706,3.339457


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l24.pkl

🔁 Training curriculum level 323


Map:   0%|          | 0/78 [00:00<?, ? examples/s]

74 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5299,4.814043
2,4.3266,4.784694
3,4.2542,4.845236


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l25.pkl

🔁 Training curriculum level 324


Map:   0%|          | 0/88 [00:00<?, ? examples/s]

83 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6972,4.819825
2,3.9311,4.71873
3,4.5136,4.686061


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l26.pkl

🔁 Training curriculum level 325


Map:   0%|          | 0/80 [00:00<?, ? examples/s]

76 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.7509,4.672305
2,4.506,4.681688
3,4.4034,4.67825


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l27.pkl

🔁 Training curriculum level 326


Map:   0%|          | 0/92 [00:00<?, ? examples/s]

87 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9817,5.927673
2,4.7446,5.804419
3,4.4875,5.770906


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l28.pkl

🔁 Training curriculum level 327


Map:   0%|          | 0/87 [00:00<?, ? examples/s]

82 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.0405,4.004666
2,3.7391,4.064286
3,3.5113,4.086538


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l29.pkl

🔁 Training curriculum level 328


Map:   0%|          | 0/86 [00:00<?, ? examples/s]

81 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8064,5.085283
2,4.7512,5.06878
3,4.2749,5.078811


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l30.pkl

🔁 Training curriculum level 329


Map:   0%|          | 0/97 [00:00<?, ? examples/s]

92 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.1878,4.147404
2,4.2515,4.077272
3,3.7835,4.043625


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l31.pkl

🔁 Training curriculum level 330


Map:   0%|          | 0/81 [00:00<?, ? examples/s]

76 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9172,5.382058
2,4.5283,5.390683
3,4.4503,5.399441


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l32.pkl

🔁 Training curriculum level 331


Map:   0%|          | 0/90 [00:00<?, ? examples/s]

85 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8303,6.233069
2,4.552,6.147583
3,4.5034,6.118696


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l33.pkl

🔁 Training curriculum level 332


Map:   0%|          | 0/98 [00:00<?, ? examples/s]

93 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.02,4.149417
2,4.9152,4.144811
3,4.6608,4.129337


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l34.pkl

🔁 Training curriculum level 333


Map:   0%|          | 0/98 [00:00<?, ? examples/s]

93 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8509,4.059557
2,4.6242,4.007556
3,4.569,3.980706


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l35.pkl

🔁 Training curriculum level 334


Map:   0%|          | 0/98 [00:00<?, ? examples/s]

93 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.5413,5.895357
2,4.3463,5.841806
3,4.3434,5.844038


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l36.pkl

🔁 Training curriculum level 335


Map:   0%|          | 0/75 [00:00<?, ? examples/s]

71 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,3.9772,4.325546
2,3.8584,4.270058
3,3.7792,4.258785


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l37.pkl

🔁 Training curriculum level 336


Map:   0%|          | 0/99 [00:00<?, ? examples/s]

94 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,5.1085,5.241001
2,4.8849,5.13677
3,4.7814,5.148464


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l38.pkl

🔁 Training curriculum level 337


Map:   0%|          | 0/121 [00:00<?, ? examples/s]

114 train | 7 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9153,4.831452
2,4.2937,4.754376
3,4.4169,4.775005


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l39.pkl

🔁 Training curriculum level 338


Map:   0%|          | 0/85 [00:00<?, ? examples/s]

80 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.9266,3.830314
2,4.639,3.751349
3,4.5392,3.747422


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l40.pkl

🔁 Training curriculum level 339


Map:   0%|          | 0/78 [00:00<?, ? examples/s]

74 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8117,4.698019
2,4.4575,4.682478
3,4.4068,4.68047


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l41.pkl

🔁 Training curriculum level 340


Map:   0%|          | 0/22 [00:00<?, ? examples/s]

20 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,4.116405
2,No log,4.137732
3,4.131200,4.101792


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l42.pkl

🔁 Training curriculum level 341


Map:   0%|          | 0/120 [00:00<?, ? examples/s]

114 train | 6 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6689,5.207698
2,4.3691,5.184107
3,3.9613,5.192619


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l43.pkl

🔁 Training curriculum level 342


Map:   0%|          | 0/92 [00:00<?, ? examples/s]

87 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.8496,5.468338
2,4.6266,5.570011
3,4.5661,5.575787


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l44.pkl

🔁 Training curriculum level 343


Map:   0%|          | 0/99 [00:00<?, ? examples/s]

94 train | 5 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.4327,4.870332
2,4.1768,4.804779
3,4.1602,4.792305


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l45.pkl

🔁 Training curriculum level 344


Map:   0%|          | 0/32 [00:00<?, ? examples/s]

30 train | 2 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,No log,5.578395
2,No log,5.574091
3,4.045900,5.565675


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


▶︎ Lesson v4_l46.pkl

🔁 Training curriculum level 345


Map:   0%|          | 0/70 [00:00<?, ? examples/s]

66 train | 4 val


  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,4.6776,5.580605
2,4.8304,5.531673
3,4.4068,5.49635


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].



🧪 Final Test Generation:
INPUT : They are <extra_id_0> the car at the <extra_id_1>.
OUTPUT: <extra_id_0> a


In [None]:
# Directory that receives both the model and the tokenizer files.
save_dir = "my_syntax_gpt_model"

# Save the model first, then the tokenizer, into the same directory so that
# both can later be restored with from_pretrained(save_dir).
model.save_pretrained(save_dir)
# Last expression of the cell: its return value (the tokenizer file paths that
# were written) is what the notebook displays.
tokenizer.save_pretrained(save_dir)


('my_syntax_gpt_model/tokenizer_config.json',
 'my_syntax_gpt_model/special_tokens_map.json',
 'my_syntax_gpt_model/spiece.model',
 'my_syntax_gpt_model/added_tokens.json')

In [None]:
# Same directory name the save cell writes to.
save_dir = "my_syntax_gpt_model"

# Restore the model and move it to `device`, then restore the tokenizer from
# the same directory; both rebind the notebook-level names.
model = SyntaxT5.from_pretrained(save_dir).to(device)
tokenizer = T5Tokenizer.from_pretrained(save_dir)


# TREC classification

In [None]:
import os, pickle, torch
from datasets import load_dataset, Dataset
from torch.utils.data import Dataset as TorchDataset
from transformers import (
    T5Tokenizer, T5Config, T5ForConditionalGeneration,
    Trainer, TrainingArguments
)
from sklearn.metrics import accuracy_score, recall_score, f1_score
from tqdm import tqdm
import numpy as np

# ─── Device ───
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ─── Tokenizer & Model ───
# Checkpoint directory written by the save cell (model + tokenizer files).
PRETRAINED_DIR = "my_syntax_gpt_model"

# Load the tokenizer that was saved next to the checkpoint rather than a fresh
# "t5-base" one. The save cell's output lists an added_tokens.json, so the saved
# vocabulary may contain tokens beyond the stock T5 vocabulary; starting from
# the stock tokenizer would hand the label tokens ids that the checkpoint's
# embedding matrix may already use for something else.
tokenizer = T5Tokenizer.from_pretrained(PRETRAINED_DIR)

# One dedicated vocabulary token per class; index i in label_names is encoded
# as "<LABEL_i>".
# NOTE(review): the order is assumed to match the dataset's integer
# `coarse_label` ids — confirm against the CogComp/trec dataset card.
label_names = ["ABBR", "ENTY", "DESC", "HUM", "LOC", "NUM"]
label_tokens = [f"<LABEL_{i}>" for i in range(len(label_names))]
tokenizer.add_tokens(label_tokens)
assert tokenizer.unk_token_id not in tokenizer.convert_tokens_to_ids(label_tokens), \
    "every label token must have its own vocabulary id"

model = SyntaxT5.from_pretrained(PRETRAINED_DIR).to(device)
# Make the embedding matrix match the (possibly extended) vocabulary size.
model.resize_token_embeddings(len(tokenizer))

label_map = dict(enumerate(label_tokens))  # int → token
inv_label_map = {token: label_names[idx] for idx, token in label_map.items()}  # token → class name

# ─── Combo utils ───
# Registry mapping a (pos, dep, ner, morph) tuple to an integer id.
# Id 0 is reserved for "unknown"; new ids are handed out from 1 upwards and the
# registry stops growing once the counter reaches MAX_COMBO_ID.
# NOTE(review): the registry starts empty every time this cell runs; if the
# checkpoint was trained with ids from a different registry the ids will not
# line up — confirm against the pre-training cells.
combo2id = {}
combo_counter = 1
MAX_COMBO_ID = 1024
UNKNOWN_COMBO_ID = 0

def get_combo_id(pos, dep, ner, morph):
    """Return the integer id for one (pos, dep, ner, morph) combination.

    A combination seen before gets the id it was given the first time. A new
    combination is assigned the current value of ``combo_counter`` (which is
    then incremented), unless the counter has already reached ``MAX_COMBO_ID``,
    in which case ``UNKNOWN_COMBO_ID`` is returned and nothing is stored.
    """
    global combo_counter
    features = (pos, dep, ner, morph)
    if features in combo2id:
        return combo2id[features]
    if combo_counter >= MAX_COMBO_ID:
        # Registry is full: unseen combinations collapse onto the unknown id.
        return UNKNOWN_COMBO_ID
    assigned = combo_counter
    combo2id[features] = assigned
    combo_counter = assigned + 1
    return assigned

def get_char_offset_ner_map(doc):
    """Map every character offset covered by an entity to that entity's type.

    Iterates ``doc.ents`` in order and, for each entity, labels the offsets
    ``ent.start_char`` .. ``ent.end_char - 1`` with ``ent.type``. When two
    entities cover the same offset, the one later in ``doc.ents`` wins.
    Offsets outside every entity are simply absent from the result.
    """
    return {
        offset: ent.type
        for ent in doc.ents
        for offset in range(ent.start_char, ent.end_char)
    }

def extract_combo_ids_from_doc(doc, tokenizer, max_len=64):
    """Build a fixed-length list of combo ids, one per sub-word token of ``doc``.

    For every word of every sentence the (upos, deprel, NER type, feats)
    combination is turned into an id with ``get_combo_id`` and that id is
    repeated once per sub-word piece the tokenizer produces for the word's text.

    Args:
        doc: parsed document exposing ``.sentences`` (each with ``.words``) and
            ``.ents``; words are read for ``upos``, ``deprel``, ``feats``,
            ``start_char`` and ``text``.
        tokenizer: callable returning an object with ``input_ids`` for a string.
        max_len: length of the returned list.

    Returns:
        A list of exactly ``max_len`` ints: the collected ids cut to
        ``max_len`` and right-padded with ``UNKNOWN_COMBO_ID``.
    """
    ner_map = get_char_offset_ner_map(doc)
    combo_ids = []
    for sent in doc.sentences:
        for w in sent.words:
            pos, dep = w.upos or "X", w.deprel or "dep"
            morph = w.feats or ""
            # A word without a character offset cannot be located in ner_map.
            # Tag it "O" instead of falling back to offset 0, which would copy
            # the NER type of whatever entity covers the document's first
            # character.
            if w.start_char is None:
                ner = "O"
            else:
                ner = ner_map.get(w.start_char, "O")
            cid = get_combo_id(pos, dep, ner, morph)
            # -1 drops the one extra id the tokenizer appends to the word
            # (presumably the end-of-sequence token — confirm for non-T5 tokenizers).
            sub_len = len(tokenizer(w.text).input_ids) - 1
            combo_ids.extend([cid] * sub_len)
            if len(combo_ids) >= max_len:
                break
        if len(combo_ids) >= max_len:
            break
    combo_ids = combo_ids[:max_len]
    combo_ids += [UNKNOWN_COMBO_ID] * (max_len - len(combo_ids))
    return combo_ids

# ─── Load Dataset & Preparsed .pkl ───
# trust_remote_code=True: the CogComp/trec repository ships a loading script,
# and without the flag load_dataset stops at an interactive "[y/N]" prompt,
# which blocks an unattended Restart & Run All.
trec = load_dataset("CogComp/trec", trust_remote_code=True)

# SECURITY: pickle.load can execute arbitrary code embedded in the file — only
# load .pkl files that you produced yourself.
with open("trec_train_docs.pkl", "rb") as f:
    train_docs = pickle.load(f)
with open("trec_test_docs.pkl", "rb") as f:
    test_docs = pickle.load(f)


def _build_examples(docs, labels):
    """Pair each pre-parsed document with its label and its combo-id sequence.

    Args:
        docs: pre-parsed documents; ``docs[i]`` is assumed to be the parse of
            the i-th example of the split that ``labels`` comes from
            (NOTE(review): nothing here verifies that alignment).
        labels: indexable sequence of coarse labels for the same split.

    Returns:
        A list of dicts with keys ``"text"``, ``"coarse_label"``, ``"combo_ids"``.

    Raises:
        IndexError: if there are more documents than labels.
    """
    return [
        {
            "text": doc.text,
            "coarse_label": labels[i],
            "combo_ids": extract_combo_ids_from_doc(doc, tokenizer),
        }
        for i, doc in enumerate(docs)
    ]


# Read each label column once instead of materialising a full dataset row per
# document.
train_exs = _build_examples(train_docs, trec["train"]["coarse_label"])
test_exs = _build_examples(test_docs, trec["test"]["coarse_label"])

train_ds = Dataset.from_list(train_exs)
test_ds = Dataset.from_list(test_exs)

# ─── Preprocessing ───
def preprocess_clf(examples):
    """Tokenize a batch of questions and attach label ids and combo ids.

    Args:
        examples: batch dict with ``"text"`` (questions), ``"coarse_label"``
            (integer class ids, looked up in ``label_map``) and ``"combo_ids"``.

    Returns:
        The tokenizer output for the questions (padded/truncated to 64 tokens)
        with two extra entries: ``"labels"`` — the first target id followed by
        -100 for every remaining target position — and ``"combo_ids"`` copied
        through unchanged.
    """
    inputs = [f"{q}" for q in examples["text"]]
    targets = [label_map[l] for l in examples["coarse_label"]]

    # 64 is also the default max_len of extract_combo_ids_from_doc; keep the two
    # in sync so input_ids and combo_ids have the same length.
    model_inputs = tokenizer(inputs, max_length=64, padding="max_length", truncation=True)

    # `text_target=` is the supported replacement for the deprecated
    # `with tokenizer.as_target_tokenizer():` block. No padding/truncation is
    # requested: the previous max_length=1 padding could never add anything
    # because every encoded target already has at least one id.
    lbl = tokenizer(text_target=targets)

    # Only the first target position (the label token) contributes to the loss;
    # all later positions are masked with -100.
    model_inputs["labels"] = [
        [seq[0]] + [-100] * (len(seq) - 1) for seq in lbl["input_ids"]
    ]
    model_inputs["combo_ids"] = examples["combo_ids"]
    return model_inputs

# Columns of the raw example datasets that preprocess_clf replaces with model inputs.
_RAW_COLUMNS = ("text", "coarse_label", "combo_ids")

# Tokenize both splits in batches, dropping the raw columns.
train_tok = train_ds.map(
    preprocess_clf,
    batched=True,
    remove_columns=list(_RAW_COLUMNS),
)
test_tok = test_ds.map(
    preprocess_clf,
    batched=True,
    remove_columns=list(_RAW_COLUMNS),
)

# Switch both tokenized datasets to the "torch" output format (in place).
train_tok.set_format("torch")
test_tok.set_format("torch")

# ─── Dataset wrapper ───
class ClfDataset(TorchDataset):
    """Thin wrapper that exposes only the four model-input fields of ``ds``.

    ``ds`` is any indexable, sized collection whose items are mappings with the
    keys listed in ``_FIELDS``; any other keys on an item are dropped.
    """

    # Keys copied from each underlying item, in output order.
    _FIELDS = ("input_ids", "attention_mask", "labels", "combo_ids")

    def __init__(self, ds):
        self.ds = ds

    def __len__(self):
        return len(self.ds)

    def __getitem__(self, i):
        row = self.ds[i]
        return {field: row[field] for field in self._FIELDS}

# Wrap the tokenized splits so each item carries exactly the model-input fields.
train_torch, test_torch = ClfDataset(train_tok), ClfDataset(test_tok)

# ─── Data collator ───
def collate_batch(batch):
    """Stack the per-example tensors of ``batch`` into one tensor per field.

    Args:
        batch: list of dicts, each holding tensors under ``"input_ids"``,
            ``"attention_mask"``, ``"labels"`` and ``"combo_ids"``.

    Returns:
        Dict with the same four keys; each value is ``torch.stack`` of that
        field over the examples, i.e. with a new leading batch dimension.
    """
    fields = ("input_ids", "attention_mask", "labels", "combo_ids")
    return {
        name: torch.stack([item[name] for item in batch])
        for name in fields
    }

from transformers import EvalPrediction

from transformers import EvalPrediction
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.metrics import accuracy_score, recall_score, f1_score

from sklearn.metrics import accuracy_score, recall_score, f1_score
import numpy as np

def compute_metrics(eval_pred):
    """Compute accuracy, macro recall and macro F1 from label-token predictions.

    Args:
        eval_pred: pair ``(predictions, labels)``. ``predictions`` may be a
            tuple (only its first element is used) and may be 3-D scores, in
            which case the argmax over the last axis is taken. ``labels`` holds
            target ids with -100 on masked positions.

    Returns:
        Dict with ``"accuracy"``, ``"recall"`` and ``"f1"``. Recall and F1 are
        macro-averaged over the real class names only; a prediction that is
        not a label token still counts as an error for its true class but does
        not add a class of its own to the average.

    Side effects:
        Prints the first five (prediction, label) pairs.
    """
    predictions, labels = eval_pred

    # The model may return several outputs; the first one is used.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # 3-D input is treated as per-position scores: pick the best id.
    if predictions.ndim == 3:
        predictions = predictions.argmax(-1)

    # -100 is not a valid token id; swap it for the pad id before decoding.
    labels = np.where(labels == -100, tokenizer.pad_token_id, labels)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=False)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=False)

    def _first_token(text):
        """Return the first whitespace-separated piece of ``text`` ('' if none)."""
        pieces = text.split()
        return pieces[0] if pieces else ""

    y_true = []
    y_pred = []

    print("\n🔎 Sample Predictions (first 5):")

    for i, (pred_str, label_str) in enumerate(zip(decoded_preds, decoded_labels)):
        # Only the first decoded token carries the class decision.
        pred_first = _first_token(pred_str)
        label_first = _first_token(label_str)

        # Anything that is not a label token maps to the "???" placeholder.
        pred_label = inv_label_map.get(pred_first, "???")
        label_label = inv_label_map.get(label_first, "???")

        y_pred.append(pred_label)
        y_true.append(label_label)

        if i < 5:
            print(f"  ➡️  pred: {pred_first} ({pred_label}) | label: {label_first} ({label_label})")

    # Restrict the macro average to the real classes. Otherwise a single "???"
    # prediction becomes an extra class with undefined recall, which lowers the
    # macro scores and triggers an undefined-metric warning.
    class_names = list(inv_label_map.values())

    acc = accuracy_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred, labels=class_names, average="macro", zero_division=0)
    f1 = f1_score(y_true, y_pred, labels=class_names, average="macro", zero_division=0)

    return {
        "accuracy": acc,
        "recall": recall,
        "f1": f1,
    }

# ─── Train ───
# Keyword arguments for TrainingArguments, grouped by purpose.
_training_kwargs = dict(
    # Where checkpoints are written; no external experiment tracker.
    output_dir="./t5_trec_labeltok",
    report_to="none",
    # Evaluate and checkpoint once per epoch, keeping at most one checkpoint
    # around (save_total_limit), and reload the checkpoint with the lowest
    # eval_loss when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # lower loss is better
    # Optimisation schedule.
    # Not enabled: weight_decay=0.01, warmup_ratio=0.1
    num_train_epochs=10,
    learning_rate=1e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    logging_steps=5,
    # Data loading: keep the extra `combo_ids` column instead of letting the
    # Trainer drop columns.
    remove_unused_columns=False,
    dataloader_pin_memory=False,
)
args = TrainingArguments(**_training_kwargs)

# No early-stopping callback is installed here, so all num_train_epochs run.
# eval_dataset is the TREC *test* split, so the "best" checkpoint is selected
# on test data; reported test metrics are therefore optimistic.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_torch,
    eval_dataset=test_torch,
    data_collator=collate_batch,
    compute_metrics=compute_metrics,
)

trainer.train()
# Last expression of the cell: the metrics dict of the reloaded best model is displayed.
trainer.evaluate()


Using device: cuda


README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

trec.py:   0%|          | 0.00/5.09k [00:00<?, ?B/s]

The repository for CogComp/trec contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/CogComp/trec.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/336k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/23.4k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5452 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/500 [00:00<?, ? examples/s]

Map:   0%|          | 0/5452 [00:00<?, ? examples/s]



Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,Recall,F1
1,1.0153,0.826639,0.686,0.572225,0.572292
2,0.5407,0.622482,0.796,0.747645,0.769837
3,0.7305,0.463135,0.852,0.795183,0.811478
4,0.401,0.51212,0.842,0.825039,0.829104
5,0.3154,0.454833,0.864,0.844708,0.850305
6,0.3761,0.493758,0.87,0.846288,0.850256
7,0.2335,0.573718,0.848,0.83484,0.808325
8,0.3882,0.607027,0.88,0.855914,0.87072
9,0.176,0.614605,0.86,0.846098,0.813481
10,0.125,0.603744,0.87,0.851911,0.831571



🔎 Sample Predictions (first 5):
  ➡️  pred: <LABEL_2> (DESC) | label: <LABEL_5> (NUM)
  ➡️  pred: <LABEL_4> (LOC) | label: <LABEL_4> (LOC)
  ➡️  pred: <LABEL_3> (HUM) | label: <LABEL_3> (HUM)
  ➡️  pred: <LABEL_2> (DESC) | label: <LABEL_2> (DESC)
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)

🔎 Sample Predictions (first 5):
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)
  ➡️  pred: <LABEL_4> (LOC) | label: <LABEL_4> (LOC)
  ➡️  pred: <LABEL_3> (HUM) | label: <LABEL_3> (HUM)
  ➡️  pred: <LABEL_2> (DESC) | label: <LABEL_2> (DESC)
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)

🔎 Sample Predictions (first 5):
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)
  ➡️  pred: <LABEL_4> (LOC) | label: <LABEL_4> (LOC)
  ➡️  pred: <LABEL_3> (HUM) | label: <LABEL_3> (HUM)
  ➡️  pred: <LABEL_2> (DESC) | label: <LABEL_2> (DESC)
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)

🔎 Sample Predictions (first 5):
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)
  ➡️  pred: <

There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].



🔎 Sample Predictions (first 5):
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)
  ➡️  pred: <LABEL_4> (LOC) | label: <LABEL_4> (LOC)
  ➡️  pred: <LABEL_3> (HUM) | label: <LABEL_3> (HUM)
  ➡️  pred: <LABEL_2> (DESC) | label: <LABEL_2> (DESC)
  ➡️  pred: <LABEL_5> (NUM) | label: <LABEL_5> (NUM)


{'eval_loss': 0.45483294129371643,
 'eval_accuracy': 0.864,
 'eval_recall': 0.8447075972401225,
 'eval_f1': 0.85030533078114,
 'eval_runtime': 2.723,
 'eval_samples_per_second': 183.619,
 'eval_steps_per_second': 11.752,
 'epoch': 10.0}