In [None]:
# ==========================
# SETUP: install deps + set CSV paths (DistilBERT / BERT / RoBERTa)
# Edit TRAIN_CSV / VAL_CSV / TEST_CSV to your local paths or Google Drive.
# ==========================

!pip -q install -U transformers datasets accelerate scikit-learn pandas

TRAIN_CSV = "/content/iemocap_emoberta_train.csv"
VAL_CSV   = "/content/iemocap_emoberta_val.csv"
TEST_CSV  = "/content/iemocap_emoberta_test.csv"


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.3/10.3 MB[0m [31m156.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.2/515.2 kB[0m [31m46.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m122.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.9/10.9 MB[0m [31m139.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.6/47.6 MB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 3.0.0 which is incompatible.
dask

In [None]:
# ==========================
# CONFIG: pick a base model + define training hyperparams
# MODEL_BASE can be: distilbert-base-uncased | bert-base-uncased | roberta-base (HF model IDs).
# AutoTokenizer / AutoModel handle the correct tokenizer/model class automatically.
# ==========================

import gc
import math
import os

import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from sklearn.metrics import accuracy_score, f1_score
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
    set_seed,
)


# ====== CONFIG ======
# Hugging Face model ID, written in its canonical lowercase form so it matches the
# IDs listed in this cell's header (distilbert-base-uncased | bert-base-uncased | roberta-base).
MODEL_BASE = "roberta-base"
TEXT_COL = "Utterance"   # CSV column read as the input text
LABEL_COL = "Emotion"    # CSV column read as the class label

# The six emotion classes kept for training; list order defines the integer class ids.
IEMO6 = ["neutral","frustration","sadness","anger","excited","happiness"]
label2id = {l:i for i,l in enumerate(IEMO6)}
id2label = {i:l for l,i in label2id.items()}

# One full training run is performed per seed; results are averaged afterwards.
SEEDS = [42,43,44,45,46]

# Optimisation hyperparameters.
LR = 3e-5
EPOCHS = 5
LABEL_SMOOTHING = 0.1

BATCH_TRAIN = 16
BATCH_EVAL  = 32
MAX_LEN = 256                  # tokenizer truncation length (in tokens)
OUT_ROOT = "robert_iemocap6"   # prefix of every per-seed output directory

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
tok = AutoTokenizer.from_pretrained(MODEL_BASE, use_fast=True)

In [None]:
# ==========================
# DATA + METRICS: load CSVs, map labels, tokenize, and define accuracy/F1 metrics
# Tokenizer uses MAX_LEN truncation; labels are mapped to IEMO6 -> ids.
# ==========================

def load_df(path):
    """Read one split CSV and return only the rows usable for training.

    Rows with a missing TEXT_COL or LABEL_COL value are dropped, the text is
    cast to ``str``, labels are stripped and lower-cased, and any row whose
    normalised label is not in IEMO6 is discarded.

    Args:
        path: Location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        A new DataFrame containing the surviving rows (all original columns).
    """
    frame = pd.read_csv(path).dropna(subset=[TEXT_COL, LABEL_COL]).copy()
    frame[TEXT_COL] = frame[TEXT_COL].astype(str)

    # Normalise labels once, then reuse the same Series for the class filter.
    normalised = frame[LABEL_COL].astype(str).str.strip().str.lower()
    frame[LABEL_COL] = normalised
    return frame[normalised.isin(IEMO6)].copy()

def to_ds(df):
    """Turn a cleaned DataFrame into a tokenized ``datasets.Dataset``.

    Only TEXT_COL and LABEL_COL are carried over. Each batch is tokenized with
    truncation at MAX_LEN and no padding, and labels are mapped to integer ids
    through ``label2id``. The two source columns are removed from the result.

    Args:
        df: DataFrame containing TEXT_COL and LABEL_COL.

    Returns:
        The mapped Dataset holding the tokenizer outputs plus a ``labels`` column.
    """
    def _encode_batch(batch):
        # Padding is deferred to the data collator, so sequences keep their own length here.
        encoded = tok(batch[TEXT_COL], truncation=True, padding=False, max_length=MAX_LEN)
        encoded["labels"] = [label2id[name] for name in batch[LABEL_COL]]
        return encoded

    source_cols = [TEXT_COL, LABEL_COL]
    raw = Dataset.from_pandas(df[source_cols], preserve_index=False)
    return raw.map(_encode_batch, batched=True, remove_columns=source_cols)

# Load, clean and tokenize the three splits in train -> val -> test order.
train_ds, val_ds, test_ds = [
    to_ds(load_df(csv_path)) for csv_path in (TRAIN_CSV, VAL_CSV, TEST_CSV)
]

# Quick sanity check on split sizes.
split_sizes = [len(split) for split in (train_ds, val_ds, test_ds)]
print("Counts:", *split_sizes)  # 4778 / 980 / 1622 expected

def compute_metrics(eval_pred):
    """Score a batch of predictions for the Trainer.

    Args:
        eval_pred: A pair ``(logits, y_true)``; the predicted class is the
            argmax of ``logits`` along axis 1.

    Returns:
        Dict with ``acc``, ``weighted_f1`` and ``macro_f1``.
    """
    logits, y_true = eval_pred
    y_pred = np.argmax(logits, axis=1)

    scores = {"acc": accuracy_score(y_true, y_pred)}
    # Same F1 computation under two averaging schemes.
    for averaging in ("weighted", "macro"):
        scores[f"{averaging}_f1"] = f1_score(y_true, y_pred, average=averaging)
    return scores

Map:   0%|          | 0/4778 [00:00<?, ? examples/s]

Map:   0%|          | 0/980 [00:00<?, ? examples/s]

Map:   0%|          | 0/1622 [00:00<?, ? examples/s]

Counts: 4778 980 1622


In [None]:
# ==========================
# TRAIN/EVAL: run Trainer across multiple seeds and report mean/std
# Best checkpoint per seed is kept via load_best_model_at_end + metric_for_best_model.
# ==========================

rows = []        # one result record per seed
best_ckpts = {}  # seed -> checkpoint path

# Pads every batch to its own longest sequence; inputs were already truncated at tokenization.
data_collator = DataCollatorWithPadding(tokenizer=tok, padding="longest", max_length=MAX_LEN)

# `warmup_ratio` is deprecated (the recorded run warns it will be removed in v5.2), so the
# warmup length is passed as an explicit step count covering the same 6% of training.
# Assumes a single device and no gradient accumulation, i.e. one optimizer step per batch.
WARMUP_FRACTION = 0.06
steps_per_epoch = math.ceil(len(train_ds) / BATCH_TRAIN)
warmup_steps = math.ceil(WARMUP_FRACTION * steps_per_epoch * EPOCHS)

for seed in SEEDS:
    print("\n" + "="*20, "SEED", seed, "="*20)
    set_seed(seed)

    # Fresh pretrained weights (and a newly initialised classifier head) for every seed.
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_BASE,
        num_labels=len(IEMO6),
        label2id=label2id,
        id2label=id2label
    ).to(DEVICE)

    args = TrainingArguments(
        output_dir=f"{OUT_ROOT}_seed{seed}",
        # Evaluate and checkpoint once per epoch so the best epoch can be restored afterwards.
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="weighted_f1",
        greater_is_better=True,

        learning_rate=LR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_TRAIN,
        per_device_eval_batch_size=BATCH_EVAL,
        weight_decay=0.01,
        warmup_steps=warmup_steps,

        label_smoothing_factor=LABEL_SMOOTHING,
        fp16=torch.cuda.is_available(),
        report_to="none",
        seed=seed,
        logging_steps=50,
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    trainer.train()

    # NOTE(review): the recorded run logged "There were missing keys in the checkpoint model
    # loaded" (LayerNorm weights) when the best checkpoint was restored. Confirm that the
    # restored weights are complete before trusting the test metrics below.
    best_ckpts[seed] = trainer.state.best_model_checkpoint

    # Prefix the metrics with "test" so they are not reported as "eval_*".
    res = trainer.evaluate(test_ds, metric_key_prefix="test")

    rows.append({
        "seed": seed,
        "test_acc": float(res["test_acc"]),
        "test_weighted_f1": float(res["test_weighted_f1"]),
        "test_macro_f1": float(res["test_macro_f1"]),
        "best_ckpt": best_ckpts[seed],
    })

    # Release this seed's model and optimizer state before the next seed loads its own
    # copy; otherwise both stay alive until the names are rebound.
    del trainer, model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

df = pd.DataFrame(rows)
print("\nPer-seed results:")
display(df)

# Aggregate only the metric columns (seed and checkpoint path are identifiers).
metrics_only = df.drop(columns=["seed","best_ckpt"])

print("\nMEAN:")
display(metrics_only.mean().to_frame("mean"))

print("\nSTD:")
display(metrics_only.std().to_frame("std"))




Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

[1mRobertaForSequenceClassification LOAD REPORT[0m from: Roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
classifier.dense.weight         | MISSING    | 
classifier.out_proj.bias        | MISSING    | 
classifier.dense.bias           | MISSING    | 
classifier.out_proj.weight      | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.489834,1.515479,0.384694,0.352337,0.364539
2,1.219353,1.346105,0.528571,0.535913,0.50984
3,1.058678,1.352366,0.55102,0.557712,0.53547
4,0.883819,1.440182,0.563265,0.569316,0.545562
5,0.803284,1.513636,0.557143,0.564525,0.539353


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye




Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

[1mRobertaForSequenceClassification LOAD REPORT[0m from: Roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
classifier.dense.weight         | MISSING    | 
classifier.out_proj.bias        | MISSING    | 
classifier.dense.bias           | MISSING    | 
classifier.out_proj.weight      | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.49933,1.454985,0.445918,0.441813,0.393079
2,1.293322,1.285602,0.554082,0.557873,0.527206
3,1.060732,1.402167,0.533673,0.538013,0.508404
4,0.923185,1.501056,0.530612,0.539878,0.527254
5,0.781296,1.506917,0.535714,0.540455,0.52648


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye




Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

[1mRobertaForSequenceClassification LOAD REPORT[0m from: Roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
classifier.dense.weight         | MISSING    | 
classifier.out_proj.bias        | MISSING    | 
classifier.dense.bias           | MISSING    | 
classifier.out_proj.weight      | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.513098,1.368868,0.516327,0.515989,0.464091
2,1.25198,1.331355,0.530612,0.535258,0.513169
3,1.058391,1.365813,0.546939,0.556769,0.530853
4,0.893429,1.398201,0.562245,0.571082,0.550853
5,0.771923,1.47254,0.563265,0.570037,0.550717


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye




Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

[1mRobertaForSequenceClassification LOAD REPORT[0m from: Roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
classifier.dense.weight         | MISSING    | 
classifier.out_proj.bias        | MISSING    | 
classifier.dense.bias           | MISSING    | 
classifier.out_proj.weight      | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.545464,1.448857,0.467347,0.461233,0.410358
2,1.254969,1.357365,0.519388,0.529955,0.503181
3,1.046051,1.403641,0.537755,0.550311,0.525095
4,0.874063,1.408177,0.563265,0.570904,0.546023
5,0.827734,1.492243,0.554082,0.561186,0.539383


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye




Loading weights:   0%|          | 0/197 [00:00<?, ?it/s]

[1mRobertaForSequenceClassification LOAD REPORT[0m from: Roberta-base
Key                             | Status     | 
--------------------------------+------------+-
lm_head.dense.bias              | UNEXPECTED | 
lm_head.layer_norm.weight       | UNEXPECTED | 
lm_head.bias                    | UNEXPECTED | 
lm_head.layer_norm.bias         | UNEXPECTED | 
lm_head.dense.weight            | UNEXPECTED | 
roberta.embeddings.position_ids | UNEXPECTED | 
classifier.dense.weight         | MISSING    | 
classifier.out_proj.bias        | MISSING    | 
classifier.dense.bias           | MISSING    | 
classifier.out_proj.weight      | MISSING    | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.
- MISSING[3m	:those params were newly initialized because missing from the checkpoint. Consider training on your downstream task.[0m
warmup_ratio is deprecated and will be removed in v5.2. Use `warmup_steps` instead.


Epoch,Training Loss,Validation Loss,Acc,Weighted F1,Macro F1
1,1.467923,1.435429,0.471429,0.469604,0.421433
2,1.249665,1.391684,0.518367,0.526512,0.515579
3,1.059407,1.374205,0.536735,0.543082,0.527787
4,0.92345,1.489085,0.52449,0.531897,0.521761
5,0.817101,1.537108,0.543878,0.550699,0.540288


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['roberta.embeddings.LayerNorm.weight', 'roberta.embeddings.LayerNorm.bias', 'roberta.encoder.layer.0.attention.output.LayerNorm.weight', 'roberta.encoder.layer.0.attention.output.LayerNorm.bias', 'roberta.encoder.layer.0.output.LayerNorm.weight', 'roberta.encoder.layer.0.output.LayerNorm.bias', 'roberta.encoder.layer.1.attention.output.LayerNorm.weight', 'roberta.encoder.layer.1.attention.output.LayerNorm.bias', 'roberta.encoder.layer.1.output.LayerNorm.weight', 'roberta.encoder.layer.1.output.LayerNorm.bias', 'roberta.encoder.layer.2.attention.output.LayerNorm.weight', 'roberta.encoder.layer.2.attention.output.LayerNorm.bias', 'roberta.encoder.layer.2.output.LayerNorm.weight', 'roberta.encoder.layer.2.output.LayerNorm.bias', 'roberta.encoder.layer.3.attention.output.LayerNorm.weight', 'roberta.encoder.layer.3.attention.output.LayerNorm.bias', 'roberta.encoder.layer.3.output.LayerNorm.weight', 'roberta.encoder.layer.3.output.Laye


Per-seed results:


Unnamed: 0,seed,test_acc,test_weighted_f1,test_macro_f1,best_ckpt
0,42,0.550555,0.547415,0.528877,robert_iemocap6_seed42/checkpoint-1196
1,43,0.53021,0.52502,0.504606,robert_iemocap6_seed43/checkpoint-598
2,44,0.569667,0.567273,0.550666,robert_iemocap6_seed44/checkpoint-1196
3,45,0.546856,0.544101,0.532201,robert_iemocap6_seed45/checkpoint-1196
4,46,0.566584,0.564625,0.551645,robert_iemocap6_seed46/checkpoint-1495



MEAN:


Unnamed: 0,mean
test_acc,0.552774
test_weighted_f1,0.549687
test_macro_f1,0.533599



STD:


Unnamed: 0,std
test_acc,0.016009
test_weighted_f1,0.017155
test_macro_f1,0.019247


In [None]:
# ==========================
# SAVE: copy each seed’s best checkpoint into a clean *_BEST folder + save tokenizer
# Useful for exporting the final model directory for inference/deployment.
# ==========================

import os, shutil

# Training-only state written alongside each checkpoint. It is not needed to load the
# model for inference, and optimizer.pt alone is about twice the size of the weights.
TRAINING_ONLY_FILES = ("optimizer.pt", "scheduler.pt", "scaler.pt", "rng_state*.pth")

for seed, ckpt_path in best_ckpts.items():
    best_dir = f"{OUT_ROOT}_seed{seed}_BEST"
    # Start from an empty target so files from an earlier export cannot linger.
    if os.path.exists(best_dir):
        shutil.rmtree(best_dir)
    shutil.copytree(
        ckpt_path,
        best_dir,
        ignore=shutil.ignore_patterns(*TRAINING_ONLY_FILES),
    )
    # Store the tokenizer next to the weights so the folder is self-contained.
    tok.save_pretrained(best_dir)
    print(f"Saved BEST for seed {seed}: {best_dir}")


Saved BEST for seed 42: robert_iemocap6_seed42_BEST
Saved BEST for seed 43: robert_iemocap6_seed43_BEST
Saved BEST for seed 44: robert_iemocap6_seed44_BEST
Saved BEST for seed 45: robert_iemocap6_seed45_BEST
Saved BEST for seed 46: robert_iemocap6_seed46_BEST


In [None]:
# ==========================
# COLAB: mount Google Drive (optional)
# Needed only if you want to copy checkpoints to Drive.
# ==========================

from google.colab import drive

# Directory in the Colab filesystem where Google Drive is attached.
_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# ==========================
# EXPORT: rsync the BEST checkpoint folder to Google Drive
# Update SRC/DST to match the model folder name you want to export.
# ==========================

SRC = "/content/robert_iemocap6_seed44_BEST"
DST = "/content/drive/MyDrive/fine tuned roberta iemocap/"

!rsync -ah --progress "$SRC" "$DST"
!ls -lh "/content/drive/MyDrive/fine tuned roberta iemocap/"

sending incremental file list
created directory /content/drive/MyDrive/fine tuned roberta iemocap
robert_iemocap6_seed44_BEST/
robert_iemocap6_seed44_BEST/config.json
            966 100%    0.00kB/s    0:00:00 (xfr#1, to-chk=9/11)
robert_iemocap6_seed44_BEST/model.safetensors
        498.63M 100%  433.87MB/s    0:00:01 (xfr#2, to-chk=8/11)
robert_iemocap6_seed44_BEST/optimizer.pt
        997.37M 100%  394.68MB/s    0:00:02 (xfr#3, to-chk=7/11)
robert_iemocap6_seed44_BEST/rng_state.pth
         14.64K 100%   34.80kB/s    0:00:00 (xfr#4, to-chk=6/11)
robert_iemocap6_seed44_BEST/scaler.pt
          1.38K 100%    3.29kB/s    0:00:00 (xfr#5, to-chk=5/11)
robert_iemocap6_seed44_BEST/scheduler.pt
          1.47K 100%    3.47kB/s    0:00:00 (xfr#6, to-chk=4/11)
robert_iemocap6_seed44_BEST/tokenizer.json
          3.56M 100%  976.19kB/s    0:00:03 (xfr#7, to-chk=3/11)
robert_iemocap6_seed44_BEST/tokenizer_config.json
            359 100%   31.87kB/s    0:00:00 (xfr#8, to-chk=2/11)
robert_iemoc