In [6]:
pip install vaderSentiment nlpaug optuna emoji

Collecting emoji
  Downloading emoji-2.15.0-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.15.0-py3-none-any.whl (608 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m608.4/608.4 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.15.0


In [2]:
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m57.0 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [7]:
# ====== Full runnable Colab script (copy-paste into one cell) ======
# Installs (run once)
# !pip install -q transformers datasets torch scikit-learn pandas numpy tqdm optuna spacy vaderSentiment emoji nltk nlpaug
# !python -m spacy download en_core_web_sm

# -------------------------
# Imports & reproducibility
# -------------------------
import os, random, math, time
from collections import defaultdict
import numpy as np
import pandas as pd
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from transformers import AutoTokenizer, AutoModel, AutoConfig, get_linear_schedule_with_warmup
from datasets import load_dataset
from sklearn.metrics import f1_score, classification_report
from tqdm.auto import tqdm
import optuna
import warnings
warnings.filterwarnings('ignore')

# HEF libs
import spacy
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import emoji
# augmentation
import nltk
from nltk.corpus import wordnet
import nlpaug.augmenter.word as naw

# Ensure NLTK assets
try:
    _ = wordnet.synsets("dog")
except Exception:
    import nltk as _nltk
    _nltk.download('wordnet')
    _nltk.download('omw-1.4')

# Seed
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
if torch.cuda.is_available(): torch.cuda.manual_seed_all(SEED)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# -------------------------
# Config / hyperparams
# -------------------------
BACKBONE = "microsoft/deberta-v3-large"   # change to -large if you have A100
MAX_LEN = 128
BATCH_SIZE = 20        # lower if OOM
EPOCHS = 4
LR = 2e-5
WEIGHT_DECAY = 0.01
NUM_TRIALS_THRESHOLD = 30   # per-class Optuna trials (quick). Increase to 80+ if you want.
TOP_K_CHECKPOINTS = 3

AUGMENT_SYNONYM = True   # lightweight augmentation for rare classes
AUGMENT_PROB = 0.25      # probability to augment a rare-sample
RARE_THRESHOLD = 120     # label support < this is rare

EMOTION_NAMES = [
 'admiration','amusement','anger','annoyance','approval','caring','confusion','curiosity','desire',
 'disappointment','disapproval','disgust','embarrassment','excitement','fear','gratitude','grief',
 'joy','love','nervousness','optimism','pride','realization','relief','remorse','sadness','surprise','neutral'
]
NUM_LABELS = len(EMOTION_NAMES)

# -------------------------
# Load data + basic clean
# -------------------------
print("Loading GoEmotions...")
dataset = load_dataset("google-research-datasets/go_emotions", "simplified")
train_df = dataset['train'].to_pandas()
val_df = dataset['validation'].to_pandas()
test_df = dataset['test'].to_pandas()

# ensure label lists
train_df['labels'] = train_df['labels'].apply(lambda x: [int(i) for i in x])
val_df['labels']   = val_df['labels'].apply(lambda x: [int(i) for i in x])
test_df['labels']  = test_df['labels'].apply(lambda x: [int(i) for i in x])

def clean_text(df):
    df = df.copy()
    df['text'] = df['text'].str.strip()
    df['text'] = df['text'].str.replace(r'\s+', ' ', regex=True)
    return df[df['text'].str.len() > 0]

train_df = clean_text(train_df)
val_df   = clean_text(val_df)
test_df  = clean_text(test_df)

print(len(train_df), len(val_df), len(test_df), "examples")

# -------------------------
# HEF: spaCy + VADER
# -------------------------
print("Loading HEF tools...")
nlp = spacy.load("en_core_web_sm", disable=["ner"])
vader = SentimentIntensityAnalyzer()

def extract_hef_features(text):
    doc = nlp(text)
    pos_counts = {'ADJ':0,'ADV':0,'INTJ':0,'VERB':0,'NOUN':0}
    for tok in doc:
        if tok.pos_ in pos_counts:
            pos_counts[tok.pos_] += 1
    exclam = text.count('!')
    qmarks = text.count('?')
    allcaps = sum(1 for w in text.split() if w.isupper() and len(w) > 1)
    vs = vader.polarity_scores(text)
    emojis = sum(1 for c in text if c in emoji.EMOJI_DATA)
    feat = [
        pos_counts['ADJ'], pos_counts['ADV'], pos_counts['INTJ'],
        pos_counts['VERB'], pos_counts['NOUN'],
        exclam, qmarks, allcaps,
        vs['neg'], vs['neu'], vs['pos'], vs['compound'],
        emojis
    ]
    return np.array(feat, dtype=np.float32)  # HEF_DIM = 13

HEF_DIM = 13

# -------------------------
# Augmentation: synonym replacement (light)
# -------------------------
def synonym_augment(text, p=0.12):
    words = text.split()
    new_words = []
    for w in words:
        if random.random() < p:
            syns = wordnet.synsets(w)
            if syns:
                lemmas = [l.name().replace('_',' ') for s in syns for l in s.lemmas() if l.name().lower() != w.lower()]
                if lemmas:
                    new_words.append(random.choice(lemmas))
                    continue
        new_words.append(w)
    return " ".join(new_words)

# determine rare classes
label_counts = train_df.explode('labels')['labels'].value_counts()
rare_classes = set(label_counts[label_counts < RARE_THRESHOLD].index.tolist())
print("Rare classes (support < {}):".format(RARE_THRESHOLD), sorted(list(rare_classes)))

def maybe_augment(text, labels):
    if not labels: return text
    if any(l in rare_classes for l in labels) and AUGMENT_SYNONYM and random.random() < AUGMENT_PROB:
        return synonym_augment(text, p=0.12)
    return text

# -------------------------
# Tokenizer & Dataset
# -------------------------
print("Preparing tokenizer and datasets...")
tokenizer = AutoTokenizer.from_pretrained(BACKBONE)

class HybridDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len, hef=True, augment=False):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.hef = hef
        self.augment = augment

    def __len__(self): return len(self.texts)

    def __getitem__(self, idx):
        text = str(self.texts[idx])
        labs = self.labels[idx]
        if self.augment:
            text = maybe_augment(text, labs)

        enc = self.tokenizer(text, padding='max_length', truncation=True, max_length=self.max_len, return_tensors='pt')
        input_ids = enc['input_ids'].squeeze(0)
        attention_mask = enc['attention_mask'].squeeze(0)

        label_vec = torch.zeros(NUM_LABELS, dtype=torch.float)
        for l in labs:
            label_vec[l] = 1.0

        hef_vec = torch.zeros(HEF_DIM, dtype=torch.float)
        if self.hef:
            hef_np = extract_hef_features(text)
            hef_vec = torch.from_numpy(hef_np)

        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "hef": hef_vec,
            "labels": label_vec
        }

train_dataset = HybridDataset(train_df['text'].tolist(), train_df['labels'].tolist(), tokenizer, MAX_LEN, hef=True, augment=True)
val_dataset   = HybridDataset(val_df['text'].tolist(), val_df['labels'].tolist(), tokenizer, MAX_LEN, hef=True, augment=False)
test_dataset  = HybridDataset(test_df['text'].tolist(), test_df['labels'].tolist(), tokenizer, MAX_LEN, hef=True, augment=False)

# -------------------------
# Weighted sampler
# -------------------------
print("Creating weighted sampler...")
label_counts_tensor = torch.zeros(NUM_LABELS)
for labs in train_df['labels']:
    for l in labs:
        label_counts_tensor[l] += 1

sample_weights = []
for labs in train_df['labels']:
    if len(labs) == 0:
        sample_weights.append(0.0)
    else:
        sample_weights.append(sum([1.0 / (label_counts_tensor[l].item() + 1e-6) for l in labs]))

sampler = WeightedRandomSampler(torch.tensor(sample_weights, dtype=torch.double), num_samples=len(sample_weights), replacement=True)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2)
val_loader   = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
test_loader  = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# -------------------------
# Model: AutoModel backbone + HEF fusion head
# -------------------------
print("Loading backbone:", BACKBONE)
backbone = AutoModel.from_pretrained(BACKBONE)
hidden_size = backbone.config.hidden_size

class HybridClassifier(nn.Module):
    def __init__(self, backbone, hef_dim, hidden_size, num_labels, proj=512, drop=0.2):
        super().__init__()
        self.backbone = backbone
        self.head = nn.Sequential(
            nn.Linear(hidden_size + hef_dim, proj),
            nn.ReLU(),
            nn.Dropout(drop),
            nn.Linear(proj, num_labels)
        )
    def forward(self, input_ids=None, attention_mask=None, hef=None):
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        pooled = out.last_hidden_state[:,0,:]   # CLS-like pooling
        if hef is None:
            hef = torch.zeros((pooled.size(0), HEF_DIM), device=pooled.device)
        x = torch.cat([pooled, hef], dim=-1)
        logits = self.head(x)
        return logits

model = HybridClassifier(backbone, HEF_DIM, hidden_size, NUM_LABELS).to(device)

# -------------------------
# Loss: Autocast-safe Clipped Asymmetric Loss
# -------------------------
import torch.nn.functional as F
class ClippedAsymmetricLoss(nn.Module):
    def __init__(self, gamma_pos=0.0, gamma_neg=1.0, clip=0.05, reduction='mean'):
        super().__init__()
        self.gamma_pos = gamma_pos
        self.gamma_neg = gamma_neg
        self.clip = clip
        self.reduction = reduction
    def forward(self, logits, targets):
        # BCE via logits (stable + autocast-safe)
        bce = F.binary_cross_entropy_with_logits(logits, targets, reduction='none')  # (B,L)
        probs = torch.sigmoid(logits)
        xs_pos = probs
        xs_neg = 1.0 - probs
        if self.clip and self.clip > 0:
            xs_neg = (xs_neg + self.clip).clamp(max=1)
        pt = torch.where(targets==1, xs_pos, xs_neg)
        gamma = torch.where(targets==1, torch.tensor(self.gamma_pos, device=logits.device), torch.tensor(self.gamma_neg, device=logits.device))
        focal_factor = (1.0 - pt) ** gamma
        loss = focal_factor * bce
        if self.reduction == 'mean': return loss.mean()
        if self.reduction == 'sum': return loss.sum()
        return loss

loss_fn = ClippedAsymmetricLoss(gamma_pos=0.0, gamma_neg=1.0, clip=0.05)

# -------------------------
# Optimizer / scheduler / AMP
# -------------------------
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=int(0.05*total_steps), num_training_steps=total_steps)
scaler = torch.cuda.amp.GradScaler()

# -------------------------
# Train / Eval helpers
# -------------------------
def train_one_epoch(model, loader, optimizer, scheduler, loss_fn, device):
    model.train()
    total_loss = 0.0
    for batch in tqdm(loader, desc="Train"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        hef = batch['hef'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            logits = model(input_ids=input_ids, attention_mask=attention_mask, hef=hef)
            loss = loss_fn(logits, labels)
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        total_loss += loss.item()
    return total_loss / len(loader)

def predict_probs(model, loader, device):
    model.eval()
    probs_list, labels_list = [], []
    with torch.no_grad():
        for batch in tqdm(loader, desc="Predict"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            hef = batch['hef'].to(device)
            labels = batch['labels'].cpu().numpy()
            logits = model(input_ids=input_ids, attention_mask=attention_mask, hef=hef)
            probs = torch.sigmoid(logits).cpu().numpy()
            probs_list.append(probs)
            labels_list.append(labels)
    return np.vstack(probs_list), np.vstack(labels_list)

# -------------------------
# Per-class threshold tuning (Optuna)
# -------------------------
def tune_thresholds_per_class(val_probs, val_labels, n_trials=NUM_TRIALS_THRESHOLD, seed=SEED):
    thresholds = np.zeros(NUM_LABELS)
    def objective_factory(col):
        def objective(trial):
            t = trial.suggest_float("t", 0.01, 0.9)
            preds = (val_probs[:, col] >= t).astype(int)
            return f1_score(val_labels[:, col], preds, zero_division=0)
        return objective
    for c in range(NUM_LABELS):
        study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(seed=seed))
        study.optimize(objective_factory(c), n_trials=n_trials, show_progress_bar=False)
        thresholds[c] = study.best_trial.params['t']
    return thresholds

# -------------------------
# Main training loop with checkpointing
# -------------------------
checkpoint_records = []  # (val_micro, path, thresholds)
best_val_micro = 0.0

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    train_loss = train_one_epoch(model, train_loader, optimizer, scheduler, loss_fn, device)
    print("Train loss:", train_loss)

    val_probs, val_labels = predict_probs(model, val_loader, device)
    thresholds = tune_thresholds_per_class(val_probs, val_labels, n_trials=NUM_TRIALS_THRESHOLD)
    val_preds = (val_probs >= thresholds).astype(int)
    val_micro = f1_score(val_labels, val_preds, average='micro', zero_division=0)
    val_macro = f1_score(val_labels, val_preds, average='macro', zero_division=0)
    print(f"Val micro-F1: {val_micro:.4f} | Val macro-F1: {val_macro:.4f}")

    ckpt = f"ckpt_epoch{epoch+1:02d}_micro{val_micro:.4f}.pt"
    torch.save(model.state_dict(), ckpt)
    checkpoint_records.append((val_micro, ckpt, thresholds.copy()))
    checkpoint_records = sorted(checkpoint_records, key=lambda x: x[0], reverse=True)[:TOP_K_CHECKPOINTS]

    if val_micro > best_val_micro:
        best_val_micro = val_micro
        best_thresholds_best = thresholds.copy()

    print("Top checkpoints:", [(r[0], r[1]) for r in checkpoint_records])

print("\nTraining finished. Best val micro-F1:", best_val_micro)

# -------------------------
# Ensemble top checkpoints on test set
# -------------------------
print("Ensembling top checkpoints...")
probs_accum = None
labels_accum = None
th_stack = []
for score, ckpt_path, th in checkpoint_records:
    state = torch.load(ckpt_path, map_location=device)
    model.load_state_dict(state)
    model.to(device)
    model.eval()
    probs, labels = predict_probs(model, test_loader, device)
    if probs_accum is None:
        probs_accum = probs
        labels_accum = labels
    else:
        probs_accum += probs
    th_stack.append(th)

ensemble_probs = probs_accum / len(checkpoint_records)
if th_stack:
    ensemble_thresholds = np.mean(np.stack(th_stack, axis=0), axis=0)
else:
    ensemble_thresholds = best_thresholds_best

test_preds = (ensemble_probs >= ensemble_thresholds).astype(int)
micro = f1_score(labels_accum, test_preds, average='micro', zero_division=0)
macro = f1_score(labels_accum, test_preds, average='macro', zero_division=0)
weighted = f1_score(labels_accum, test_preds, average='weighted', zero_division=0)

print(f"Ensemble Test Micro-F1: {micro:.4f} | Macro-F1: {macro:.4f} | Weighted-F1: {weighted:.4f}")
print("\nClassification report (ensemble):")
print(classification_report(labels_accum, test_preds, target_names=EMOTION_NAMES, zero_division=0))

# -------------------------
# Inference helper
# -------------------------
def predict_text(text, model, tokenizer, device, thresholds, hef_enabled=True):
    model.eval()
    enc = tokenizer(text, padding='max_length', truncation=True, max_length=MAX_LEN, return_tensors='pt')
    input_ids = enc['input_ids'].to(device); attention_mask = enc['attention_mask'].to(device)
    hef_tensor = torch.from_numpy(extract_hef_features(text)).unsqueeze(0).to(device) if hef_enabled else torch.zeros((1,HEF_DIM), device=device)
    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask, hef=hef_tensor)
        probs = torch.sigmoid(logits).cpu().numpy()[0]
    preds = [(EMOTION_NAMES[i], float(probs[i])) for i in range(NUM_LABELS) if probs[i] >= thresholds[i]]
    preds.sort(key=lambda x: x[1], reverse=True)
    return preds

print("\nExample predictions:")
for txt in ["I am thrilled to see you!", "I hate this so much.", "Thanks for your help."]:
    print(txt, "->", predict_text(txt, model, tokenizer, device, ensemble_thresholds))


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


Device: cuda
Loading GoEmotions...


README.md: 0.00B [00:00, ?B/s]

simplified/train-00000-of-00001.parquet:   0%|          | 0.00/2.77M [00:00<?, ?B/s]

simplified/validation-00000-of-00001.par(…):   0%|          | 0.00/350k [00:00<?, ?B/s]

simplified/test-00000-of-00001.parquet:   0%|          | 0.00/347k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/43410 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/5426 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5427 [00:00<?, ? examples/s]

43410 5426 5427 examples
Loading HEF tools...
Rare classes (support < 120): [16, 21]
Preparing tokenizer and datasets...


tokenizer_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/580 [00:00<?, ?B/s]

spm.model:   0%|          | 0.00/2.46M [00:00<?, ?B/s]

Creating weighted sampler...
Loading backbone: microsoft/deberta-v3-large


pytorch_model.bin:   0%|          | 0.00/874M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/874M [00:00<?, ?B/s]


Epoch 1/4


Train:   0%|          | 0/2171 [00:00<?, ?it/s]

Train loss: 0.08245000745526304


Predict:   0%|          | 0/272 [00:00<?, ?it/s]

[I 2025-12-07 07:12:01,389] A new study created in memory with name: no-name-cd1d697b-eb00-4c00-a9de-13d65bd52729
[I 2025-12-07 07:12:01,415] Trial 0 finished with value: 0.6846846846846847 and parameters: {'t': 0.34334070577415265}. Best is trial 0 with value: 0.6846846846846847.
[I 2025-12-07 07:12:01,422] Trial 1 finished with value: 0.618421052631579 and parameters: {'t': 0.8561357327048255}. Best is trial 0 with value: 0.6846846846846847.
[I 2025-12-07 07:12:01,427] Trial 2 finished with value: 0.7251336898395722 and parameters: {'t': 0.6614746082121505}. Best is trial 2 with value: 0.7251336898395722.
[I 2025-12-07 07:12:01,432] Trial 3 finished with value: 0.7291866028708134 and parameters: {'t': 0.5428060509353626}. Best is trial 3 with value: 0.7291866028708134.
[I 2025-12-07 07:12:01,437] Trial 4 finished with value: 0.4575036425449247 and parameters: {'t': 0.14885658999376852}. Best is trial 3 with value: 0.7291866028708134.
[I 2025-12-07 07:12:01,443] Trial 5 finished with 

Val micro-F1: 0.6003 | Val macro-F1: 0.5441
Top checkpoints: [(0.6003269599829412, 'ckpt_epoch01_micro0.6003.pt')]

Epoch 2/4


Train:   0%|          | 0/2171 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Train loss: 0.04386860690581969


Predict:   0%|          | 0/272 [00:00<?, ?it/s]

[I 2025-12-07 07:40:18,435] A new study created in memory with name: no-name-b81ac0a4-708d-4106-a4fa-3b0e96593b69
[I 2025-12-07 07:40:18,445] Trial 0 finished with value: 0.7271010387157696 and parameters: {'t': 0.34334070577415265}. Best is trial 0 with value: 0.7271010387157696.
[I 2025-12-07 07:40:18,459] Trial 1 finished with value: 0.5451977401129944 and parameters: {'t': 0.8561357327048255}. Best is trial 0 with value: 0.7271010387157696.
[I 2025-12-07 07:40:18,466] Trial 2 finished with value: 0.6942148760330579 and parameters: {'t': 0.6614746082121505}. Best is trial 0 with value: 0.7271010387157696.
[I 2025-12-07 07:40:18,474] Trial 3 finished with value: 0.7319698600645855 and parameters: {'t': 0.5428060509353626}. Best is trial 3 with value: 0.7319698600645855.
[I 2025-12-07 07:40:18,480] Trial 4 finished with value: 0.6394658753709199 and parameters: {'t': 0.14885658999376852}. Best is trial 3 with value: 0.7319698600645855.
[I 2025-12-07 07:40:18,487] Trial 5 finished with

Val micro-F1: 0.6040 | Val macro-F1: 0.5533
Top checkpoints: [(0.6039949713647157, 'ckpt_epoch02_micro0.6040.pt'), (0.6003269599829412, 'ckpt_epoch01_micro0.6003.pt')]

Epoch 3/4


Train:   0%|          | 0/2171 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^Exception ignored in: ^<function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>^
^^Traceback (most recent call last):
^^^^  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
^^^    ^^^self._shutdown_workers()^^^^^^
AssertionError: can only test a child process

  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 16

Train loss: 0.03159431255633163


Predict:   0%|          | 0/272 [00:00<?, ?it/s]

[I 2025-12-07 08:08:32,775] A new study created in memory with name: no-name-af50d789-4ee7-473d-ba54-37fac95f001d
[I 2025-12-07 08:08:32,783] Trial 0 finished with value: 0.7242647058823529 and parameters: {'t': 0.34334070577415265}. Best is trial 0 with value: 0.7242647058823529.
[I 2025-12-07 08:08:32,788] Trial 1 finished with value: 0.6599496221662469 and parameters: {'t': 0.8561357327048255}. Best is trial 0 with value: 0.7242647058823529.
[I 2025-12-07 08:08:32,794] Trial 2 finished with value: 0.7209554831704669 and parameters: {'t': 0.6614746082121505}. Best is trial 0 with value: 0.7242647058823529.
[I 2025-12-07 08:08:32,801] Trial 3 finished with value: 0.7418032786885246 and parameters: {'t': 0.5428060509353626}. Best is trial 3 with value: 0.7418032786885246.
[I 2025-12-07 08:08:32,806] Trial 4 finished with value: 0.6324786324786325 and parameters: {'t': 0.14885658999376852}. Best is trial 3 with value: 0.7418032786885246.
[I 2025-12-07 08:08:32,810] Trial 5 finished with

Val micro-F1: 0.6009 | Val macro-F1: 0.5534
Top checkpoints: [(0.6039949713647157, 'ckpt_epoch02_micro0.6040.pt'), (0.6008994632235601, 'ckpt_epoch03_micro0.6009.pt'), (0.6003269599829412, 'ckpt_epoch01_micro0.6003.pt')]

Epoch 4/4


Train:   0%|          | 0/2171 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
    if w.is_alive():
       ^^^^^^^^^^^^
  File "/usr/lib/python3.12/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AssertionError: can only test a child process
Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>
<function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>Traceback (most recent call last):

  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
Traceback (most recent call

Train loss: 0.024361671566515336


Predict:   0%|          | 0/272 [00:00<?, ?it/s]

[I 2025-12-07 08:36:47,284] A new study created in memory with name: no-name-08602d3c-b0ae-4110-9a05-a7aacff90af7
[I 2025-12-07 08:36:47,293] Trial 0 finished with value: 0.7103448275862069 and parameters: {'t': 0.34334070577415265}. Best is trial 0 with value: 0.7103448275862069.
[I 2025-12-07 08:36:47,299] Trial 1 finished with value: 0.6923976608187135 and parameters: {'t': 0.8561357327048255}. Best is trial 0 with value: 0.7103448275862069.
[I 2025-12-07 08:36:47,304] Trial 2 finished with value: 0.728952772073922 and parameters: {'t': 0.6614746082121505}. Best is trial 2 with value: 0.728952772073922.
[I 2025-12-07 08:36:47,310] Trial 3 finished with value: 0.7260406582768635 and parameters: {'t': 0.5428060509353626}. Best is trial 2 with value: 0.728952772073922.
[I 2025-12-07 08:36:47,314] Trial 4 finished with value: 0.5942028985507246 and parameters: {'t': 0.14885658999376852}. Best is trial 2 with value: 0.728952772073922.
[I 2025-12-07 08:36:47,321] Trial 5 finished with val

Val micro-F1: 0.5975 | Val macro-F1: 0.5469
Top checkpoints: [(0.6039949713647157, 'ckpt_epoch02_micro0.6040.pt'), (0.6008994632235601, 'ckpt_epoch03_micro0.6009.pt'), (0.6003269599829412, 'ckpt_epoch01_micro0.6003.pt')]

Training finished. Best val micro-F1: 0.6039949713647157
Ensembling top checkpoints...


Predict:   0%|          | 0/272 [00:00<?, ?it/s]

Predict:   0%|          | 0/272 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0><function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1654, in __del__
        self._shutdown_workers()self._shutdown_workers()

  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
  File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1637, in _shutdown_workers
        if w.is_alive():if w.is_alive():

              ^^^^^^^^Exception ignored in: Exception ignored in: ^^^<function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>^^^
^<function _MultiProcessingDataLoaderIter.__del__ at 0x7d3158b560c0>^Traceback

Predict:   0%|          | 0/272 [00:00<?, ?it/s]

Ensemble Test Micro-F1: 0.6046 | Macro-F1: 0.5284 | Weighted-F1: 0.6026

Classification report (ensemble):
                precision    recall  f1-score   support

    admiration       0.71      0.71      0.71       504
     amusement       0.77      0.89      0.83       264
         anger       0.52      0.44      0.48       198
     annoyance       0.31      0.54      0.39       320
      approval       0.38      0.47      0.42       351
        caring       0.41      0.52      0.46       135
     confusion       0.46      0.42      0.44       153
     curiosity       0.45      0.76      0.56       284
        desire       0.58      0.52      0.55        83
disappointment       0.33      0.32      0.33       151
   disapproval       0.43      0.47      0.45       267
       disgust       0.56      0.46      0.51       123
 embarrassment       0.61      0.38      0.47        37
    excitement       0.36      0.52      0.42       103
          fear       0.59      0.74      0.66       