In [None]:
!pip install coral_pytorch transformers datasets torch scikit-learn accelerate



In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, cohen_kappa_score, mean_absolute_error, mean_squared_error
from transformers import (
    AutoTokenizer,
    AutoModel,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback
)
from datasets import Dataset

from coral_pytorch.losses import CornLoss
from coral_pytorch.dataset import corn_label_from_logits


SEED = 42

# Seed the NumPy and PyTorch global generators (plus every CUDA device when
# one is present) so repeated runs start from the same random state.
np.random.seed(SEED)
torch.manual_seed(SEED)
cuda_present = torch.cuda.is_available()
if cuda_present:
    torch.cuda.manual_seed_all(SEED)


Libraries imported and seed set.


In [None]:
# --- Model ---
MODEL_NAME = "xlm-roberta-base"  # checkpoint used for both the tokenizer and the backbone
NUM_CLASSES = 8                  # number of ordinal classes handed to the CORN model
MAX_LEN = 512                    # token length every example is truncated/padded to

# --- Training schedule ---
N_FOLDS = 5                      # number of stratified cross-validation splits
BATCH_SIZE = 8                   # per-device batch size for training and evaluation
EPOCHS = 6                       # training epochs per fold

In [None]:
class XLMRCornOrdinal(nn.Module):
    """Transformer encoder topped with a CORN ordinal-regression head.

    The backbone is loaded with ``AutoModel.from_pretrained``; the hidden state
    of the first token is projected to ``num_classes - 1`` logits, i.e. one
    binary classifier per rank threshold as required by ``CornLoss``.

    Args:
        model_checkpoint: Name or path passed to ``AutoModel.from_pretrained``.
        num_classes: Number of ordinal classes K. Both the output layer
            (K - 1 units) and the loss are built from this value.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2 (no threshold to learn).
    """

    def __init__(self, model_checkpoint, num_classes=8):
        super().__init__()
        if num_classes < 2:
            raise ValueError(f"num_classes must be >= 2, got {num_classes}")

        self.num_classes = num_classes
        self.backbone = AutoModel.from_pretrained(model_checkpoint)
        hidden_size = self.backbone.config.hidden_size

        # CORN architecture: (K-1) binary classifiers
        self.corn_layer = nn.Linear(hidden_size, num_classes - 1)

        # Build the loss once, from the same class count as the head, so the
        # two can never disagree (it used to be hard-coded to 8 in forward()).
        self.loss_fct = CornLoss(num_classes=num_classes)

    def forward(self, input_ids, attention_mask, labels=None, **kwargs):
        """Run the encoder and the CORN head.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Attention mask forwarded to the backbone.
            labels: Optional integer rank labels; when given, the CORN loss is
                computed as well.
            **kwargs: Accepted and ignored, so extra batch columns do not break
                the call.

        Returns:
            ``(loss, logits)`` when ``labels`` is provided, otherwise ``logits``
            alone. ``logits`` has ``num_classes - 1`` columns.
        """
        # 1. Transformer embeddings; the first token's state is the sequence summary.
        outputs = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        cls_embedding = outputs.last_hidden_state[:, 0, :]

        # 2. CORN layer
        logits = self.corn_layer(cls_embedding)

        if labels is None:
            return logits

        loss = self.loss_fct(logits, labels)
        return (loss, logits)

In [None]:
# Read the training table; later cells use its "text" and "labels" columns.
df = pd.read_csv(filepath_or_buffer="training_data.csv")

In [None]:
# Labels whose smallest value is 1 are re-based to start at 0.
labels_start_at_one = df['labels'].min() == 1
if labels_start_at_one:
    print("Shifting labels from 1-8 to 0-7...")
    df['labels'] = df['labels'].sub(1)

Shifting labels from 1-8 to 0-7...


In [None]:
# Make sure the label column holds plain integers.
df = df.assign(labels=df['labels'].astype(int))

In [None]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch.

    Every sequence is truncated and padded to exactly ``MAX_LEN`` tokens.
    """
    encoded = tokenizer(
        examples["text"],
        max_length=MAX_LEN,
        padding="max_length",
        truncation=True,
    )
    return encoded

In [None]:
# Stratified splitter; it shares the notebook-wide SEED so changing the seed
# in one place also changes the fold assignment.
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=SEED)

# Best validation QWK seen across folds, and where that fold's model is saved.
best_score = -float('inf')
best_model_dir = "./best_corn_model"

# Per-fold validation metrics, aggregated after the loop.
fold_metrics = {'accuracy': [], 'qwk': [], 'mae': [], 'rmse': []}

print(f"Starting {N_FOLDS}-Fold CV (CORN Implementation)...")

Starting 5-Fold CV (CORN Implementation)...


In [None]:
def compute_metrics(eval_pred):
    """Score CORN predictions for the Trainer's evaluation loop.

    Defined once, outside the fold loop, because it does not depend on the fold.

    Args:
        eval_pred: ``(logits, labels)`` pair; logits hold one column per CORN
            binary task (7 columns for 8 classes).

    Returns:
        dict with ``accuracy``, ``qwk`` (quadratic-weighted kappa), ``mae``
        and ``rmse``.
    """
    logits, labels = eval_pred

    # CORN inference: corn_label_from_logits turns the binary-task logits into
    # an integer rank. Decode in float32 -- NOTE(review): logits may arrive in
    # half precision when fp16 is enabled; confirm against the Trainer version.
    predictions = corn_label_from_logits(
        torch.as_tensor(logits, dtype=torch.float32)
    ).numpy()

    return {
        'accuracy': accuracy_score(labels, predictions),
        'qwk': cohen_kappa_score(labels, predictions, weights='quadratic'),
        'mae': mean_absolute_error(labels, predictions),
        'rmse': np.sqrt(mean_squared_error(labels, predictions)),
    }


for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['labels'])):
    print(f"\n{'='*20} FOLD {fold+1}/{N_FOLDS} {'='*20}")

    # A. Split. preserve_index=False keeps the sliced frame's pandas index from
    # being stored as an extra "__index_level_0__" column.
    train_dataset = Dataset.from_pandas(df.iloc[train_idx], preserve_index=False)
    val_dataset = Dataset.from_pandas(df.iloc[val_idx], preserve_index=False)

    # B. Tokenize. remove_unused_columns=False (below) forwards every remaining
    # column to the collator, so drop everything except the labels here; the
    # tokenizer outputs are added by map().
    train_tokenized = train_dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=[c for c in train_dataset.column_names if c != 'labels'],
    )
    val_tokenized = val_dataset.map(
        tokenize_function,
        batched=True,
        remove_columns=[c for c in val_dataset.column_names if c != 'labels'],
    )

    # C. Initializing Model (Fresh for each fold)
    model = XLMRCornOrdinal(MODEL_NAME, NUM_CLASSES)

    # D. Training Arguments
    training_args = TrainingArguments(
        output_dir=f'./results_corn_fold_{fold+1}',
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        learning_rate=1e-5,
        weight_decay=0.01,

        # Evaluate and checkpoint every epoch; keep the epoch with the best QWK.
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="qwk",
        greater_is_better=True,
        save_total_limit=1,

        logging_steps=50,
        fp16=torch.cuda.is_available(),
        seed=SEED,

        # The custom model's forward() signature is not inspected by Trainer,
        # so columns are pruned explicitly in step B instead.
        remove_unused_columns=False
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_tokenized,
        eval_dataset=val_tokenized,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # E. Train, then score the validation fold with the restored best checkpoint.
    trainer.train()
    metrics = trainer.evaluate()

    print(f"Fold {fold+1} Result: QWK={metrics['eval_qwk']:.4f} | MAE={metrics['eval_mae']:.4f}")

    for metric_name in fold_metrics:
        fold_metrics[metric_name].append(metrics[f'eval_{metric_name}'])

    # F. Keep the weights and tokenizer of the fold with the highest validation QWK.
    if metrics['eval_qwk'] > best_score:
        print(f"New Best Score (QWK {metrics['eval_qwk']:.4f})! Saving...")
        best_score = metrics['eval_qwk']
        trainer.save_model(best_model_dir)
        tokenizer.save_pretrained(best_model_dir)

    # G. Release this fold's model before the next one is built.
    del model, trainer
    torch.cuda.empty_cache()




# Report mean ± standard deviation of every metric across the folds.
banner = "=" * 40
print("\n" + banner)
print(f"FINAL CORN RESULTS ({N_FOLDS}-Fold CV)")
print(banner)
for metric_name, per_fold_values in fold_metrics.items():
    mean_value = np.mean(per_fold_values)
    std_value = np.std(per_fold_values)
    print(f"{metric_name.upper()}: {mean_value:.4f} ± {std_value:.4f}")




Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/228 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,0.370426,0.344964,0.359649,0.544222,0.894737,1.213954
2,0.317314,0.281268,0.429825,0.734195,0.692982,0.982299
3,0.276951,0.311626,0.412281,0.710771,0.741228,1.049227
4,0.258109,0.274858,0.473684,0.797858,0.627193,0.924804
5,0.227224,0.249711,0.552632,0.823092,0.52193,0.835086
6,0.214347,0.256524,0.530702,0.847103,0.54386,0.842927


Fold 1 Result: QWK=0.8471 | MAE=0.5439
New Best Score (QWK 0.8471)! Saving...



Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/228 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,0.361299,0.301395,0.407895,0.611311,0.767544,1.086197
2,0.301109,0.289003,0.429825,0.770802,0.675439,0.945905
3,0.272464,0.269716,0.460526,0.745916,0.666667,0.973329
4,0.237557,0.273719,0.495614,0.785583,0.631579,0.950531
5,0.220355,0.252008,0.473684,0.813243,0.614035,0.898342
6,0.214618,0.259777,0.504386,0.800462,0.609649,0.929535


Fold 2 Result: QWK=0.8132 | MAE=0.6140



Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/228 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,0.368172,0.323155,0.328947,0.529242,0.872807,1.160384
2,0.295415,0.32595,0.320175,0.569764,0.890351,1.160384
3,0.274136,0.299702,0.381579,0.70811,0.776316,1.049227
4,0.229239,0.255744,0.517544,0.801839,0.578947,0.878595
5,0.21209,0.260375,0.47807,0.781137,0.614035,0.893446
6,0.201704,0.247451,0.54386,0.831891,0.535088,0.832456


Fold 3 Result: QWK=0.8319 | MAE=0.5351



Map:   0%|          | 0/911 [00:00<?, ? examples/s]

Map:   0%|          | 0/227 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,0.378734,0.316695,0.414097,0.665631,0.731278,1.023942
2,0.296029,0.274015,0.493392,0.827632,0.568282,0.852568
3,0.274778,0.324217,0.365639,0.738123,0.762115,1.01313
4,0.236508,0.262322,0.471366,0.842507,0.590308,0.860284
5,0.221587,0.298488,0.414097,0.793613,0.696035,0.961826
6,0.206281,0.261399,0.462555,0.834121,0.599119,0.865389


Fold 4 Result: QWK=0.8425 | MAE=0.5903



Map:   0%|          | 0/911 [00:00<?, ? examples/s]

Map:   0%|          | 0/227 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,0.3831,0.323477,0.370044,0.430394,0.911894,1.250551
2,0.288954,0.335634,0.409692,0.598298,0.77533,1.094641
3,0.276554,0.288058,0.475771,0.79106,0.634361,0.929213
4,0.238326,0.277655,0.440529,0.798954,0.651982,0.919682
5,0.221347,0.283523,0.431718,0.802807,0.660793,0.92446
6,0.211941,0.269816,0.46696,0.822454,0.612335,0.878023


Fold 5 Result: QWK=0.8225 | MAE=0.6123

FINAL CORN RESULTS (5-Fold CV)
ACCURACY: 0.4973 ± 0.0330
QWK: 0.8314 ± 0.0125
MAE: 0.5791 ± 0.0336
RMSE: 0.8624 ± 0.0237
