In [None]:
!pip install coral_pytorch transformers datasets torch scikit-learn accelerate

Collecting coral_pytorch
  Downloading coral_pytorch-1.4.0-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading coral_pytorch-1.4.0-py2.py3-none-any.whl (7.3 kB)
Installing collected packages: coral_pytorch
Successfully installed coral_pytorch-1.4.0


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, cohen_kappa_score, mean_absolute_error, mean_squared_error
from transformers import (
    AutoTokenizer,
    AutoModel,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    DataCollatorWithPadding
)
from datasets import Dataset

In [None]:
# Reproducibility: one seed shared by NumPy and PyTorch (CPU, plus every GPU when CUDA is present).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

In [None]:
# --- Backbone / tokenization ---
MODEL_NAME = "xlm-roberta-base"  # checkpoint name used for both the tokenizer and the fusion model
MAX_LEN = 512                    # length every tokenized text is truncated / padded to

# --- Optimisation ---
EPOCHS = 6                       # upper bound on training epochs per fold
BATCH_SIZE = 8                   # per-device training batch size

# --- Task shape ---
NUM_CLASSES = 8                  # number of ordinal ranks; predictions are clipped to [0, NUM_CLASSES - 1]
NUM_EXTRA_FEATURES = 24          # expected number of hand-crafted feature columns

In [None]:
# The text table and the feature table are stored as two separate CSV files.
df_text, df_features = (
    pd.read_csv(csv_path)
    for csv_path in ("training_data.csv", "training_features.csv")
)

# The two tables are concatenated column-wise afterwards, so their row counts must agree.
assert len(df_text) == len(df_features), "Error: CSV file lengths do not match!"

In [None]:
# Put the columns of both tables side by side; pandas aligns the rows on the index.
df = pd.concat((df_text, df_features), axis="columns")

In [None]:
# Names of the numeric columns that are scaled and fused with the transformer embedding.
# NOTE(review): the group labels below are inferred from the column names only — confirm
# against the feature-extraction code.
feature_cols = [
    # sentence / token length
    'sent_len_tokens_mean',
    'sent_len_tokens_max',
    'token_len_chars_mean',
    'token_len_chars_max',
    # lexical diversity / density
    'mattr',
    'lexical_density',
    # part-of-speech ratios
    'noun_ratio',
    'verb_ratio',
    'adj_ratio',
    'pron_ratio',
    'function_word_ratio',
    # syntactic complexity
    'clauses_per_sentence',
    'avg_tree_depth',
    # readability indices
    'ari',
    'fres',
    'smog',
    'cli',
    # error rates (presumably spelling / grammar)
    'sp_rate',
    'gr_rate',
    # per-level vocabulary ratios (presumably CEFR levels)
    'ratio_a1',
    'ratio_a2',
    'ratio_b1',
    'ratio_b2',
    'ratio_c1',
]

In [None]:
# Sanity report: number of rows loaded and number of feature columns selected.
print(
    f"Loaded {len(df)} essays.",
    f"Using {len(feature_cols)} extra features.",
    sep="\n",
)

Loaded 1138 essays.
Using 24 extra features.


In [None]:
# Tokenizer belonging to the checkpoint named by MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def tokenize_function(examples):
    """Tokenize the ``"text"`` entry of a batch of examples.

    Every sequence is truncated to, and padded up to, exactly ``MAX_LEN`` tokens.

    Args:
        examples: Mapping with a ``"text"`` entry (the loop passes this function
            to ``Dataset.map`` with ``batched=True``).

    Returns:
        Whatever the tokenizer returns for ``examples["text"]``.
    """
    encoding_options = dict(padding="max_length", truncation=True, max_length=MAX_LEN)
    return tokenizer(examples["text"], **encoding_options)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

In [None]:

class XLMR_Coral_Fusion(nn.Module):
    """Transformer encoder fused with extra numeric features and a CORAL-style ordinal head.

    The first-token embedding of the backbone is concatenated with the extra
    feature vector. A single bias-free linear layer maps the fused vector to one
    scalar score, and ``num_classes - 1`` learnable biases turn that score into
    one logit per ordinal threshold ("is the rank greater than k?").

    Args:
        model_checkpoint: Name or path handed to ``AutoModel.from_pretrained``.
        num_classes: Number of ordinal ranks; the head emits ``num_classes - 1`` logits.
        num_extra_features: Width of the extra feature vector appended to the embedding.
    """

    def __init__(self, model_checkpoint, num_classes=8, num_extra_features=24):
        super().__init__()
        self.backbone = AutoModel.from_pretrained(model_checkpoint)

        # Width after fusion = encoder hidden size + number of extra features.
        fused_width = self.backbone.config.hidden_size + num_extra_features

        # Shared projection to a single score, plus one bias per threshold.
        self.linear = nn.Linear(fused_width, 1, bias=False)
        self.biases = nn.Parameter(torch.zeros(num_classes - 1))

    def forward(self, input_ids, attention_mask, extra_features, labels=None, **kwargs):
        """Compute threshold logits and, when labels are given, the summed BCE loss.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Attention mask forwarded to the backbone.
            extra_features: Per-example feature tensor; cast to float before fusion.
            labels: Optional rank labels, compared against thresholds ``0 .. K-2``.
            **kwargs: Ignored; lets callers pass additional batch columns.

        Returns:
            ``logits`` of shape ``[batch, num_classes - 1]`` when ``labels`` is ``None``,
            otherwise the tuple ``(loss, logits)``.
        """
        # 1. Encode the text and keep the hidden state of the first token.
        encoder_out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        first_token = encoder_out.last_hidden_state[:, 0, :]  # [batch, hidden]

        # 2. Early fusion: append the numeric features to the embedding.
        fused = torch.cat((first_token, extra_features.float()), dim=1)

        # 3. One shared score per example, shifted by every threshold bias.
        logits = self.linear(fused) + self.biases  # [batch, num_classes - 1]

        if labels is None:
            return logits

        # Binary target for threshold k is 1 exactly when label > k.
        thresholds = torch.arange(logits.size(1), device=logits.device).expand_as(logits)
        targets = (labels.unsqueeze(1) > thresholds).float()

        # Logit-space BCE (the fp16-autocast-safe variant), summed over batch and thresholds.
        loss = nn.functional.binary_cross_entropy_with_logits(logits, targets, reduction='sum')
        return loss, logits

In [None]:
def compute_metrics(eval_pred):
    """Turn threshold logits into rank predictions and score them against the labels.

    Decoding: sigmoid of every threshold logit, summed per example to an expected
    rank, rounded to the nearest integer and clipped to ``[0, NUM_CLASSES - 1]``.

    Args:
        eval_pred: Pair ``(logits, labels)`` where ``logits`` is a 2-D array with one
            column per threshold and ``labels`` holds the integer ranks.

    Returns:
        Dict with ``accuracy``, ``qwk`` (quadratic weighted kappa), ``mae`` and ``rmse``.
    """
    logits, labels = eval_pred

    # CORAL Inference: Sigmoid -> Sum -> Round
    probs = 1 / (1 + np.exp(-logits))
    predicted_rank = np.sum(probs, axis=1)
    predictions = np.round(predicted_rank).astype(int)

    # Clipping to valid range
    predictions = np.clip(predictions, 0, NUM_CLASSES - 1)

    # Explicit, contiguous rank grid for the kappa weight matrix. Without `labels=`,
    # scikit-learn indexes the confusion matrix by the sorted set of ranks that happen
    # to occur, so a rank missing from both arrays would shrink the quadratic distances
    # between the remaining ranks. The grid always covers 0..NUM_CLASSES-1 and is widened
    # if the labels fall outside that range, so no sample is ever dropped.
    lowest_rank = min(0, int(np.min(labels)))
    highest_rank = max(NUM_CLASSES - 1, int(np.max(labels)))
    rank_grid = np.arange(lowest_rank, highest_rank + 1)

    # Calculating all 4 metrics
    acc = accuracy_score(labels, predictions)
    qwk = cohen_kappa_score(labels, predictions, labels=rank_grid, weights='quadratic')
    mae = mean_absolute_error(labels, predictions)
    rmse = np.sqrt(mean_squared_error(labels, predictions))

    return {
        'accuracy': acc,
        'qwk': qwk,
        'mae': mae,
        'rmse': rmse
    }

In [None]:
# Stratified 5-fold splitter, shuffled with the global seed so the folds are repeatable.
skf = StratifiedKFold(shuffle=True, random_state=SEED, n_splits=5)

# One list per reported metric; the training loop appends one value per fold.
fold_results = {metric_name: [] for metric_name in ('qwk', 'mae', 'rmse', 'accuracy')}

In [None]:
# Stratified cross-validation: for every fold, scale the features on the training part only,
# build tokenized datasets, fine-tune a fresh fusion model and record the validation metrics
# of the best checkpoint (selected by QWK).
for fold, (train_idx, val_idx) in enumerate(skf.split(df, df['labels'])):
    print(f"\n{'='*20} FOLD {fold+1}/{skf.get_n_splits()} {'='*20}")

    # A. Split
    train_df = df.iloc[train_idx].copy()
    val_df = df.iloc[val_idx].copy()

    # B. Normalize Features (statistics come from the training fold only, so nothing leaks
    #    from the validation fold into the scaler)
    scaler = StandardScaler()
    train_df[feature_cols] = scaler.fit_transform(train_df[feature_cols])
    val_df[feature_cols] = scaler.transform(val_df[feature_cols])

    # C. Vectorize: one list of floats per row
    train_df['extra_features'] = train_df[feature_cols].values.tolist()
    val_df['extra_features'] = val_df[feature_cols].values.tolist()

    # D. Dataset Creation
    #    preserve_index=False keeps the pandas index from becoming an extra
    #    `__index_level_0__` column that would be forwarded to the model.
    keep_cols = ['text', 'labels', 'extra_features']
    train_dataset = Dataset.from_pandas(train_df[keep_cols], preserve_index=False)
    val_dataset = Dataset.from_pandas(val_df[keep_cols], preserve_index=False)

    # E. Tokenize, then drop the raw text: with remove_unused_columns=False every
    #    remaining column is handed to the Trainer, and the model only needs tensors.
    train_tokenized = train_dataset.map(tokenize_function, batched=True, remove_columns=['text'])
    val_tokenized = val_dataset.map(tokenize_function, batched=True, remove_columns=['text'])

    # F. Model Init (fresh weights per fold; class count comes from the config cell)
    model = XLMR_Coral_Fusion(
        MODEL_NAME,
        num_classes=NUM_CLASSES,
        num_extra_features=len(feature_cols),
    )

    # G. Training Args
    training_args = TrainingArguments(
        output_dir=f'./results_fusion_fold_{fold+1}',
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,  # explicit: the summed loss scales with batch size
        learning_rate=1e-5,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        logging_strategy="epoch",  # report the training loss every epoch instead of "No log"
        load_best_model_at_end=True,
        metric_for_best_model="qwk",
        greater_is_better=True,
        save_total_limit=1,
        fp16=torch.cuda.is_available(),
        remove_unused_columns=False,  # keep `extra_features`, which the Trainer would otherwise drop
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_tokenized,
        eval_dataset=val_tokenized,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]
    )

    # H. Train & Evaluate (evaluate() runs on the best checkpoint reloaded at the end)
    trainer.train()
    metrics = trainer.evaluate()

    # Print Full Report
    print(f"Fold {fold+1} Result:")
    print(f"  QWK: {metrics['eval_qwk']:.4f}")
    print(f"  MAE: {metrics['eval_mae']:.4f}")
    print(f"  RMSE: {metrics['eval_rmse']:.4f}")
    print(f"  Acc: {metrics['eval_accuracy']:.4f}")

    # Store: every key of fold_results has a matching `eval_<key>` entry in metrics
    for metric_name in fold_results:
        fold_results[metric_name].append(metrics[f'eval_{metric_name}'])

    # Cleanup: release the per-fold objects before the next model is loaded
    del model, trainer, train_df, val_df
    torch.cuda.empty_cache()




# Cross-validation summary: mean ± standard deviation (NumPy default, ddof=0) per metric.
banner = "=" * 40
print("\n" + banner)
print("FINAL FUSION RESULTS (5-Fold CV)")
print(banner)
for metric_name, per_fold_values in fold_results.items():
    fold_mean = np.mean(per_fold_values)
    fold_std = np.std(per_fold_values)
    print(f"{metric_name.upper()}: {fold_mean:.4f} ± {fold_std:.4f}")




Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/228 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,No log,31.376833,0.451754,0.681487,0.723684,1.057554
2,No log,30.857277,0.421053,0.758982,0.649123,0.903211
3,No log,30.235035,0.539474,0.845645,0.517544,0.816497
4,No log,30.493053,0.486842,0.810285,0.583333,0.876096
5,31.953557,30.07007,0.52193,0.83748,0.530702,0.819178
6,31.953557,30.139406,0.530702,0.846401,0.535088,0.837708


Fold 1 Result:
  QWK: 0.8464
  MAE: 0.5351
  RMSE: 0.8377
  Acc: 0.5307



Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/228 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,No log,31.768803,0.385965,0.599995,0.789474,1.104218
2,No log,30.918539,0.438596,0.79079,0.671053,0.948221
3,No log,31.051817,0.429825,0.785827,0.666667,0.945905
4,No log,31.279408,0.359649,0.770646,0.754386,0.995604
5,32.138826,31.752508,0.434211,0.806053,0.679825,0.957427
6,32.138826,31.393711,0.447368,0.812872,0.653509,0.929535


Fold 2 Result:
  QWK: 0.8129
  MAE: 0.6535
  RMSE: 0.9295
  Acc: 0.4474



Map:   0%|          | 0/910 [00:00<?, ? examples/s]

Map:   0%|          | 0/228 [00:00<?, ? examples/s]



Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,No log,31.690332,0.337719,0.696466,0.820175,1.086197
2,No log,32.25798,0.385965,0.775416,0.754386,1.017393
3,No log,30.226299,0.52193,0.852459,0.526316,0.789181
4,No log,30.047911,0.526316,0.861943,0.526316,0.800219
5,32.210211,29.972075,0.54386,0.866173,0.5,0.766629
6,32.210211,30.109564,0.539474,0.873206,0.504386,0.769484


Fold 3 Result:
  QWK: 0.8732
  MAE: 0.5044
  RMSE: 0.7695
  Acc: 0.5395



Map:   0%|          | 0/911 [00:00<?, ? examples/s]

Map:   0%|          | 0/227 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,No log,30.938248,0.427313,0.779578,0.647577,0.897868
2,No log,30.737682,0.484581,0.803231,0.590308,0.865389
3,No log,31.431492,0.39207,0.789042,0.709251,0.954932
4,No log,31.754122,0.444934,0.815157,0.669604,0.947987
5,32.205674,32.457897,0.352423,0.757963,0.814978,1.076378
6,32.205674,31.341009,0.45815,0.817246,0.647577,0.93158


Fold 4 Result:
  QWK: 0.8172
  MAE: 0.6476
  RMSE: 0.9316
  Acc: 0.4581



Map:   0%|          | 0/911 [00:00<?, ? examples/s]

Map:   0%|          | 0/227 [00:00<?, ? examples/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

XLMRobertaModel LOAD REPORT from: xlm-roberta-base
Key                       | Status     |  | 
--------------------------+------------+--+-
lm_head.layer_norm.bias   | UNEXPECTED |  | 
lm_head.dense.weight      | UNEXPECTED |  | 
lm_head.dense.bias        | UNEXPECTED |  | 
lm_head.layer_norm.weight | UNEXPECTED |  | 
lm_head.bias              | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


Epoch,Training Loss,Validation Loss,Accuracy,Qwk,Mae,Rmse
1,No log,33.986526,0.334802,0.319758,1.057269,1.445028
2,No log,32.513939,0.330396,0.693009,0.85022,1.120494
3,No log,32.502338,0.286344,0.674364,0.920705,1.162937
4,No log,30.915417,0.46696,0.825031,0.621145,0.892948
5,32.155893,31.050322,0.440529,0.818276,0.651982,0.914879
6,32.155893,31.14884,0.444934,0.818956,0.647577,0.912469


Fold 5 Result:
  QWK: 0.8250
  MAE: 0.6211
  RMSE: 0.8929
  Acc: 0.4670

FINAL FUSION RESULTS (5-Fold CV)
QWK: 0.8350 ± 0.0223
MAE: 0.5923 ± 0.0611
RMSE: 0.8723 ± 0.0616
ACCURACY: 0.4885 ± 0.0386
