In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, cohen_kappa_score

In [None]:
class FeatureCORN(nn.Module):
    """Bank of independent linear binary heads for CORN ordinal regression.

    One ``nn.Linear(input_dim, 1)`` head is created per rank threshold, so a
    problem with ``num_classes`` ranks gets ``num_classes - 1`` heads. Every
    head receives the same input features.
    """

    def __init__(self, input_dim, num_classes=8):
        """Create the per-threshold heads.

        Args:
            input_dim: number of input features each head consumes.
            num_classes: number of ordinal ranks; ``num_classes - 1`` heads
                are built.
        """
        super().__init__()
        # K ranks are separated by K - 1 binary "above threshold k?" tasks.
        self.num_tasks = num_classes - 1

        heads = [nn.Linear(input_dim, 1) for _ in range(self.num_tasks)]
        self.classifiers = nn.ModuleList(heads)

    def forward(self, x):
        """Return the logits of every head, concatenated along dim 1.

        Args:
            x: feature tensor whose last dimension is ``input_dim``; for a
                ``(batch, input_dim)`` input the result is
                ``(batch, num_tasks)``.

        Returns:
            Tensor holding one logit column per head, in head order.
        """
        per_task_logits = [head(x) for head in self.classifiers]
        return torch.cat(per_task_logits, dim=1)

def corn_label_encoding(y, num_classes=8):
    """Encode integer ordinal labels as extended binary CORN targets.

    Column ``k`` of the result answers "is the label greater than ``k``?".
    This gives, for every task ``k``, both pieces a CORN training step needs:

    * the binary target of task ``k`` is column ``k``;
    * the conditional training subset of task ``k`` (labels ``> k - 1``) is
      every row for ``k == 0`` and the rows where column ``k - 1`` equals 1
      for ``k >= 1``.

    Args:
        y: integer rank labels in ``[0, num_classes - 1]`` as a list, NumPy
            array or tensor; any shape is flattened to 1-D.
        num_classes: number of ordinal ranks (must be at least 2).

    Returns:
        Float tensor of shape ``(len(y), num_classes - 1)`` containing 0.0/1.0.

    Raises:
        ValueError: if ``num_classes < 2`` or a label lies outside
            ``[0, num_classes - 1]``.
    """
    if num_classes < 2:
        raise ValueError("num_classes must be at least 2")

    labels = torch.as_tensor(y).reshape(-1).long()

    # Out-of-range labels would otherwise produce a silently wrong encoding.
    if labels.numel() > 0 and (labels.min() < 0 or labels.max() > num_classes - 1):
        raise ValueError(
            f"labels must lie in [0, {num_classes - 1}]"
        )

    # Broadcast (N, 1) labels against (K-1,) thresholds -> (N, K-1) booleans.
    thresholds = torch.arange(num_classes - 1, device=labels.device)
    return (labels.unsqueeze(1) > thresholds).float()

def predict_corn_rank(logits):
    """Decode CORN logits into integer rank predictions.

    Column ``k`` of ``logits`` is interpreted as the logit of the conditional
    probability ``P(y > k | y > k-1)``. The conditionals are chained into
    unconditional probabilities ``P(y > k)``, which are summed to give the
    expected rank; that value is rounded to the nearest integer.

    Args:
        logits: tensor of shape ``(batch, num_tasks)``.

    Returns:
        NumPy integer array of shape ``(batch,)`` with values in
        ``[0, num_tasks]``.
    """
    # 1. Conditional probabilities P(y > k | y > k-1).
    cond_probs = torch.sigmoid(logits)

    # 2. Chain rule: P(y > k) = prod_{j <= k} P(y > j | y > j-1).
    #    cumprod replaces the per-column Python loop and keeps the dtype and
    #    device of the input instead of allocating a float32 buffer on CPU.
    cum_probs = torch.cumprod(cond_probs, dim=1)

    # 3. Expected rank = P(y > 0) + P(y > 1) + ... + P(y > K-2).
    pred_rank = cum_probs.sum(dim=1)
    return torch.round(pred_rank).detach().cpu().numpy().astype(int)

In [None]:
# NOTE: `df` is not created in this cell; it must already exist in the kernel.
# The target column is 'label' when the frame has one, otherwise 'labels'.
if 'label' in df.columns:
    target_col = 'label'
else:
    target_col = 'labels'

# Features: every column except the target, as a float32 matrix.
X = df.drop(columns=[target_col]).values.astype(np.float32)
# Targets: the label column as int64.
y = df[target_col].values.astype(np.int64)

# Number of ordinal ranks, and the feature count taken from the matrix width.
num_classes = 8
input_dim = X.shape[1]

In [None]:
# 5-fold stratified cross-validation with a fixed shuffle seed.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 1-based counter used when reporting per-fold scores.
fold = 1
# One list of per-fold scores for each reported metric.
results = {metric_name: [] for metric_name in ('MAE', 'RMSE', 'QWK')}

print(f"Starting Feature-CORN (PyTorch) on {input_dim} features...")
print("-" * 60)

Starting Feature-CORN (PyTorch) on 24 features...
------------------------------------------------------------


In [None]:
# Make the cell idempotent: re-running it must not append a second set of
# fold scores to the shared `results` dict.
for fold_scores in results.values():
    fold_scores.clear()

epochs = 1500
num_tasks = num_classes - 1

# enumerate() supplies the 1-based fold number, so the numbering is correct
# even when this cell is re-run without re-running the setup cell.
for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
    # Seed per fold so the random initialisation of the heads is reproducible.
    torch.manual_seed(42 + fold)

    X_train_fold, X_test_fold = X[train_index], X[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]

    # Fit the scaler on the training fold only to avoid leaking test statistics.
    scaler = StandardScaler()
    X_train_fold = scaler.fit_transform(X_train_fold)
    X_test_fold = scaler.transform(X_test_fold)

    X_train_t_all = torch.tensor(X_train_fold)
    y_train_t_all = torch.tensor(y_train_fold)
    X_test_t = torch.tensor(X_test_fold)

    # Build the conditional CORN training subsets ONCE per fold; they do not
    # change between epochs, so rebuilding them inside the epoch loop is waste.
    #   task k trains only on samples whose true label is > k - 1
    #     k = 0: y > -1 (all data)
    #     k = 1: y > 0  (lowest rank excluded)
    #   and its binary target is (y > k).
    corn_tasks = []
    for k in range(num_tasks):
        mask = y_train_t_all > (k - 1)
        if mask.sum() == 0:
            continue  # no sample reaches this threshold in this fold
        X_subset = X_train_t_all[mask]
        binary_targets = (y_train_t_all[mask] > k).float().view(-1, 1)
        corn_tasks.append((k, X_subset, binary_targets))

    model = FeatureCORN(input_dim, num_classes)
    optimizer = optim.Adam(model.parameters(), lr=0.05, weight_decay=0)
    loss_fn = nn.BCEWithLogitsLoss()

    # Full-batch training: one optimiser step per epoch on the summed
    # per-task binary cross-entropy losses.
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        total_loss = 0

        for k, X_subset, binary_targets in corn_tasks:
            logits_k = model.classifiers[k](X_subset)
            total_loss += loss_fn(logits_k, binary_targets)

        total_loss.backward()
        optimizer.step()

    # Evaluate on the held-out fold.
    model.eval()
    with torch.no_grad():
        test_logits = model(X_test_t)
        y_pred = predict_corn_rank(test_logits)

    mae = mean_absolute_error(y_test_fold, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test_fold, y_pred))
    qwk = cohen_kappa_score(y_test_fold, y_pred, weights='quadratic')

    results['MAE'].append(mae)
    results['RMSE'].append(rmse)
    results['QWK'].append(qwk)

    print(f"Fold {fold}: QWK = {qwk:.4f} | MAE = {mae:.4f} | RMSE = {rmse:.4f}")

Fold 1: QWK = 0.7480 | MAE = 0.6711 | RMSE = 1.0066
Fold 2: QWK = 0.7534 | MAE = 0.7105 | RMSE = 1.0131
Fold 3: QWK = 0.7785 | MAE = 0.6316 | RMSE = 0.9272
Fold 4: QWK = 0.7773 | MAE = 0.6432 | RMSE = 0.9292
Fold 5: QWK = 0.7495 | MAE = 0.7048 | RMSE = 0.9800


In [None]:
# Cross-validation summary: mean ± standard deviation of each metric over folds.
print("-" * 60)
print("FINAL RESULTS (Feature-CORN):")
for metric in results:
    fold_scores = np.asarray(results[metric])
    print(f"{metric}: {fold_scores.mean():.4f} ± {fold_scores.std():.4f}")

------------------------------------------------------------
FINAL RESULTS (Feature-CORN):
MAE: 0.6722 ± 0.0317
RMSE: 0.9712 ± 0.0368
QWK: 0.7614 ± 0.0136
