In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, cohen_kappa_score


# Load the baseline feature table (path is relative to the notebook's
# working directory).
df = pd.read_csv("training_features_baseline.csv")

# The target column may be named either 'label' or 'labels'; 'label' takes
# precedence when both are present. Fail early with an explicit message when
# neither exists instead of letting `drop` raise an error that only mentions
# the fallback name.
target_col = next((name for name in ('label', 'labels') if name in df.columns), None)
if target_col is None:
    raise KeyError(
        "Expected a 'label' or 'labels' target column; "
        f"found columns: {list(df.columns)}"
    )

# Every remaining column is used as a feature; targets are cast to integer
# class labels.
X = df.drop(columns=[target_col])
y = df[target_col].to_numpy().astype(int)


# Per-fold scores keyed by metric name; each list receives one entry per fold.
results = {metric_name: [] for metric_name in ('MAE', 'RMSE', 'QWK')}

# 5-fold stratified cross-validation; shuffling with a fixed seed keeps the
# splits reproducible across runs.
skf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

# Banner: report the number of feature columns, then a 75-character rule.
print(f"Starting Nominal Baseline (Cross-Entropy) on {X.shape[1]} features...")
print("-" * 75)

# Evaluate the nominal baseline on every stratified split. `enumerate` with
# start=1 provides the 1-based fold number shown in the log line.
for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):

    # X is a DataFrame (positional `.iloc`), y is a NumPy array (plain indexing).
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit the scaler on the training fold only, then apply the same transform
    # to the held-out fold, so no test-fold statistics reach the model.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Multinomial logistic regression: classes are treated as unordered categories.
    # NOTE(review): recent scikit-learn releases (>= 1.5) deprecate the
    # `multi_class` argument; it is kept here so behaviour is unchanged on
    # older versions -- confirm against the installed version before removing.
    clf = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=2000, random_state=42)
    clf.fit(X_train_scaled, y_train)

    y_pred_class = clf.predict(X_test_scaled)

    # Score the predicted class labels numerically: absolute error, root mean
    # squared error, and quadratically weighted Cohen's kappa.
    mae = mean_absolute_error(y_test, y_pred_class)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred_class))
    qwk = cohen_kappa_score(y_test, y_pred_class, weights='quadratic')

    # Record this fold's scores under the matching metric key.
    for metric_name, score in (('MAE', mae), ('RMSE', rmse), ('QWK', qwk)):
        results[metric_name].append(score)

    print(f"Fold {fold}: QWK = {qwk:.4f} | MAE = {mae:.4f} | RMSE = {rmse:.4f}")



Starting Nominal Baseline (Cross-Entropy) on 19 features...
---------------------------------------------------------------------------
Fold 1: QWK = 0.6977 | MAE = 0.7412 | RMSE = 1.1297
Fold 2: QWK = 0.7277 | MAE = 0.7018 | RMSE = 1.0882
Fold 3: QWK = 0.7061 | MAE = 0.7412 | RMSE = 1.0862




Fold 4: QWK = 0.7093 | MAE = 0.7225 | RMSE = 1.1027
Fold 5: QWK = 0.7240 | MAE = 0.7445 | RMSE = 1.0967




In [None]:

# Summarise the cross-validation run: one "mean ± std" line per metric, in the
# insertion order of `results`.
print("-" * 75)
print("FINAL RESULTS (Nominal Baseline):")
for metric in results:
    fold_scores = np.asarray(results[metric])
    # `.std()` uses NumPy's default ddof=0, i.e. the population standard
    # deviation across folds.
    print(f"{metric}: {fold_scores.mean():.4f} ± {fold_scores.std():.4f}")

---------------------------------------------------------------------------
FINAL RESULTS (Nominal Baseline):
MAE: 0.7302 ± 0.0162
RMSE: 1.1007 ± 0.0157
QWK: 0.7129 ± 0.0113
