In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, cohen_kappa_score


In [None]:
# Load the feature table from a CSV path relative to the working directory.
# Later cells split this frame into a feature matrix and a label column.
df = pd.read_csv("training_features_baseline.csv")



In [None]:

# --- Cross-validation setup ---------------------------------------------------
# Tunable settings are named once so the splitter and the progress message
# printed below cannot drift apart.
N_SPLITS = 5
SEED = 42

# The label column may be called either 'label' or 'labels'. Fail up front with
# a clear message if neither exists, rather than with an opaque KeyError raised
# later from drop().
target_col = next((name for name in ('label', 'labels') if name in df.columns), None)
if target_col is None:
    raise KeyError(
        "Expected a 'label' or 'labels' column in df; "
        f"found columns: {list(df.columns)}"
    )

# Every column other than the target is treated as a feature.
X = df.drop(columns=[target_col])
# Integer labels are used as the stratification classes by the splitter.
# NOTE(review): astype(int) truncates any fractional label values - confirm the
# labels are already whole numbers.
y = df[target_col].values.astype(int)

# Shuffled, stratified folds with a fixed seed so reruns produce the same splits.
skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

# Per-fold scores; one value per fold is appended under each metric name.
results = {
    'MAE': [],
    'RMSE': [],
    'QWK': []
}

print(f"Starting {N_SPLITS}-Fold CV with Scaling on {X.shape[1]} features...")



Starting 5-Fold CV with Scaling on 19 features...


In [None]:
# --- Cross-validated evaluation -------------------------------------------------
# For each fold: scale the features, fit a linear regression, and score the
# predictions on the held-out fold.

# Start from empty score lists so that re-running this cell on its own does not
# append a second set of fold scores on top of those from a previous run.
results = {'MAE': [], 'RMSE': [], 'QWK': []}

# Bounds applied to the rounded predictions before computing kappa.
# NOTE(review): presumably the valid label range is 0..7 - confirm against the data.
LABEL_MIN, LABEL_MAX = 0, 7

for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):

    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit the scaler on the training fold only, then reuse its statistics for
    # the held-out fold so no test information leaks into preprocessing.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    reg = LinearRegression()
    reg.fit(X_train_scaled, y_train)

    # Continuous predictions from the regressor.
    y_pred_raw = reg.predict(X_test_scaled)

    # Kappa compares discrete labels, so round to the nearest integer and clip
    # to the label range.
    y_pred_rounded = np.clip(np.round(y_pred_raw), LABEL_MIN, LABEL_MAX).astype(int)

    # MAE and RMSE are measured on the raw (unrounded, unclipped) predictions;
    # only QWK uses the discretised ones.
    mae = mean_absolute_error(y_test, y_pred_raw)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred_raw))
    qwk = cohen_kappa_score(y_test, y_pred_rounded, weights='quadratic')

    results['MAE'].append(mae)
    results['RMSE'].append(rmse)
    results['QWK'].append(qwk)

    print(f"Fold {fold}: QWK = {qwk:.4f} | MAE = {mae:.4f} | RMSE = {rmse:.4f}")

Fold 1: QWK = 0.7300 | MAE = 0.6988 | RMSE = 0.9579
Fold 2: QWK = 0.7529 | MAE = 0.7531 | RMSE = 0.9587
Fold 3: QWK = 0.7489 | MAE = 0.7098 | RMSE = 0.9289
Fold 4: QWK = 0.7427 | MAE = 0.7147 | RMSE = 0.9518
Fold 5: QWK = 0.7469 | MAE = 0.7627 | RMSE = 0.9638


In [None]:
# Report each metric as "mean ± standard deviation" across the folds.
# The spread is the population standard deviation (NumPy's default, ddof=0).
for metric in results:
    fold_scores = np.asarray(results[metric])
    print(f"{metric}: {fold_scores.mean():.4f} ± {fold_scores.std():.4f}")

MAE: 0.7278 ± 0.0253
RMSE: 0.9522 ± 0.0123
QWK: 0.7443 ± 0.0078
