In [None]:

import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, cohen_kappa_score

# Location of the essay dataset, relative to the current working directory.
file_path = "training_data.csv"

try:
    df = pd.read_csv(file_path)
except FileNotFoundError:
    # NOTE: `df` stays undefined on this path, so later cells that use it will fail.
    print(f"Error: File '{file_path}' not found. Please upload it.")
else:
    # Only reached when the read succeeded, so `df` is guaranteed to exist here.
    print(f"Successfully loaded dataset with {len(df)} essays.")




Successfully loaded dataset with 1138 essays.


In [None]:

def evaluate_dummy_baselines(df, n_splits=5, random_state=42):
    """Cross-validate three constant-prediction baselines on the 'labels' column.

    For every stratified fold, the mode, the rounded mean and the rounded
    median of the training labels are each used as a constant prediction for
    the held-out labels. Each prediction is scored with MAE, RMSE and
    quadratic-weighted Cohen's kappa (QWK).

    Args:
        df: DataFrame with a 'labels' column; its values are cast to int.
        n_splits: Number of StratifiedKFold splits.
        random_state: Seed passed to StratifiedKFold (shuffling is enabled).

    Returns:
        DataFrame with one row per baseline ('Mode', 'Mean', 'Median') and the
        columns 'Model', 'MAE', 'RMSE', 'QWK'. Every metric cell is a string of
        the form "mean ± std" over the folds, formatted to four decimals.

    Raises:
        ValueError: If the 'labels' column is absent or contains missing values.
    """
    if 'labels' not in df.columns:
        raise ValueError("Dataset must contain a 'labels' column.")

    labels = df['labels']
    # Casting NaN to int does not fail for float arrays; it silently yields
    # meaningless integers. Reject missing labels before any casting happens.
    n_missing = int(labels.isna().sum())
    if n_missing > 0:
        raise ValueError(
            f"'labels' column contains {n_missing} missing value(s); "
            "remove or fill them before evaluating baselines."
        )

    metric_names = ('mae', 'rmse', 'qwk')

    # One rule per baseline: maps the training labels of a fold to the single
    # integer label predicted for every test item. Mean and median are rounded
    # so that all predictions stay on the integer label scale.
    constant_rules = {
        'Mode': lambda train_labels: pd.Series(train_labels).mode()[0],
        'Mean': lambda train_labels: int(round(np.mean(train_labels))),
        'Median': lambda train_labels: int(round(np.median(train_labels))),
    }

    def calc_metrics(y_true, y_pred):
        """Score one set of predictions with MAE, RMSE and QWK."""
        return {
            'mae': mean_absolute_error(y_true, y_pred),
            'rmse': np.sqrt(mean_squared_error(y_true, y_pred)),
            'qwk': cohen_kappa_score(y_true, y_pred, weights='quadratic')
        }

    # The baselines ignore features; X is a placeholder so that the splitter
    # has something of the right length to index.
    X = np.zeros(len(df))
    y = labels.values.astype(int)

    results_dict = {
        name: {metric: [] for metric in metric_names}
        for name in constant_rules
    }

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    print(f"Starting {n_splits}-Fold Cross-Validation...\n")

    for train_idx, test_idx in skf.split(X, y):
        y_train, y_test = y[train_idx], y[test_idx]

        for name, rule in constant_rules.items():
            y_pred = np.full(shape=y_test.shape, fill_value=rule(y_train))
            fold_scores = calc_metrics(y_test, y_pred)
            for metric in metric_names:
                results_dict[name][metric].append(fold_scores[metric])

    # Collapse the per-fold scores into one "mean ± std" string per metric.
    final_results = []
    for model_name, metrics in results_dict.items():
        row = {'Model': model_name}
        for metric_name in metric_names:
            fold_values = metrics[metric_name]
            row[metric_name.upper()] = (
                f"{np.mean(fold_values):.4f} ± {np.std(fold_values):.4f}"
            )
        final_results.append(row)

    return pd.DataFrame(final_results)

# Run the mode/mean/median baselines on the loaded dataset with the function's
# default arguments (n_splits=5, random_state=42).
results_table = evaluate_dummy_baselines(df)



Starting 5-Fold Cross-Validation...



In [None]:
# `display` renders the summary table with the notebook's rich output.
from IPython.display import display
display(results_table)

# Persist the summary in the current working directory; index=False leaves the
# DataFrame's row index out of the file.
results_table.to_csv("dummy_baselines_results.csv", index=False)


--- Dummy Baseline Results (5-Fold CV) ---


Unnamed: 0,Model,MAE,RMSE,QWK
0,Mode,1.2917 ± 0.0049,1.7013 ± 0.0066,0.0000 ± 0.0000
1,Mean,1.1916 ± 0.0035,1.5346 ± 0.0048,0.0000 ± 0.0000
2,Median,1.1916 ± 0.0035,1.5346 ± 0.0048,0.0000 ± 0.0000


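QWK is 0.0000 for all dummy baselines. This is mathematically correct: kappa measures agreement beyond what chance would produce, and for a constant predictor the weighted disagreement with the true labels is exactly the disagreement expected by chance, so kappa = 1 − 1 = 0. This makes it a useful "sanity check" result for your thesis table.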

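Mean/Median give lower MAE/RMSE than Mode, which is expected for ordinal data: the median minimizes absolute error and the mean minimizes squared error, whereas the most frequent label need not lie near the centre of the distribution. The Mean and Median rows are identical because both values are rounded to an integer label and, on this dataset, round to the same one.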