# Mean Prediction Errors
Baseline errors obtained by always predicting the training-set mean of each dataset, evaluated on the train, val and test splits.

In [1]:
import os
import pandas as pd


### Configuration

In [2]:
# --- Clavicle CT: annotation CSV per split and output directory ---
clavicle_annots = dict(
    train="/data_fae_uq/clavicle_ct/annotations_train.csv",
    val="/data_fae_uq/clavicle_ct/annotations_val.csv",
    test="/data_fae_uq/clavicle_ct/annotations_test.csv",
)
save_dir_clavicle = "/ml_eval"

# --- RSNA Bone Age: annotation CSV per split and output directory ---
# Note the file naming differs from the clavicle set (split name comes first).
rsna_annots = dict(
    train="/data_fae_uq/rsna_boneage/train_annotations.csv",
    val="/data_fae_uq/rsna_boneage/val_annotations.csv",
    test="/data_fae_uq/rsna_boneage/test_annotations.csv",
)
save_dir_rsna = "/ml_eval"


In [3]:
# Read each annotation CSV once; both dicts map split name -> DataFrame
# and keep the key order of the corresponding *_annots config dict.
clavicle_dfs = {
    split: pd.read_csv(clavicle_annots[split]) for split in clavicle_annots
}
rsna_dfs = {split: pd.read_csv(rsna_annots[split]) for split in rsna_annots}


### Mean Calculation

To test a "realistic" scenario, the mean predictor uses the mean of the training data
(the same data a normally trained model is fitted on) as its prediction for every sample.

In [4]:
# Constant prediction per dataset: mean of the age / boneage column of the
# training split only (val and test are never used to derive it).
clavicle_mean = clavicle_dfs["train"]["age"].mean()
rsna_mean = rsna_dfs["train"]["boneage"].mean()

for label, value in (
    ("RSNA Boneage Mean:", rsna_mean),
    ("Clavicle CT Mean:", clavicle_mean),
):
    print(label, value)


RSNA Boneage Mean: 127.46392993979201
Clavicle CT Mean: 8592.623863636363


### Generate Mean-Prediction-Errors

In [5]:
def _add_mean_errors(frame, source_col, constant):
    """Add `target`, `prediction` and `error` columns to `frame` in place.

    `target` is a copy of `frame[source_col]`, `prediction` is `constant`
    broadcast to every row, and `error` is their absolute difference.
    """
    frame["target"] = frame[source_col]
    frame["prediction"] = constant
    frame["error"] = (frame["target"] - frame["prediction"]).abs()


# Every split (train/val/test) is scored against the same training-set mean.
for split_frame in clavicle_dfs.values():
    _add_mean_errors(split_frame, "age", clavicle_mean)

for split_frame in rsna_dfs.values():
    _add_mean_errors(split_frame, "boneage", rsna_mean)

# Report the mean absolute error of the constant predictor for each
# dataset and split, one line per (dataset, split) pair.
for dataset_label, split_frames in (
    ("RSNA Bone Age", rsna_dfs),
    ("Clavicle CT", clavicle_dfs),
):
    for split in ("train", "val", "test"):
        mean_abs_error = split_frames[split]["error"].mean()
        print(dataset_label, split, mean_abs_error)


RSNA Bone Age train 33.77528768287162
RSNA Bone Age val 33.13620350310805
RSNA Bone Age test 32.92509299105175
Clavicle CT train 1230.0242469008265
Clavicle CT val 1401.3006742424243
Clavicle CT test 1406.8448333333333


### Save DataFrames

In [6]:
# Only these three columns are exported; the DataFrame index is written too
# (to_csv default).
export_columns = ["target", "prediction", "error"]

for out_dir, dataset_tag, split_frames in (
    (save_dir_rsna, "rsna_boneage", rsna_dfs),
    (save_dir_clavicle, "clavicle_ct", clavicle_dfs),
):
    # Create the output directory if needed; an existing one is not an error.
    os.makedirs(out_dir, exist_ok=True)
    for split in ("train", "val", "test"):
        # File name pattern: mean_predictor_model_errors_<dataset>_<split>.csv
        csv_name = "_".join(("mean_predictor_model_errors", dataset_tag, split)) + ".csv"
        out_path = os.path.join(out_dir, csv_name)
        split_frames[split][export_columns].to_csv(out_path)
