# Test statistics for our regression model

In [None]:
import pandas as pd
import os
from sklearn.metrics import mean_absolute_error
from math import sqrt

In [None]:
# --- Notebook configuration ---
# Toggle: True evaluates the sick-subject results, False the healthy ones.
IS_SICK = False

# Folder name (under "models") of the run whose results are evaluated.
DATE = "05-05-2023"

# Pickled result frames for the two cohorts.
DF_HEALTHY_FILENAME = "df_healthy_unfiltered_tester_model_epoch_100_4a8e58cc-edf8-4c47-9090-0534fc13ccda.pkl"
DF_SICK_FILENAME = "df_sick_subjects_tester_model_epoch_100_17f05318-09c2-49bc-82ae-0e22d281451d.pkl"

# Project root = parent of the current working directory.
_working_dir = os.path.abspath('')
PROJECT_PATH = os.path.normpath(os.path.join(_working_dir, os.pardir))

In [None]:
# Load the result frame of the selected cohort from
# <PROJECT_PATH>/models/<DATE>/loss/. The cells below read its "Age" and
# "Predicted Age" columns.
df_filename = DF_SICK_FILENAME if IS_SICK else DF_HEALTHY_FILENAME
df_path = os.path.join(PROJECT_PATH, "models", DATE, "loss", df_filename)
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
df = pd.read_pickle(df_path)
df

### Compute over- and underestimation percentages

In [None]:
# Label every row by the direction of the prediction error.
# Work on a copy so the extra column does not end up in `df`.
df_estimates = df.copy()
# Vectorised replacement for the former row-wise apply(): default every row to
# "under", then flip the rows whose prediction is strictly above the
# chronological age. Exact ties (and rows where the comparison is False because
# of missing values) therefore stay "under", exactly as before.
df_estimates["Estimation"] = "under"
df_estimates.loc[df_estimates["Predicted Age"] > df_estimates["Age"], "Estimation"] = "over"
df_estimates.head()


In [None]:
# Percentage of rows labelled "over" and "under".
estimation_counts = df_estimates.groupby("Estimation").size()
estimation_percentages = 100 * estimation_counts / estimation_counts.sum()
# Look the groups up by label instead of by position: positional integer
# indexing on a string-labelled Series is deprecated in pandas 2.x, and it
# reported the wrong group (or raised) whenever only one label was present.
# A group that does not occur counts as 0 %.
over_percentage = estimation_percentages.get("over", 0.0)
under_percentage = estimation_percentages.get("under", 0.0)
print(f"{over_percentage:.5}% Over estimation\n{under_percentage:.5}% Under estimation")

### Computing the correlation coefficient between Chronological Age and Predicted Age

In [None]:
# Pearson correlation between chronological and predicted age.
df_pcc = df.copy()
age_column, predicted_column = df_pcc["Age"], df_pcc["Predicted Age"]
corr_coefficient = age_column.corr(predicted_column, method="pearson")
print(
    "Pearsons Correlation Coefficient for Age and Predicted Age: "
    f"{corr_coefficient}"
)

### Computing Mean Absolute Error (MAE), Standard Deviation (SD) and Standard Error of the Mean (SEM)

In [None]:
# Mean absolute error between chronological age (ground truth) and predicted age.
df_mae = df.copy()
mae = mean_absolute_error(df_mae['Age'], df_mae['Predicted Age'])
# The value is reported through print(); the former bare `mae` expression in the
# middle of the cell had no effect and was removed.
print(f"MAE of Predicted Age: {mae}")

In [None]:
# Sample standard deviation (pandas default, ddof=1) of the predicted ages.
df_sd = df.copy()
std_predicted_age = df_sd['Predicted Age'].std()
# Output label corrected: it previously read "Prediced".
print(f"STD of Predicted Age: {std_predicted_age}")

In [None]:
# Standard error of the mean of the predicted ages.
df_sem = df.copy()
# Series.sem() divides the sample standard deviation (ddof=1) by the square root
# of the number of non-missing values. The previous hand-rolled version divided
# by sqrt(len(...)), which also counts NaN rows that std() had already skipped.
sem = df_sem['Predicted Age'].sem()
# Output label corrected: it previously read "Prediced".
print(f"SEM of Predicted Age: {sem}")

### Computing the average Estimated Age Difference (EAD)

In [None]:
# Per-row difference (chronological age minus predicted age) and its mean.
# A positive value means the prediction was below the chronological age.
df_ead = df.copy()
chronological_age = df_ead["Age"]
predicted_age = df_ead["Predicted Age"]
df_ead["Difference"] = chronological_age - predicted_age
avg_difference = df_ead["Difference"].mean()
print(
    f"Average Age difference in Chronological and Predicted: {avg_difference}"
)

### Computing the Age distribution

In [None]:
# Mean and sample standard deviation of the chronological ages.
df_age = df.copy()
age_mean = df_age["Age"].mean()
age_std = df_age["Age"].std()
print(f"AVG Age: {age_mean}\nSTD Age: {age_std}")