In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import pingouin as pg
import numpy as np
from statsmodels.stats.libqsturng import psturng, qsturng

In [3]:
# Names of the metric columns to analyse. Further down, this list is passed
# both as the columns to test and as the headings of the printed tables.
metric_ls = ["mae", "mse", "r2", "rho", "prec", "recall"]
# Scaffold-split results. The analysis below reads a "method" column (the
# groups being compared), a "cv_cycle" column (the repeated-measures subject)
# and one column per entry of `metric_ls`.
# NOTE(review): presumably one row per (method, cv_cycle) pair - confirm against the CSV.
df_scaffold_split = pd.read_csv('scaffold_split_metrics.csv')

In [4]:
# Scratch check of np.round with decimals=0 (2.1 rounds to 2.). Nothing in the
# visible cells below uses this value, so this cell can likely be removed.
np.round([1, 3, 2.1], 0)

array([1., 3., 2.])

In [20]:
def calculate_mean_diff_ci(df, metric, group_col, alpha=.05, subject_col='cv_cycle'):
    """Build a Tukey-style table of pairwise mean differences between groups.

    A repeated-measures ANOVA (``pg.rm_anova``) supplies a mean square and its
    degrees of freedom. These are combined with the studentized range
    distribution (``qsturng`` / ``psturng``) to give, for every unordered pair
    of groups, the difference of the group means, a ``1 - alpha`` confidence
    interval around that difference and an adjusted p-value.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data containing the ``group_col``, ``subject_col`` and
        ``metric`` columns.
    metric : str
        Column holding the dependent variable.
    group_col : str
        Column identifying the groups to compare (the within-subject factor).
    alpha : float, default 0.05
        Significance level; the critical value is taken at ``1 - alpha``.
    subject_col : str, default 'cv_cycle'
        Column identifying the repeated-measures subject passed to
        ``pg.rm_anova``.

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns ``group1``, ``group2``, ``meandiff``,
        ``lower``, ``upper``, ``p-adj``, ``group1_mean`` and ``group2_mean``.
        Numeric columns are rounded to 4 decimals. Groups are ordered by
        descending mean of ``metric``, so ``group1`` is always the group with
        the larger mean and ``meandiff`` is non-negative.
    """
    # Per-group means, largest first; this order drives the pair order below.
    group_means = df.groupby(group_col)[metric].mean().sort_values(ascending=False)

    aov = pg.rm_anova(dv=metric, within=group_col, subject=subject_col, data=df, detailed=True)
    # Row label 1 of the detailed table is read as the error (residual) term.
    # NOTE(review): presumably always the second row for a one-way design -
    # confirm against the pingouin version in use.
    mse = aov.loc[1, 'MS']
    df_resid = aov.loc[1, 'DF']

    n_groups = len(df[group_col].unique())
    # Mean group size; equals the common group size when the design is balanced.
    n_per_group = df[group_col].value_counts().mean()
    tukey_se = np.sqrt(2 * mse / (n_per_group))
    q_crit = qsturng(1 - alpha, n_groups, df_resid)
    # tukey_se is the standard error of a difference (sqrt(2*MSE/n)), while the
    # studentized range is scaled by sqrt(MSE/n); the sqrt(2) factors here and
    # in the p-value computation convert between the two. The half-width is
    # the same for every pair, so it is computed once.
    half_width = q_crit / np.sqrt(2) * tukey_se

    methods = group_means.index
    records = []
    for i, method1 in enumerate(methods):
        mean1 = group_means.loc[method1]
        for method2 in methods[i + 1:]:
            mean2 = group_means.loc[method2]
            mean_diff = mean1 - mean2
            q_stat = np.abs(mean_diff) / tukey_se * np.sqrt(2)
            # NOTE(review): statsmodels documents psturng results as bounded
            # (roughly 0.001 to 0.9), so values at those ends are limits rather
            # than exact p-values - confirm if exact tails matter.
            # psturng may hand back a scalar or a 1-element array; normalise.
            adjusted_p = float(np.atleast_1d(psturng(q_stat, n_groups, df_resid))[0])
            records.append({
                "group1": method1,
                "group2": method2,
                "meandiff": mean_diff,
                "lower": mean_diff - half_width,
                "upper": mean_diff + half_width,
                "p-adj": adjusted_p,
                "group1_mean": mean1,
                "group2_mean": mean2,
            })

    numeric_cols = ["meandiff", "lower", "upper", "p-adj", "group1_mean", "group2_mean"]
    # Explicit columns keep the table schema stable even when there are no pairs.
    result_tab = pd.DataFrame(records, columns=["group1", "group2"] + numeric_cols)
    result_tab[numeric_cols] = result_tab[numeric_cols].astype(float).round(4)

    return result_tab

def print_mean_diff_ci_table(df, metrics, metric_names, group_col, alpha=.05):
    """Print one pairwise mean-difference table per metric.

    For every entry of ``metrics`` the table returned by
    ``calculate_mean_diff_ci`` is printed under a heading built from the entry
    at the same position in ``metric_names``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data forwarded unchanged to ``calculate_mean_diff_ci``.
    metrics : iterable of str
        Metric column names to analyse, in print order.
    metric_names : sequence of str
        Display names, indexed by position in ``metrics``.
    group_col : str
        Column identifying the groups being compared.
    alpha : float, default 0.05
        Significance level forwarded to ``calculate_mean_diff_ci``.
    """
    for position, metric_key in enumerate(metrics):
        table = calculate_mean_diff_ci(
            df,
            metric=metric_key,
            group_col=group_col,
            alpha=alpha,
        )
        heading = f"\nMean Difference CI Table for {metric_names[position]}"
        print(heading)
        print(table)

# Example usage: print the pairwise comparison table for every metric in
# `metric_ls`, comparing the groups found in the "method" column of the
# scaffold-split results. The metric column names double as table headings.
# NOTE(review): the captured output shows warnings from a
# `W = np.prod(eig) / (eig.sum() / d) ** d` line - presumably raised inside
# the ANOVA library during this call; confirm the source before silencing.
print_mean_diff_ci_table(df_scaffold_split, metrics=metric_ls, metric_names=metric_ls, group_col="method")

  W = np.prod(eig) / (eig.sum() / d) ** d
  W = np.prod(eig) / (eig.sum() / d) ** d
  W = np.prod(eig) / (eig.sum() / d) ** d



Mean Difference CI Table for mae
        group1       group2  meandiff   lower   upper  p-adj  group1_mean  \
0  lgbm_morgan  chemprop_mt    0.0564  0.0486  0.0642  0.001       0.4402   
1  lgbm_morgan  chemprop_st    0.0715  0.0638  0.0793  0.001       0.4402   
2  chemprop_mt  chemprop_st    0.0152  0.0074  0.0230  0.001       0.3838   

   group2_mean  
0       0.3838  
1       0.3687  
2       0.3687  

Mean Difference CI Table for mse
        group1       group2  meandiff   lower   upper  p-adj  group1_mean  \
0  lgbm_morgan  chemprop_mt    0.0734  0.0644  0.0825  0.001       0.3735   
1  lgbm_morgan  chemprop_st    0.0750  0.0659  0.0840  0.001       0.3735   
2  chemprop_mt  chemprop_st    0.0015 -0.0075  0.0106  0.900       0.3001   

   group2_mean  
0       0.3001  
1       0.2986  
2       0.2986  

Mean Difference CI Table for r2
        group1       group2  meandiff   lower   upper  p-adj  group1_mean  \
0  chemprop_st  chemprop_mt    0.0026 -0.0156  0.0208  0.900       0

  W = np.prod(eig) / (eig.sum() / d) ** d
