In [11]:
import matplotlib.pyplot as plt
import pandas as pd
import pingouin as pg
import numpy as np
from statsmodels.stats.libqsturng import psturng, qsturng

In [12]:
# Metric column names to compare; passed further down as both the metric keys
# and the display names used in the table headings.
metric_ls = ["mae", "mse", "r2", "prec", "recall"]
# Results table, read from a path relative to the current working directory.
# Code further down groups it by the "method" column and uses "cv_cycle" as the
# repeated-measures subject column.
df_scaffold_split = pd.read_csv('df_scaffold_split.csv')

In [13]:
def calculate_mean_diff_ci(df, metric, group_col, alpha=.05, subject_col='cv_cycle'):
    """Build a Tukey-style table of pairwise mean differences between groups.

    A repeated-measures ANOVA (``pg.rm_anova``) supplies the error mean square
    and its degrees of freedom; every pair of groups is then compared using the
    studentized range distribution (``qsturng`` / ``psturng``).

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format data containing ``metric``, ``group_col`` and ``subject_col``.
    metric : str
        Name of the column holding the dependent variable.
    group_col : str
        Name of the column identifying the groups being compared
        (the within-subject factor of the ANOVA).
    alpha : float, default 0.05
        The critical value is the ``1 - alpha`` quantile of the studentized
        range distribution, so intervals are simultaneous ``1 - alpha`` intervals.
    subject_col : str, default 'cv_cycle'
        Name of the column identifying the repeated-measures subject.

    Returns
    -------
    pandas.DataFrame
        One row per unordered pair of groups with columns ``group1``,
        ``group2``, ``meandiff`` (group1 mean minus group2 mean), ``lower``,
        ``upper``, ``p-adj``, ``group1_mean`` and ``group2_mean``. Groups are
        ordered by descending mean, so ``meandiff`` is non-negative whenever
        both means are defined. Numeric columns have a numeric dtype.

    Notes
    -----
    The standard error uses the *average* group size, so the result is only
    exact for balanced designs.
    NOTE(review): psturng appears to report p-values only within a bounded
    range (results of exactly 0.001 / 0.9 are presumably clipped) -- confirm
    against the statsmodels documentation.
    """
    # Per-group mean of the metric, largest first; this order decides which
    # member of each pair is reported as group1.
    group_means = df.groupby(group_col)[metric].mean().sort_values(ascending=False)

    aov = pg.rm_anova(dv=metric, within=group_col, subject=subject_col, data=df, detailed=True)
    # Row 1 of the detailed ANOVA table is read as the error (residual) term.
    mse = aov.loc[1, 'MS']
    df_resid = aov.loc[1, 'DF']

    n_groups = len(df[group_col].unique())
    n_per_group = df[group_col].value_counts().mean()

    # Standard error of a difference between two group means.
    tukey_se = np.sqrt(2 * mse / n_per_group)
    q_crit = qsturng(1 - alpha, n_groups, df_resid)
    # The studentized range is defined on sqrt(mse / n), hence the sqrt(2)
    # rescaling relative to tukey_se. Identical for every pair, so hoisted.
    half_width = q_crit / np.sqrt(2) * tukey_se

    ordered = list(group_means.items())
    records = []
    for i, (method1, mean1) in enumerate(ordered):
        for method2, mean2 in ordered[i + 1:]:
            mean_diff = mean1 - mean2
            q_stat = np.abs(mean_diff) / tukey_se * np.sqrt(2)
            # Coerce to a plain float so the column gets a numeric dtype even
            # if psturng hands back a one-element array.
            adjusted_p = float(np.ravel(psturng(q_stat, n_groups, df_resid))[0])
            records.append({
                "group1": method1,
                "group2": method2,
                "meandiff": mean_diff,
                "lower": mean_diff - half_width,
                "upper": mean_diff + half_width,
                "p-adj": adjusted_p,
                "group1_mean": mean1,
                "group2_mean": mean2,
            })

    columns = ["group1", "group2", "meandiff", "lower", "upper", "p-adj",
               "group1_mean", "group2_mean"]
    # Build the frame once from records instead of growing it row by row, so
    # the numeric columns are typed as floats rather than generic objects.
    return pd.DataFrame(records, columns=columns)

def print_mean_diff_ci_table(df, metrics, metric_names, group_col, alpha=.05):
    """Print one pairwise mean-difference table per metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Data forwarded unchanged to ``calculate_mean_diff_ci``.
    metrics : iterable of str
        Metric column names; one table is printed for each.
    metric_names : sequence of str
        Display names used in the headings; ``metric_names[i]`` labels
        ``metrics[i]``. May be longer than ``metrics`` (extra names are unused).
    group_col : str
        Grouping column forwarded to ``calculate_mean_diff_ci``.
    alpha : float, default 0.05
        Forwarded to ``calculate_mean_diff_ci``.

    Raises
    ------
    ValueError
        If ``metric_names`` has fewer entries than ``metrics``. Checked before
        anything is computed or printed, so a bad call produces no partial output.
    """
    metrics = list(metrics)
    if len(metric_names) < len(metrics):
        raise ValueError(
            f"metric_names has {len(metric_names)} entries but {len(metrics)} metrics were given"
        )

    for i, metric in enumerate(metrics):
        result_tab = calculate_mean_diff_ci(df, metric=metric, group_col=group_col, alpha=alpha)
        print(f"\nMean Difference CI Table for {metric_names[i]}")
        # to_string() keeps every row on one line; a bare print(DataFrame)
        # wraps wide tables into several column blocks.
        print(result_tab.to_string())

# Example usage: print one pairwise comparison table per metric in metric_ls,
# comparing the groups in the "method" column. The metric keys are reused as
# the display names in each table heading.
print_mean_diff_ci_table(df_scaffold_split, metrics=metric_ls, metric_names=metric_ls, group_col="method")


Mean Difference CI Table for mae
        group1       group2  meandiff     lower     upper  p-adj  group1_mean  \
0  lgbm_morgan  chemprop_mt  0.056374  0.048589  0.064159  0.001     0.440199   
1  lgbm_morgan  chemprop_st  0.071541  0.063757  0.079326  0.001     0.440199   
2  chemprop_mt  chemprop_st  0.015167  0.007383  0.022952  0.001     0.383825   

   group2_mean  
0     0.383825  
1     0.368658  
2     0.368658  

Mean Difference CI Table for mse
        group1       group2  meandiff     lower     upper  p-adj  group1_mean  \
0  lgbm_morgan  chemprop_mt  0.073433  0.064375  0.082492  0.001     0.373529   
1  lgbm_morgan  chemprop_st  0.074951  0.065893   0.08401  0.001     0.373529   
2  chemprop_mt  chemprop_st  0.001518  -0.00754  0.010576    0.9     0.300095   

   group2_mean  
0     0.300095  
1     0.298577  
2     0.298577  

Mean Difference CI Table for r2
        group1       group2  meandiff     lower     upper  p-adj  group1_mean  \
0  chemprop_st  chemprop_mt  0.0

  W = np.prod(eig) / (eig.sum() / d) ** d
  W = np.prod(eig) / (eig.sum() / d) ** d
  W = np.prod(eig) / (eig.sum() / d) ** d
