In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import pingouin as pg
import numpy as np
from statsmodels.stats.libqsturng import psturng, qsturng

In [12]:
metric_ls = ["mae", "mse", "r2", "rho", "prec", "recall"]
df_scaffold_split = pd.read_csv('scaffold_split_metrics.csv')

In [5]:
np.round([1, 3, 2.1], 0)

array([1., 3., 2.])

In [13]:
def calculate_mean_diff_ci(df, metric, group_col, alpha=.05):
    df_means = df.groupby(group_col).mean(numeric_only=True).sort_values(metric, ascending=False)
    df_sorted = df.set_index(group_col).loc[df_means.index].reset_index()

    aov = pg.rm_anova(dv=metric, within=group_col, subject='cv_cycle', data=df, detailed=True)
    mse = aov.loc[1, 'MS']
    df_resid = aov.loc[1, 'DF']  

    n_groups = len(df[group_col].unique())
    n_per_group = df[group_col].value_counts().mean()
    tukey_se = np.sqrt(2 * mse / (n_per_group))
    q = qsturng(1-alpha, n_groups, df_resid)

    methods = df_means.index
    num_comparisons = len(methods) * (len(methods) - 1) // 2
    result_tab = pd.DataFrame(index=range(num_comparisons), columns=["group1", "group2", "meandiff", "lower", "upper", "p-adj"])

    # Calculate pairwise mean differences and adjusted p-values
    row_idx = 0
    for i, method1 in enumerate(methods):
        for j, method2 in enumerate(methods):
            if i < j:
                group1 = df[df[group_col] == method1][metric]
                group2 = df[df[group_col] == method2][metric]
                mean_diff = group1.mean() - group2.mean()
                studentized_range = np.abs(mean_diff) / tukey_se
                adjusted_p = psturng(studentized_range * np.sqrt(2), n_groups, df_resid)
                lower = mean_diff - (q/np.sqrt(2) * tukey_se)
                upper = mean_diff + (q/np.sqrt(2) * tukey_se)
                result_tab.loc[row_idx] = [method1, method2, mean_diff, lower, upper, adjusted_p]
                row_idx += 1

    result_tab["group1_mean"] = result_tab["group1"].map(df_means[metric])
    result_tab["group2_mean"] = result_tab["group2"].map(df_means[metric])

    numeric_cols = ["meandiff", "lower", "upper", "p-adj", "group1_mean", "group2_mean"]
    result_tab[numeric_cols] = result_tab[numeric_cols].apply(pd.to_numeric, errors='coerce')
    result_tab[numeric_cols] = result_tab[numeric_cols].round(2)

    return result_tab

def print_mean_diff_ci_table(df, metrics, metric_names, group_col, alpha=.05):
    for i, metric in enumerate(metrics):
        result_tab = calculate_mean_diff_ci(df, metric=metric, group_col=group_col, alpha=alpha)
        print(f"\nMean Difference CI Table for {metric_names[i]}")
        print(result_tab)

# Example usage:
print_mean_diff_ci_table(df_scaffold_split, metrics=metric_ls, metric_names=metric_ls, group_col="method")

  W = np.prod(eig) / (eig.sum() / d) ** d



Mean Difference CI Table for mae
        group1       group2  meandiff  lower  upper  p-adj  group1_mean  \
0  lgbm_morgan  chemprop_mt      0.06   0.05   0.06    0.0         0.44   
1  lgbm_morgan  chemprop_st      0.07   0.06   0.08    0.0         0.44   
2  chemprop_mt  chemprop_st      0.02   0.01   0.02    0.0         0.38   

   group2_mean  
0         0.38  
1         0.37  
2         0.37  

Mean Difference CI Table for mse
        group1       group2  meandiff  lower  upper  p-adj  group1_mean  \
0  lgbm_morgan  chemprop_mt      0.07   0.06   0.08    0.0         0.37   
1  lgbm_morgan  chemprop_st      0.07   0.07   0.08    0.0         0.37   
2  chemprop_mt  chemprop_st      0.00  -0.01   0.01    0.9         0.30   

   group2_mean  
0          0.3  
1          0.3  
2          0.3  


  W = np.prod(eig) / (eig.sum() / d) ** d



Mean Difference CI Table for r2
        group1       group2  meandiff  lower  upper  p-adj  group1_mean  \
0  chemprop_st  chemprop_mt      0.00  -0.02   0.02    0.9          0.4   
1  chemprop_st  lgbm_morgan      0.15   0.13   0.17    0.0          0.4   
2  chemprop_mt  lgbm_morgan      0.15   0.13   0.17    0.0          0.4   

   group2_mean  
0         0.40  
1         0.25  
2         0.25  

Mean Difference CI Table for rho
        group1       group2  meandiff  lower  upper  p-adj  group1_mean  \
0  chemprop_mt  chemprop_st      0.00  -0.01   0.01    0.9          0.6   
1  chemprop_mt  lgbm_morgan      0.08   0.07   0.09    0.0          0.6   
2  chemprop_st  lgbm_morgan      0.08   0.07   0.09    0.0          0.6   

   group2_mean  
0         0.60  
1         0.52  
2         0.52  


  W = np.prod(eig) / (eig.sum() / d) ** d
  W = np.prod(eig) / (eig.sum() / d) ** d



Mean Difference CI Table for prec
        group1       group2  meandiff  lower  upper  p-adj  group1_mean  \
0  chemprop_mt  lgbm_morgan      0.01  -0.01   0.02    NaN         0.86   
1  chemprop_mt  chemprop_st      0.02   0.00   0.03    NaN         0.86   
2  lgbm_morgan  chemprop_st      0.01  -0.01   0.02    NaN         0.85   

   group2_mean  
0         0.85  
1         0.84  
2         0.84  

Mean Difference CI Table for recall
        group1       group2  meandiff  lower  upper  p-adj  group1_mean  \
0  chemprop_st  chemprop_mt      0.08   0.06   0.10    0.0         0.66   
1  chemprop_st  lgbm_morgan      0.17   0.15   0.19    0.0         0.66   
2  chemprop_mt  lgbm_morgan      0.09   0.07   0.11    0.0         0.58   

   group2_mean  
0         0.58  
1         0.49  
2         0.49  
