In [None]:
import pandas as pd
import numpy as np

In [None]:
# Scores of the top-performing runs, organised as
#   {evaluation metric -> {training dataset file -> 1-D array of scores}}.
# The metric names suggest these are R^2 values on the dbdb / mSTZ test sets
# (healthy vs. OOD split) -- presumably one entry per run; confirm with the
# code that produced them.
# Group sizes are unbalanced (1 to 5 scores per training dataset).
# NOTE(review): under "result.r2_dbdb_healthy" the last three baseline scores
# are identical to the three Embryonic scores, and under
# "result.r2_mSTZ_healthy" Embryonic and baseline share the same single score.
# Looks like a possible copy/paste duplication -- verify against the raw results.
top_performers_for_anova = {
    "result.r2_dbdb_healthy": {
        "extra_mouse_chem.h5ad": np.array([0.99455716, 0.99429556, 0.99526207]),
        "extra_mouse_Embryonic.h5ad": np.array([0.99361177, 0.99383294, 0.99234884]),
        "extra_mouse_T1D.h5ad": np.array([0.99492497, 0.99438503, 0.99360362]),
        "extra_mouse_young.h5ad": np.array([0.99491306, 0.99212912, 0.99319578]),
        "train_adata_baseline_top3000hvg.h5ad": np.array(
            [0.99251673, 0.99331499, 0.99361177, 0.99383294, 0.99234884]
        ),
    },
    "result.r2_dbdb_OOD": {
        "extra_mouse_chem.h5ad": np.array([0.86194461]),
        "extra_mouse_Embryonic.h5ad": np.array([0.77028542, 0.77413606, 0.78884885]),
        "extra_mouse_T1D.h5ad": np.array([0.85437402]),
        "extra_mouse_young.h5ad": np.array([0.77155366, 0.77024666, 0.76926021]),
        "train_adata_baseline_top3000hvg.h5ad": np.array([0.79873185, 0.79931434]),
    },
    "result.r2_mSTZ_healthy": {
        "extra_mouse_chem.h5ad": np.array([0.97355678]),
        "extra_mouse_Embryonic.h5ad": np.array([0.98974275]),
        "extra_mouse_T1D.h5ad": np.array([0.972287]),
        "extra_mouse_young.h5ad": np.array([0.96677723, 0.96171342, 0.97228828]),
        "train_adata_baseline_top3000hvg.h5ad": np.array([0.98974275]),
    },
    "result.r2_mSTZ_OOD": {
        "extra_mouse_chem.h5ad": np.array([0.8680649, 0.94848754, 0.90506495]),
        "extra_mouse_Embryonic.h5ad": np.array([0.86447686, 0.92092105, 0.87786575]),
        "extra_mouse_T1D.h5ad": np.array([0.88246886]),
        "extra_mouse_young.h5ad": np.array([0.87197639, 0.90344952, 0.90129425]),
        "train_adata_baseline_top3000hvg.h5ad": np.array([0.91252919]),
    },
}

In [None]:
# Step 2: Running ANOVA for each test dataset
#
# For every metric in `top_performers_for_anova` this cell:
#   1. reshapes {training dataset -> scores} into a long-format DataFrame
#      (one row per score, with the training dataset as a categorical factor),
#   2. fits a one-way OLS model `score ~ C(dataset_train)` and prints its
#      type-II ANOVA table,
#   3. runs Tukey's HSD over all pairs of training datasets and prints it.
# A failure for one metric is reported (with its cause) and the loop moves on
# to the next metric.
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

for column, performances in top_performers_for_anova.items():
    # The column name is interpolated into the model formula, where a "." would
    # presumably be read as attribute access rather than part of a column name,
    # so dots are replaced by underscores. This is done *before* the `try` so
    # the name is always bound when the failure message below is printed.
    new_column_name = column.replace(".", "_")

    try:
        # Long format: one (training dataset, score) row per individual score.
        df_for_anova = pd.DataFrame(
            [(dataset, score) for dataset, scores in performances.items() for score in scores],
            columns=["dataset_train", new_column_name],
        )

        formula = f"{new_column_name} ~ C(dataset_train)"
        model = ols(formula, data=df_for_anova).fit()
        anova_table = sm.stats.anova_lm(model, typ=2)

        print(f"ANOVA results for {new_column_name}:\n\n", anova_table)

        # Tukey HSD: pairwise comparison of the training-dataset group means.
        tukey_results = pairwise_tukeyhsd(df_for_anova[new_column_name], df_for_anova['dataset_train'])
        print("\nTukey HSD results:\n", tukey_results)

    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop the notebook, and show the
        # cause instead of silently discarding it.
        print(f"Anova or Tukey HSD can't be performed for {new_column_name}\n\n")
        print(f"Reason: {type(exc).__name__}: {exc}")
        print("----------------------------------------------------------------------")


In [None]:
import matplotlib.pyplot as plt

# Tukey HSD results for result_r2_dbdb_OOD
#
# These three parallel lists are hard-coded (they are not computed in this
# cell), presumably transcribed from the printed Tukey HSD table -- re-check
# them whenever the underlying scores change.
#   comparisons[i] : label "A vs B" for the i-th pair of training datasets
#   mean_diff[i]   : difference of the group means; the values match
#                    mean(B) - mean(A) for the dbdb_OOD scores
#   reject[i]      : True when the pair is flagged as significantly different
comparisons = [
    "extra_mouse_Embryonic vs extra_mouse_T1D", 
    "extra_mouse_Embryonic vs extra_mouse_chem",
    "extra_mouse_Embryonic vs extra_mouse_young",
    "extra_mouse_Embryonic vs train_adata_baseline_top3000hvg",
    "extra_mouse_T1D vs extra_mouse_chem",
    "extra_mouse_T1D vs extra_mouse_young",
    "extra_mouse_T1D vs train_adata_baseline_top3000hvg",
    "extra_mouse_chem vs extra_mouse_young",
    "extra_mouse_chem vs train_adata_baseline_top3000hvg",
    "extra_mouse_young vs train_adata_baseline_top3000hvg"
]
mean_diff = [0.0766, 0.0842, -0.0074, 0.0213, 0.0076, -0.084, -0.0554, -0.0916, -0.0629, 0.0287]
reject = [True, True, False, False, False, True, True, True, True, True]

# Guard against a transcription slip: zip()/bar() would otherwise silently
# truncate or fail with a less helpful message.
assert len(comparisons) == len(mean_diff) == len(reject), "Tukey HSD lists must have equal length"

fig, ax = plt.subplots(figsize=(12, 6))
# Red bars = significant difference (reject is True), blue bars = not significant.
bars = ax.bar(comparisons, mean_diff, color=['red' if r else 'blue' for r in reject])
ax.axhline(0, color='black', linewidth=0.5)
ax.set_ylabel('Mean Difference')
ax.set_title('Mean Difference with Tukey HSD results for result_r2_dbdb_OOD')
ax.tick_params(axis='x', labelrotation=90)
# Leave some headroom so the markers beyond the bar tips stay inside the axes.
ax.margins(y=0.15)

# Add asterisk for significant results
for bar, r in zip(bars, reject):
    if not r:
        continue
    height = bar.get_height()
    points_up = height >= 0
    # Put the marker just *outside* the bar tip: above positive bars and below
    # negative ones (anchoring at the tip with the default alignment draws the
    # asterisk inside negative bars). The offset is in points, so it does not
    # depend on the data scale, and the marker is centred on the bar.
    ax.annotate(
        '*',
        xy=(bar.get_x() + bar.get_width() / 2, height),
        xytext=(0, 2 if points_up else -2),
        textcoords='offset points',
        ha='center',
        va='bottom' if points_up else 'top',
        color='black',
        fontsize=15,
    )

fig.tight_layout()
plt.show()
