In [None]:
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300

from plotnine_prism import *
from tqdm import tqdm
import plotnine as p9
import pandas as pd
import numpy as np
import glob
import os
import yaml

import sys
sys.path.append("../")
from src.utils import bootstrapping

In [None]:
# Load the project-wide metadata file. Later cells read its "model_modality"
# and "dataset" entries to label models and datasets.
with open("../model_and_dataset_info.yaml", "r") as info_file:
    model_and_dataset_info = yaml.safe_load(info_file)
model_and_dataset_info

In [None]:
# Directory-name prefixes of the analysis folders whose benchmark logs are collected.
start_patterns = ["maynard_human_brain_analysis", "10x_TuPro_v2"]

# Recursively gather every *.log below <prefix>*/out_benchmark/benchmarks and keep
# only the paths that mention "evaluate" or "extract".
benchmark_files = [
    log_path
    for prefix in start_patterns
    for log_path in glob.glob(
        f"../{prefix}*/out_benchmark/benchmarks/**/*.log", recursive=True
    )
    if any(keyword in log_path for keyword in ("evaluate", "extract"))
]
benchmark_files[:2]

In [None]:
# Read every benchmark log into one long table with one row per log entry.
# The rule name is the 5th path component (with any ".log" suffix stripped) and
# the dataset is the 2nd path component, i.e. the analysis folder name.
# NOTE(review): assumes each log is tab-separated and has a run-time column "s"
# (seconds, judging by the later division by 60) — confirm against the log writer.
frames = []
for file in tqdm(benchmark_files):
    path_parts = file.split("/")
    df = pd.read_csv(file, sep="\t")
    df["rule"] = path_parts[4].split(".log")[0]
    df["dataset"] = path_parts[1]
    frames.append(df)

# ignore_index gives every row a unique label instead of repeating 0..k per file.
data = pd.concat(frames, ignore_index=True)[["rule", "dataset", "s"]]

# Mean image-feature extraction time per dataset, kept as a {dataset: seconds} dict.
is_image_extract = data["rule"] == "extract_image_features"
image_extract = data[is_image_extract].groupby("dataset").s.agg("mean").to_dict()

# The extraction rule itself is not compared further. The explicit copy makes the
# column assignment below operate on an independent frame rather than on a slice.
data = data[~is_image_extract].copy()

# AESTETIK run times additionally include the dataset's mean image-extraction time.
is_aestetik = data["rule"] == "AESTETIK"
missing_datasets = set(data.loc[is_aestetik, "dataset"]) - set(image_extract)
if missing_datasets:
    # Fail loudly (as a plain dict lookup would) instead of silently producing NaN.
    raise KeyError(
        f"No extract_image_features benchmark found for: {sorted(missing_datasets)}"
    )
data["s"] = data["s"].where(~is_aestetik, data["s"] + data["dataset"].map(image_extract))
data

In [None]:
# Bootstrap the run times of every (rule, dataset) group. Each group's result is
# unpacked into the two columns value_median / value_std.
bootstrapped = (
    data.groupby(["rule", "dataset"])
    .s.apply(lambda runtimes: bootstrapping(runtimes))
    .reset_index()
)
tab = pd.DataFrame(
    bootstrapped["s"].to_list(),
    columns=["value_median", "value_std"],
    index=[bootstrapped["rule"], bootstrapped["dataset"]],
).reset_index()

# Annotate every rule with its input modality from the YAML metadata; a rule that
# is missing from the metadata raises KeyError.
tab["modality"] = [model_and_dataset_info["model_modality"][rule] for rule in tab.rule]
tab["model"] = tab.rule
tab

In [None]:
# Express both bootstrap statistics in minutes (the value_* columns divided by 60).
for stat in ("median", "std"):
    tab[f"time_min_{stat}"] = tab[f"value_{stat}"] / 60

In [None]:
# First element of the bootstrap result per rule, pooled over all datasets,
# divided by 60 and sorted ascending.
pooled_first_stat = data.groupby(["rule"]).s.apply(lambda runtimes: bootstrapping(runtimes)[0])
pooled_first_stat.div(60).sort_values()

In [None]:
# Attach the display name of each dataset, looked up in the YAML metadata.
tab["Dataset"] = tab.dataset.apply(lambda x: model_and_dataset_info["dataset"][x])

# Keep only the methods shown in the figure. The explicit .copy() makes `tab` an
# independent frame, so the column assignments below do not write into a slice of
# the unfiltered table (which triggers pandas' SettingWithCopyWarning).
models_to_plot = ['SpaGCN', 'BayesSpace', 'AESTETIK', 'GraphST', 'STAGATE', 'MUSE', 'Leiden', 'stLearn']
tab = tab[tab["rule"].isin(models_to_plot)].copy()

# Fix the facet order of the datasets.
tab["Dataset"] = pd.Categorical(tab["Dataset"], ['LIBD Human DLPFC','Tumor Profiler'])

# Rank models within each dataset by median run time; with ascending=False the
# largest run time receives rank 1. Models are then ordered by their median rank.
tab["model_rank"] = tab.groupby("dataset").value_median.rank(ascending=False)
model_order = tab.groupby("model").model_rank.agg("median").sort_values().index
tab["model"] = pd.Categorical(tab["model"], model_order)

# Horizontal spread used by the dodged geoms in the plotting cell.
position_dodge_width = 0.8
tab["Model"] = tab["model"]

# Fix the legend order of the modalities.
tab["modality"] = pd.Categorical(tab["modality"], ['transcriptomics',
                                  'transcriptomics + spatial',
                                  'transcriptomics + image',
                                  'transcriptomics + spatial + image'])

# Capitalised copy used as the legend title. The previous element-wise apply
# returned its input unchanged in both branches, so a direct assignment is equivalent.
tab["Modality"] = tab["modality"]

In [None]:
# Run-time figure: one facet per dataset, one dodged point per model (colour =
# model, shape = modality) at the median run time in minutes, with error bars of
# +/- one bootstrap std.
figure_path = os.path.join("figures", "run_time.png")
# Create the output folder up front so saving does not depend on the directory
# already existing (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(figure_path), exist_ok=True)

# y-axis ticks every 5 minutes, covering the largest median run time.
y_breaks = list(np.arange(0, int(tab['time_min_median'].max())+5, 5))

p = (p9.ggplot(tab, p9.aes("Dataset", "time_min_median"))
 + p9.geom_point(p9.aes(color="Model", shape="Modality"), size=3, position=p9.position_dodge(width=position_dodge_width))
 + p9.facet_grid("~Dataset", scales="free_x")
 + p9.geom_errorbar(p9.aes(x="Dataset", ymin="time_min_median-time_min_std",ymax="time_min_median+time_min_std", color="Model"),
                    width=0.001, alpha=1, size=1,
                   position=p9.position_dodge(width=position_dodge_width))
 + p9.theme_bw()
 # The x tick labels are blanked because the facet strips already name the dataset.
 + p9.theme(subplots_adjust={'wspace': 0}, figure_size=(8, 5), axis_text_x = p9.element_blank(),
            legend_position="right",
            text=p9.element_text(size=15),
            strip_text=p9.element_text(size=17),
            legend_title=p9.element_text(size=17),
            legend_text=p9.element_text(size=16))
 + p9.ylab("Time (min)")
 + p9.xlab("")
 + scale_color_prism(palette = "colors")
 + p9.guides(color=p9.guide_legend(nrow=4, override_aes = p9.aes(shape = ".")))
 + p9.scale_y_continuous(breaks=y_breaks)
)
p.save(figure_path, dpi=300)
p

In [None]:
# Median of the per-model median run times for every (Modality, dataset) pair.
hline_tab = (
    tab.groupby(["Modality", "dataset"])
    .value_median.agg("median")
    .reset_index()
)
# Combined "<Modality>_<dataset>" key for each row.
hline_tab["modality_dataset"] = hline_tab.apply(
    lambda row: f"{row.Modality}_{row.dataset}", axis=1
)
# Move the aggregate to its final name: pop removes value_median and the
# assignment appends it as the last column.
hline_tab["hline_value_median"] = hline_tab.pop("value_median")
hline_tab