In [1]:
import seml
import pandas as pd
import glob
import mudata as mu
import deconvatac as de
import seaborn as sns
import tqdm
import os

  from .autonotebook import tqdm as notebook_tqdm
  self.seed = seed
  self.dl_pin_memory_gpu_training = (


## Create Results Table

In [2]:
def get_proportions(adata):
    df = pd.DataFrame(adata.obsm["proportions"], columns=adata.uns["proportion_names"], index=adata.obs_names)
    return df

In [None]:
data_path = "/vol/storage/data/deconvolution_results"
methods = ["tangram", "rctd", "spatialdwls", "destvi", "cell2location"]  # cell2location, moscot
modalities = ["atac", "rna"]
# go over all methods and modalities
df = [pd.DataFrame({'path': glob.glob(os.path.join(data_path, method, modality, "*", "*"))}) for method in methods for modality in modalities]
df = pd.concat(df)
df[['method', 'modality', 'dataset_features']] = df['path'].str.split('/', expand=True).iloc[:, 5:-1]
df['dataset'] = df['dataset_features'].str.rsplit("_", n=2).str[0]
df["features"] = df["dataset_features"].str.split("_", n=2).str[-1]

In [9]:
df

Unnamed: 0,path,method,modality,dataset_features,dataset,features
0,/vol/storage/data/deconvolution_results/tangra...,tangram,atac,Brain_1_highly_accessible,Brain_1,highly_accessible
1,/vol/storage/data/deconvolution_results/tangra...,tangram,atac,Heart_3_highly_accessible,Heart_3,highly_accessible
2,/vol/storage/data/deconvolution_results/tangra...,tangram,atac,russell_250_highly_variable,russell_250,highly_variable
3,/vol/storage/data/deconvolution_results/tangra...,tangram,atac,Brain_2_highly_variable,Brain_2,highly_variable
4,/vol/storage/data/deconvolution_results/tangra...,tangram,atac,russell_250_highly_accessible,russell_250,highly_accessible
...,...,...,...,...,...,...
13,/vol/storage/data/deconvolution_results/cell2l...,cell2location,rna,Heart_2_highly_variable,Heart_2,highly_variable
14,/vol/storage/data/deconvolution_results/cell2l...,cell2location,rna,Heart_1_highly_variable,Heart_1,highly_variable
15,/vol/storage/data/deconvolution_results/cell2l...,cell2location,rna,Heart_1_highly_variable,Heart_1,highly_variable
16,/vol/storage/data/deconvolution_results/cell2l...,cell2location,rna,Brain_4_highly_variable,Brain_4,highly_variable


In [10]:
mapping_dict = {
    "russell_250": "/vol/storage/data/simulations/russell_250.h5mu",
    "Heart_1": "/vol/storage/data/simulations/Heart_1.h5mu",
    "Heart_2": "/vol/storage/data/simulations/Heart_2.h5mu",
    "Heart_3": "/vol/storage/data/simulations/Heart_3.h5mu",
    "Heart_4": "/vol/storage/data/simulations/Heart_4.h5mu",
    "Brain_1": "/vol/storage/data/simulations/Brain_1.h5mu",
    "Brain_2": "/vol/storage/data/simulations/Brain_2.h5mu",
    "Brain_3": "/vol/storage/data/simulations/Brain_3.h5mu",
    "Brain_4": "/vol/storage/data/simulations/Brain_4.h5mu",
}

In [11]:
df["mdata_spatial_path"] = df['dataset'].map(mapping_dict)

Add Moscot results to table

In [None]:
# add Moscot
df_moscot = pd.DataFrame({"path": glob.glob(os.path.join(data_path, "moscot", "*", "*", "highly*", "*predictions*"))})
df_moscot[["method", "dataset", "modality", "features"]] = df_moscot["path"].str.split("/", expand=True).iloc[:, 5:-1]
df_moscot["mdata_spatial_path"] = df_moscot["dataset"].map(mapping_dict)
df = pd.concat([df, df_moscot])

In [16]:
df.groupby(['method', 'dataset', 'modality']).size().loc['moscot']

dataset     modality
Brain_1     atac        2
            rna         1
Brain_2     atac        2
            rna         1
Brain_3     atac        2
            rna         1
Brain_4     atac        2
            rna         1
Heart_1     atac        2
            rna         1
Heart_2     atac        2
            rna         1
Heart_3     atac        2
            rna         1
Heart_4     atac        2
            rna         1
russel_250  atac        2
            rna         1
dtype: int64

In [17]:
def load_table(path, index_col):
    res = pd.read_csv(path, index_col=index_col)
    if "q05cell_abundance_w_sf_" in res.columns[0]:
        res.columns = res.columns.to_series().str.split("q05cell_abundance_w_sf_", expand=True).loc[:, 1].values
    elif "meanscell_abundance_w_sf_" in res.columns[0]:
        res.columns = res.columns.to_series().str.split("meanscell_abundance_w_sf_", expand=True).loc[:, 1].values
    if res.index[0] != 0:
        res.index = res.index.astype(int) - 1
    res.index = res.index.astype(str)
    res.drop('cell_ID', axis=1, inplace=True)
    res = res.div(res.sum(axis=1), axis=0)
    return res

In [21]:
df = df.query("method == 'spatialdwls'")

In [22]:
jsd = []
rmse = []
for _, row in tqdm.tqdm(df.iterrows()):
    # load ground truth
    target_adata = mu.read(row["mdata_spatial_path"])
    targets = get_proportions(target_adata[row["modality"]])

    # load table
    predictions = load_table(row["path"], index_col=(None if row["method"] == "moscot" else 0))
    predictions = predictions.loc[targets.index, targets.columns]
    jsd.append(de.tl.jsd(predictions, targets))
    rmse.append(de.tl.rmse(predictions, targets))
df["jsd"] = jsd
df["rmse"] = rmse

27it [00:40,  1.48s/it]


In [None]:
df.to_csv("tables/results_table.csv")

In [None]:
df = pd.read_csv('tables/results_table.csv', index_col=0)

In [33]:
df.groupby(['method', 'features', 'modality'])[['jsd', 'rmse']].mean().sum(axis=1).sort_values()

method         features           modality
cell2location  highly_variable    rna         0.307014
rctd           highly_variable    rna         0.346914
cell2location  highly_variable    atac        0.404087
               highly_accessible  atac        0.415427
rctd           highly_variable    atac        0.416090
spatialdwls    highly_variable    rna         0.456846
rctd           highly_accessible  atac        0.457290
destvi         highly_variable    rna         0.501630
spatialdwls    highly_variable    atac        0.612917
destvi         highly_variable    atac        0.643956
tangram        highly_variable    rna         0.729420
spatialdwls    highly_accessible  atac        0.735594
tangram        highly_variable    atac        0.750257
               highly_accessible  atac        0.756998
destvi         highly_accessible  atac        0.790659
moscot         highly_accessible  atac        0.924676
               highly_variable    atac        0.926016
                      