# Read real and generated datasets 

In [53]:
import scanpy as sc
from celldreamer.eval.compute_evaluation_metrics import process_labels, compute_evaluation_metrics
from scipy import sparse
import pandas as pd
import numpy as np

In [54]:
def compute_mean_std(results):
    means = results.groupby("ct").mean(0).mean(0)
    stds = results.groupby("ct").mean(0).std(0) / np.sqrt(len(results.groupby("ct")))
    for (name, mean, std) in zip(list(means.index), list(means), list(stds)):
        print(f"{name} {np.round(mean, 2)} ± {np.round(std,2)}")

## PBMC3K

In [55]:
results_celldreamer_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/pbmc3k/celldreamer_pbmc3k")
results_scDiff_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/pbmc3k/scdiff_pbmc3k")
results_scvi_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/pbmc3k/scvi_pbmc3k")

In [56]:
compute_mean_std(results_celldreamer_pbmc3k)

Unnamed: 0 8.5 ± 0.76
1-Wasserstein_PCA 16.8 ± 0.45
2-Wasserstein_PCA 16.94 ± 0.44
Linear_MMD_PCA 203.58 ± 7.62
Poly_MMD_PCA 100691.4 ± 32618.27
RBF_MMD_PCA 0.85 ± 0.05
KNN identity 0.67 ± 0.06
KNN identity PCA 0.99 ± 0.01
precision 0.53 ± 0.08
recall 0.42 ± 0.12
density 1.03 ± 0.23
coverage 0.97 ± 0.02
precision_PCA 0.6 ± 0.16
recall_PCA 0.0 ± 0.0
density_PCA 0.13 ± 0.05
coverage_PCA 0.19 ± 0.08
KNN category 0.36 ± 0.15
KNN category PCA 0.33 ± 0.11
global_f1 0.48 ± 0.0


In [57]:
compute_mean_std(results_scDiff_pbmc3k)

Unnamed: 0 8.5 ± 0.76
1-Wasserstein_PCA 22.18 ± 1.2
2-Wasserstein_PCA 22.41 ± 1.21
Linear_MMD_PCA 403.09 ± 57.49
Poly_MMD_PCA 492896.86 ± 276909.09
RBF_MMD_PCA 1.27 ± 0.2
KNN identity 0.38 ± 0.04
KNN identity PCA 1.0 ± 0.0
precision 0.75 ± 0.13
recall 0.42 ± 0.11
density 1.94 ± 0.6
coverage 0.92 ± 0.06
precision_PCA 0.26 ± 0.15
recall_PCA 0.09 ± 0.09
density_PCA 0.03 ± 0.02
coverage_PCA 0.06 ± 0.03
KNN category 0.22 ± 0.06
KNN category PCA 0.22 ± 0.05
global_f1 0.55 ± 0.0


In [58]:
compute_mean_std(results_scvi_pbmc3k)

Unnamed: 0 8.5 ± 0.76
1-Wasserstein_PCA 17.52 ± 0.29
2-Wasserstein_PCA 17.66 ± 0.29
Linear_MMD_PCA 232.05 ± 7.53
Poly_MMD_PCA 133023.27 ± 43396.21
RBF_MMD_PCA 0.94 ± 0.05
KNN identity 0.53 ± 0.04
KNN identity PCA 1.0 ± 0.0
precision 0.47 ± 0.12
recall 0.61 ± 0.09
density 0.72 ± 0.25
coverage 0.94 ± 0.03
precision_PCA 0.55 ± 0.17
recall_PCA 0.0 ± 0.0
density_PCA 0.1 ± 0.04
coverage_PCA 0.14 ± 0.08
KNN category 0.43 ± 0.15
KNN category PCA 0.4 ± 0.1
global_f1 0.57 ± 0.0


## HLCA

In [59]:
results_celldreamer_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/hlca_core/celldreamer_hlca_core")
results_scDiff_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/hlca_core/scdiff_hlca_core")
results_scvi_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/hlca_core/scvi_hlca_core")

In [60]:
compute_mean_std(results_celldreamer_hlca)

Unnamed: 0 68.16 ± 2.02
1-Wasserstein_PCA 10.64 ± 0.08
2-Wasserstein_PCA 10.72 ± 0.08
Linear_MMD_PCA 91.04 ± 1.64
Poly_MMD_PCA 32650.54 ± 2584.1
RBF_MMD_PCA 0.54 ± 0.02
KNN identity 0.8 ± 0.01
KNN identity PCA 1.0 ± 0.0
precision 0.31 ± 0.02
recall 0.71 ± 0.01
density 0.28 ± 0.03
coverage 0.56 ± 0.03
precision_PCA 0.12 ± 0.03
recall_PCA 0.04 ± 0.01
density_PCA 0.02 ± 0.0
coverage_PCA 0.02 ± 0.0
KNN category 0.18 ± 0.02
KNN category PCA 0.12 ± 0.02
global_f1 0.73 ± 0.0


In [61]:
compute_mean_std(results_scDiff_hlca)

Unnamed: 0 68.16 ± 2.02
1-Wasserstein_PCA 15.7 ± 0.45
2-Wasserstein_PCA 15.82 ± 0.45
Linear_MMD_PCA 218.58 ± 14.23
Poly_MMD_PCA 55550.9 ± 5430.84
RBF_MMD_PCA 0.96 ± 0.04
KNN identity 0.68 ± 0.03
KNN identity PCA 1.0 ± 0.0
precision 0.84 ± 0.03
recall 0.02 ± 0.01
density 2.9 ± 0.45
coverage 0.72 ± 0.04
precision_PCA 0.01 ± 0.0
recall_PCA 0.0 ± 0.0
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0
KNN category 0.08 ± 0.01
KNN category PCA 0.02 ± 0.01
global_f1 0.53 ± 0.0


In [62]:
compute_mean_std(results_scvi_hlca)

Unnamed: 0 68.16 ± 2.02
1-Wasserstein_PCA 11.62 ± 0.18
2-Wasserstein_PCA 11.78 ± 0.19
Linear_MMD_PCA 111.31 ± 4.39
Poly_MMD_PCA 31784.53 ± 2760.82
RBF_MMD_PCA 0.58 ± 0.02
KNN identity 0.84 ± 0.01
KNN identity PCA 1.0 ± 0.0
precision 0.29 ± 0.02
recall 0.69 ± 0.03
density 0.29 ± 0.04
coverage 0.5 ± 0.04
precision_PCA 0.14 ± 0.02
recall_PCA 0.01 ± 0.0
density_PCA 0.02 ± 0.0
coverage_PCA 0.02 ± 0.0
KNN category 0.09 ± 0.01
KNN category PCA 0.05 ± 0.01
global_f1 0.57 ± 0.0


## Tabula Muris

In [63]:
results_celldreamer_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/tabula_muris/celldreamer_tabula_muris")
results_scDiff_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/tabula_muris/scdiff_tabula_muris")
results_scvi_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/tabula_muris/scvi_tabula_muris")

In [64]:
compute_mean_std(results_celldreamer_tabula)

Unnamed: 0 19.0 ± 1.08
1-Wasserstein_PCA 7.2 ± 0.17
2-Wasserstein_PCA 7.39 ± 0.2
Linear_MMD_PCA 72.97 ± 21.49
Poly_MMD_PCA 42904.95 ± 21331.06
RBF_MMD_PCA 0.19 ± 0.02
KNN identity 0.79 ± 0.02
KNN identity PCA 1.0 ± 0.0
precision 0.23 ± 0.02
recall 0.77 ± 0.02
density 0.14 ± 0.02
coverage 0.41 ± 0.06
precision_PCA 0.23 ± 0.06
recall_PCA 0.02 ± 0.0
density_PCA 0.04 ± 0.01
coverage_PCA 0.02 ± 0.0
KNN category 0.15 ± 0.03
KNN category PCA 0.17 ± 0.05
global_f1 0.82 ± 0.0


In [43]:
compute_mean_std(results_scDiff_tabula)

Unnamed: 0 19.0 ± 1.08
1-Wasserstein_PCA 7.55 ± 0.4
2-Wasserstein_PCA 7.89 ± 0.45
Linear_MMD_PCA 74.1 ± 22.93
Poly_MMD_PCA 47194.74 ± 22894.47
RBF_MMD_PCA 0.24 ± 0.04
KNN identity 0.72 ± 0.06
KNN identity PCA 1.0 ± 0.0
precision 0.86 ± 0.05
recall 0.02 ± 0.01
density 3.87 ± 0.72
coverage 0.73 ± 0.08
precision_PCA 0.33 ± 0.08
recall_PCA 0.02 ± 0.01
density_PCA 0.09 ± 0.05
coverage_PCA 0.03 ± 0.0
KNN category 0.12 ± 0.01
KNN category PCA 0.09 ± 0.01
global_f1 0.72 ± 0.0


In [44]:
compute_mean_std(results_scvi_tabula)

Unnamed: 0 19.0 ± 1.08
1-Wasserstein_PCA 9.32 ± 0.47
2-Wasserstein_PCA 9.76 ± 0.53
Linear_MMD_PCA 86.03 ± 23.17
Poly_MMD_PCA 48905.92 ± 24907.48
RBF_MMD_PCA 0.26 ± 0.02
KNN identity 0.84 ± 0.02
KNN identity PCA 1.0 ± 0.0
precision 0.22 ± 0.02
recall 0.83 ± 0.02
density 0.17 ± 0.02
coverage 0.31 ± 0.03
precision_PCA 0.32 ± 0.06
recall_PCA 0.01 ± 0.0
density_PCA 0.07 ± 0.02
coverage_PCA 0.03 ± 0.0
KNN category 0.1 ± 0.02
KNN category PCA 0.09 ± 0.02
global_f1 0.66 ± 0.0


## Dentategyrus

In [45]:
results_celldreamer_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/dentategyrus/celldreamer_dentategyrus")
results_scDiff_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/dentategyrus/scdiff_dentategyrus")
results_scvi_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/dentategyrus/scvi_dentategyrus")

In [46]:
compute_mean_std(results_celldreamer_dentategyrus)

Unnamed: 0 20.5 ± 1.12
1-Wasserstein_PCA 21.46 ± 0.17
2-Wasserstein_PCA 21.55 ± 0.17
Linear_MMD_PCA 427.22 ± 5.22
Poly_MMD_PCA 273032.17 ± 52045.18
RBF_MMD_PCA 1.12 ± 0.04
KNN identity 0.63 ± 0.02
KNN identity PCA 1.0 ± 0.0
precision 0.47 ± 0.03
recall 0.72 ± 0.03
density 0.57 ± 0.08
coverage 0.88 ± 0.05
precision_PCA 0.0 ± 0.0
recall_PCA 0.15 ± 0.07
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0
KNN category 0.21 ± 0.03
KNN category PCA 0.14 ± 0.07
global_f1 0.73 ± 0.0


In [47]:
compute_mean_std(results_scDiff_dentategyrus)

Unnamed: 0 20.5 ± 1.12
1-Wasserstein_PCA 22.5 ± 0.1
2-Wasserstein_PCA 22.56 ± 0.1
Linear_MMD_PCA 479.89 ± 4.16
Poly_MMD_PCA 340201.6 ± 71783.3
RBF_MMD_PCA 1.22 ± 0.05
KNN identity 0.37 ± 0.03
KNN identity PCA 1.0 ± 0.0
precision 0.99 ± 0.0
recall 0.0 ± 0.0
density 8.16 ± 1.24
coverage 1.0 ± 0.0
precision_PCA 0.0 ± 0.0
recall_PCA 0.0 ± 0.0
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0
KNN category 0.22 ± 0.03
KNN category PCA 0.13 ± 0.07
global_f1 0.77 ± 0.0


In [48]:
compute_mean_std(results_scvi_dentategyrus)

Unnamed: 0 20.5 ± 1.12
1-Wasserstein_PCA 22.53 ± 0.23
2-Wasserstein_PCA 22.61 ± 0.23
Linear_MMD_PCA 476.91 ± 8.35
Poly_MMD_PCA 301168.5 ± 52324.59
RBF_MMD_PCA 1.15 ± 0.04
KNN identity 0.63 ± 0.03
KNN identity PCA 1.0 ± 0.0
precision 0.57 ± 0.04
recall 0.75 ± 0.02
density 0.76 ± 0.07
coverage 0.98 ± 0.01
precision_PCA 0.01 ± 0.01
recall_PCA 0.0 ± 0.0
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0
KNN category 0.19 ± 0.03
KNN category PCA 0.12 ± 0.07
global_f1 0.71 ± 0.0


## C.Elegans

In [49]:
results_celldreamer_celegans = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/c_elegans/celldreamer_c_elegans")
results_scDiff_celegans= pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/c_elegans/scdiff_c_elegans")
results_scvi_celegans = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results_cell_type/c_elegans/scvi_c_elegans")

In [50]:
compute_mean_std(results_celldreamer_celegans)

Unnamed: 0 39.5 ± 1.5
1-Wasserstein_PCA 8.5 ± 0.2
2-Wasserstein_PCA 8.69 ± 0.22
Linear_MMD_PCA 64.64 ± 6.16
Poly_MMD_PCA 20271.33 ± 6457.88
RBF_MMD_PCA 0.34 ± 0.03
KNN identity 0.81 ± 0.01
KNN identity PCA 0.99 ± 0.0
precision 0.34 ± 0.03
recall 0.53 ± 0.04
density 0.65 ± 0.11
coverage 0.83 ± 0.04
precision_PCA 0.5 ± 0.04
recall_PCA 0.21 ± 0.05
density_PCA 0.13 ± 0.03
coverage_PCA 0.26 ± 0.04
KNN category 0.15 ± 0.02
KNN category PCA 0.12 ± 0.03
global_f1 0.36 ± 0.0


In [51]:
compute_mean_std(results_scDiff_celegans)

Unnamed: 0 39.5 ± 1.5
1-Wasserstein_PCA 7.73 ± 0.22
2-Wasserstein_PCA 7.93 ± 0.25
Linear_MMD_PCA 56.11 ± 7.7
Poly_MMD_PCA 20002.51 ± 7786.48
RBF_MMD_PCA 0.29 ± 0.02
KNN identity 0.6 ± 0.04
KNN identity PCA 0.97 ± 0.0
precision 0.86 ± 0.02
recall 0.09 ± 0.03
density 2.79 ± 0.37
coverage 0.93 ± 0.03
precision_PCA 0.67 ± 0.04
recall_PCA 0.18 ± 0.06
density_PCA 0.22 ± 0.03
coverage_PCA 0.34 ± 0.04
KNN category 0.12 ± 0.02
KNN category PCA 0.09 ± 0.01
global_f1 0.36 ± 0.0


In [52]:
compute_mean_std(results_scvi_celegans)

Unnamed: 0 39.5 ± 1.5
1-Wasserstein_PCA 9.36 ± 0.26
2-Wasserstein_PCA 9.65 ± 0.28
Linear_MMD_PCA 73.73 ± 6.15
Poly_MMD_PCA 20790.22 ± 6767.18
RBF_MMD_PCA 0.36 ± 0.03
KNN identity 0.8 ± 0.01
KNN identity PCA 0.98 ± 0.0
precision 0.37 ± 0.03
recall 0.58 ± 0.04
density 0.62 ± 0.1
coverage 0.79 ± 0.04
precision_PCA 0.5 ± 0.04
recall_PCA 0.23 ± 0.05
density_PCA 0.13 ± 0.02
coverage_PCA 0.23 ± 0.03
KNN category 0.13 ± 0.03
KNN category PCA 0.09 ± 0.02
global_f1 0.32 ± 0.0
