# Read real and generated datasets 

In [65]:
import scanpy as sc
from celldreamer.eval.compute_evaluation_metrics import process_labels, compute_evaluation_metrics
from scipy import sparse
import pandas as pd
import numpy as np

In [66]:
def compute_mean_std(results):
    means = results.mean(0)
    stds = results.std(0) / np.sqrt(results.shape[0])
    for (name, mean, std) in zip(list(means.index), list(means), list(stds)):
        print(f"{name} {np.round(mean, 2)} ± {np.round(std,2)}")

## PBMC3K

In [67]:
results_celldreamer_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/pbmc3k/celldreamer_pbmc3k")
results_activa_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/pbmc3k/activa_pbmc3k")
results_scgan_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/pbmc3k/scgan_pbmc3k")

In [68]:
compute_mean_std(results_celldreamer_pbmc3k)

Unnamed: 0 1.0 ± 0.58
1-Wasserstein_PCA 16.66 ± 0.06
2-Wasserstein_PCA 16.81 ± 0.06
Linear_MMD_PCA 204.24 ± 2.01
Poly_MMD_PCA 39695.49 ± 831.11
RBF_MMD_PCA 0.44 ± 0.0
KNN identity 0.66 ± 0.03
KNN identity PCA 0.99 ± 0.0
precision 0.33 ± 0.02
recall 0.2 ± 0.01
density 1.39 ± 0.04
coverage 0.99 ± 0.01
precision_PCA 0.44 ± 0.02
recall_PCA 0.0 ± 0.0
density_PCA 0.09 ± 0.01
coverage_PCA 0.11 ± 0.01


In [69]:
compute_mean_std(results_activa_pbmc3k)

Unnamed: 0 1.0 ± 0.58
1-Wasserstein_PCA 18.89 ± 0.11
2-Wasserstein_PCA 19.08 ± 0.12
Linear_MMD_PCA 270.23 ± 6.17
Poly_MMD_PCA 77596.24 ± 4357.65
RBF_MMD_PCA 0.56 ± 0.01
KNN identity 0.33 ± 0.0
KNN identity PCA 0.99 ± 0.0
precision 0.79 ± 0.03
recall 0.03 ± 0.01
density 0.69 ± 0.08
coverage 0.48 ± 0.07
precision_PCA 0.37 ± 0.02
recall_PCA 0.0 ± 0.0
density_PCA 0.16 ± 0.01
coverage_PCA 0.23 ± 0.01


In [70]:
compute_mean_std(results_scgan_pbmc3k)

Unnamed: 0 1.0 ± 0.58
1-Wasserstein_PCA 15.32 ± 0.05
2-Wasserstein_PCA 15.54 ± 0.06
Linear_MMD_PCA 241.48 ± 3.3
Poly_MMD_PCA 77061.9 ± 1590.49
RBF_MMD_PCA 0.36 ± 0.0
KNN identity 0.58 ± 0.03
KNN identity PCA 0.99 ± 0.0
precision 0.8 ± 0.01
recall 0.01 ± 0.0
density 5.68 ± 0.12
coverage 1.0 ± 0.0
precision_PCA 0.65 ± 0.01
recall_PCA 0.01 ± 0.0
density_PCA 0.2 ± 0.0
coverage_PCA 0.16 ± 0.0


## HLCA

In [71]:
results_celldreamer_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/hlca_core/celldreamer_hlca_core")
results_activa_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/hlca_core/activa_hlca_core")
results_scgan_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/hlca_core/scgan_hlca_core")

In [72]:
compute_mean_std(results_celldreamer_hlca)

Unnamed: 0 14.5 ± 1.61
1-Wasserstein_PCA 11.02 ± 0.03
2-Wasserstein_PCA 11.18 ± 0.04
Linear_MMD_PCA 84.31 ± 1.04
Poly_MMD_PCA 7293.84 ± 231.83
RBF_MMD_PCA 0.15 ± 0.0
KNN identity 0.66 ± 0.0
KNN identity PCA 0.99 ± 0.0
precision 0.68 ± 0.0
recall 0.8 ± 0.0
density 0.75 ± 0.01
coverage 0.97 ± 0.0
precision_PCA 0.24 ± 0.01
recall_PCA 0.13 ± 0.0
density_PCA 0.05 ± 0.0
coverage_PCA 0.14 ± 0.0


In [73]:
compute_mean_std(results_activa_hlca)

Unnamed: 0 14.5 ± 1.61
1-Wasserstein_PCA 11.26 ± 0.03
2-Wasserstein_PCA 11.47 ± 0.03
Linear_MMD_PCA 77.51 ± 0.67
Poly_MMD_PCA 6475.26 ± 129.8
RBF_MMD_PCA 0.23 ± 0.0
KNN identity 0.37 ± 0.0
KNN identity PCA 0.99 ± 0.0
precision 0.86 ± 0.0
recall 0.03 ± 0.0
density 1.89 ± 0.02
coverage 0.97 ± 0.0
precision_PCA 0.4 ± 0.01
recall_PCA 0.01 ± 0.0
density_PCA 0.08 ± 0.0
coverage_PCA 0.12 ± 0.0


In [74]:
compute_mean_std(results_scgan_hlca)

Unnamed: 0 14.5 ± 1.61
1-Wasserstein_PCA 10.81 ± 0.02
2-Wasserstein_PCA 11.1 ± 0.02
Linear_MMD_PCA 29.91 ± 0.6
Poly_MMD_PCA 2767.81 ± 79.46
RBF_MMD_PCA 0.18 ± 0.0
KNN identity 0.84 ± 0.0
KNN identity PCA 0.99 ± 0.0
precision 0.37 ± 0.0
recall 0.12 ± 0.0
density 3.32 ± 0.05
coverage 0.54 ± 0.0
precision_PCA 0.47 ± 0.01
recall_PCA 0.06 ± 0.0
density_PCA 0.17 ± 0.0
coverage_PCA 0.15 ± 0.0


## Tabula Muris

In [75]:
results_celldreamer_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/tabula_muris/celldreamer_tabula_muris")
results_activa_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/tabula_muris/activa_tabula_muris")
results_scgan_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/tabula_muris/scgan_tabula_muris")

In [76]:
compute_mean_std(results_celldreamer_tabula)

Unnamed: 0 14.5 ± 1.61
1-Wasserstein_PCA 8.02 ± 0.04
2-Wasserstein_PCA 8.54 ± 0.06
Linear_MMD_PCA 36.52 ± 1.09
Poly_MMD_PCA 1454.02 ± 315.2
RBF_MMD_PCA 0.08 ± 0.0
KNN identity 0.69 ± 0.0
KNN identity PCA 0.98 ± 0.0
precision 0.72 ± 0.0
recall 0.84 ± 0.0
density 0.65 ± 0.01
coverage 0.96 ± 0.0
precision_PCA 0.53 ± 0.01
recall_PCA 0.26 ± 0.0
density_PCA 0.15 ± 0.0
coverage_PCA 0.25 ± 0.0


In [77]:
compute_mean_std(results_activa_tabula)

Unnamed: 0 14.5 ± 1.61
1-Wasserstein_PCA 8.67 ± 0.11
2-Wasserstein_PCA 14.46 ± 3.2
Linear_MMD_PCA 17.72 ± 0.87
Poly_MMD_PCA -2920.6 ± 4568.08
RBF_MMD_PCA 0.15 ± 0.0
KNN identity 0.48 ± 0.0
KNN identity PCA 0.97 ± 0.0
precision 0.95 ± 0.0
recall 0.11 ± 0.0
density 5.18 ± 0.08
coverage 1.0 ± 0.0
precision_PCA 0.79 ± 0.01
recall_PCA 0.19 ± 0.0
density_PCA 0.31 ± 0.01
coverage_PCA 0.27 ± 0.0


In [78]:
compute_mean_std(results_scgan_tabula)

Unnamed: 0 14.5 ± 1.61
1-Wasserstein_PCA 12.07 ± 0.03
2-Wasserstein_PCA 12.85 ± 0.04
Linear_MMD_PCA 17.19 ± 0.55
Poly_MMD_PCA 3583.23 ± 133.01
RBF_MMD_PCA 0.25 ± 0.0
KNN identity 0.77 ± 0.01
KNN identity PCA 0.99 ± 0.0
precision 0.14 ± 0.0
recall 0.14 ± 0.0
density 0.79 ± 0.03
coverage 0.31 ± 0.0
precision_PCA 0.64 ± 0.02
recall_PCA 0.04 ± 0.0
density_PCA 0.21 ± 0.01
coverage_PCA 0.09 ± 0.0


## Dentategyrus

In [79]:
results_celldreamer_dentate = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/dentategyrus/celldreamer_dentategyrus")
results_activa_dentate = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/dentategyrus/activa_dentategyrus")
results_scgan_dentate = pd.read_csv("/home/icb/alessandro.palma/environment/cfgen/experiments/results/dentategyrus/scgan_dentategyrus")

In [80]:
compute_mean_std(results_celldreamer_dentate)

Unnamed: 0 1.0 ± 0.58
1-Wasserstein_PCA 21.11 ± 0.02
2-Wasserstein_PCA 21.2 ± 0.02
Linear_MMD_PCA 419.44 ± 1.9
Poly_MMD_PCA 167675.54 ± 2185.17
RBF_MMD_PCA 0.42 ± 0.0
KNN identity 0.64 ± 0.0
KNN identity PCA 1.0 ± 0.0
precision 0.41 ± 0.0
recall 0.7 ± 0.01
density 0.62 ± 0.01
coverage 0.84 ± 0.01
precision_PCA 0.0 ± 0.0
recall_PCA 0.43 ± 0.06
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0


In [81]:
compute_mean_std(results_activa_dentate)

Unnamed: 0 1.0 ± 0.58
1-Wasserstein_PCA 22.48 ± 0.08
2-Wasserstein_PCA 22.72 ± 0.11
Linear_MMD_PCA 466.25 ± 2.07
Poly_MMD_PCA 204073.03 ± 1789.8
RBF_MMD_PCA 0.56 ± 0.0
KNN identity 0.33 ± 0.0
KNN identity PCA 1.0 ± 0.0
precision 0.99 ± 0.0
recall 0.0 ± 0.0
density 42.99 ± 0.9
coverage 1.0 ± 0.0
precision_PCA 0.0 ± 0.0
recall_PCA 0.5 ± 0.12
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0


In [84]:
compute_mean_std(results_scgan_dentate)

Unnamed: 0 1.0 ± 0.58
1-Wasserstein_PCA 22.44 ± 0.03
2-Wasserstein_PCA 22.52 ± 0.03
Linear_MMD_PCA 616.04 ± 0.1
Poly_MMD_PCA 411409.1 ± 1121.23
RBF_MMD_PCA 0.42 ± 0.0
KNN identity 0.61 ± 0.0
KNN identity PCA 1.0 ± 0.0
precision 0.64 ± 0.01
recall 0.15 ± 0.0
density 2.03 ± 0.02
coverage 0.92 ± 0.0
precision_PCA 0.0 ± 0.0
recall_PCA 0.0 ± 0.0
density_PCA 0.0 ± 0.0
coverage_PCA 0.0 ± 0.0


## Check ranges

In [83]:
adata_scgan = sc.read_h5ad("/home/icb/alessandro.palma/environment/cfgen/project_folder/baseline_experiments/scgan/model_runs/hlca_core/hlca_core_generated_0.h5ad")

In [40]:
# adata_cfgen = sc.read_h5ad("/home/icb/alessandro.palma/environment/cfgen/project_folder/datasets/generated/hlca_core/generated_cells_0.h5ad")

In [None]:
adata_original = sc.read_h5ad("/home/icb/alessandro.palma/environment/cfgen/project_folder/datasets/processed_full_genome/hlca_core/hlca_core_test.h5ad")
adata_original.X = adata_original.layers["X_counts"].A.copy()
adata_original = adata_original[:,adata_scgan.var.index]
sc.pp.normalize_total(adata_original, target_sum=1e4)
sc.pp.log1p(adata_original)

In [None]:
sc.pp.normalize_total(adata_scgan, target_sum=1e4)

In [None]:
# adata_scgan.X.sum(1)
sc.pp.log1p(adata_scgan)

In [None]:
adata_scgan.X.A.sum(1).mean()

In [None]:
adata_original.X.sum(1).mean()

In [None]:
# adata_merged = sc.AnnData(X=np.concatenate([adata_original.X.copy().A, adata_cfgen.X.copy().A]),
#                           obs={"dataset": ["real" for _ in range(len(adata_original))]+["fake" for _ in range(len(adata_cfgen))]})

adata_merged = sc.AnnData(X=np.concatenate([adata_original.X.copy(), adata_scgan.X.copy().A]),
                          obs={"dataset": ["real" for _ in range(len(adata_original))]+["fake" for _ in range(len(adata_scgan))]})

In [None]:
sc.tl.pca(adata_merged)
sc.pp.neighbors(adata_merged)
sc.tl.umap(adata_merged)

In [None]:
sc.pl.umap(adata_merged, color="dataset")

In [37]:
(adata_original.X>0).sum(1).mean()

NameError: name 'adata_original' is not defined

In [None]:
(adata_scgan.X>0).sum(1).mean()