# Read real and generated datasets 

In [1]:
import scanpy as sc
from celldreamer.eval.compute_evaluation_metrics import process_labels, compute_evaluation_metrics
from scipy import sparse
import pandas as pd
import numpy as np

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


## PBMC3K

In [2]:
results_celldreamer_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/celldreamer_pbmc3k")
results_scDiff_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/scdiff_pbmc3k")
results_scvi_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/scvi_pbmc3k")

In [3]:
results_celldreamer_pbmc3k.groupby("ct").mean(0).mean(0)

Unnamed: 0               8.500000
1-Wasserstein_PCA       14.641439
2-Wasserstein_PCA       14.773540
Linear_MMD_PCA         197.266388
Poly_MMD_PCA         95584.216146
KNN identity             0.668656
KNN identity PCA         0.995641
precision                0.528524
recall                   0.419747
density                  1.026494
coverage                 0.969991
precision_PCA            0.360238
recall_PCA               0.017053
density_PCA              0.042534
coverage_PCA             0.027360
KNN category             0.362435
KNN category PCA         0.342022
global_f1                0.482471
dtype: float64

In [4]:
results_scDiff_pbmc3k.groupby("ct").mean(0).mean(0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        20.205438
2-Wasserstein_PCA        20.468624
Linear_MMD_PCA          384.211212
Poly_MMD_PCA         468134.206597
KNN identity              0.375386
KNN identity PCA          0.995012
precision                 0.754376
recall                    0.416368
density                   1.942962
coverage                  0.920271
precision_PCA             0.077549
recall_PCA                0.136786
density_PCA               0.007878
coverage_PCA              0.006988
KNN category              0.217319
KNN category PCA          0.232785
global_f1                 0.547374
dtype: float64

In [5]:
results_scvi_pbmc3k.groupby("ct").mean(0).mean(0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        15.503302
2-Wasserstein_PCA        15.616017
Linear_MMD_PCA          225.166798
Poly_MMD_PCA         126934.672526
KNN identity              0.534112
KNN identity PCA          0.997485
precision                 0.467447
recall                    0.613868
density                   0.718467
coverage                  0.936612
precision_PCA             0.298459
recall_PCA                0.014672
density_PCA               0.031904
coverage_PCA              0.018476
KNN category              0.433424
KNN category PCA          0.462517
global_f1                 0.569406
dtype: float64

## PBMC68K

In [6]:
results_celldreamer_pbmc68k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/celldreamer_pbmc68k")
results_scDiff_pbmc68k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/scdiff_pbmc68k")
results_scvi_pbmc68k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/scvi_pbmc68k")

In [7]:
results_celldreamer_pbmc68k.groupby("ct").mean(0).mean(0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       10.866892
2-Wasserstein_PCA       10.954879
Linear_MMD_PCA         104.925854
Poly_MMD_PCA         11288.944734
KNN identity             0.584389
KNN identity PCA         0.994051
precision                0.605742
recall                   0.386294
density                  1.528960
coverage                 0.998372
precision_PCA            0.099891
recall_PCA               0.125106
density_PCA              0.028323
coverage_PCA             0.066918
KNN category             0.036116
KNN category PCA         0.041856
global_f1                0.181790
dtype: float64

In [8]:
results_scDiff_pbmc68k.groupby("ct").mean(0).mean(0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       11.333264
2-Wasserstein_PCA       11.440775
Linear_MMD_PCA         114.245439
Poly_MMD_PCA         13945.753400
KNN identity             0.358744
KNN identity PCA         0.990671
precision                0.861131
recall                   0.100939
density                  3.744794
coverage                 0.995253
precision_PCA            0.115121
recall_PCA               0.103048
density_PCA              0.044123
coverage_PCA             0.082194
KNN category             0.040983
KNN category PCA         0.048898
global_f1                0.171715
dtype: float64

In [9]:
results_scvi_pbmc68k.groupby("ct").mean(0).mean(0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       11.266849
2-Wasserstein_PCA       11.355505
Linear_MMD_PCA         116.049902
Poly_MMD_PCA         14518.493435
KNN identity             0.640510
KNN identity PCA         0.990665
precision                0.529436
recall                   0.538984
density                  0.986975
coverage                 0.998300
precision_PCA            0.099659
recall_PCA               0.183536
density_PCA              0.036279
coverage_PCA             0.080567
KNN category             0.046666
KNN category PCA         0.053542
global_f1                0.247091
dtype: float64

## Neurips

In [47]:
# results_celldreamer_neurips = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/celldreamer_neurips")
# results_scDiff_neurips = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/scdiff_neurips")
# results_scvi_neurips = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/scvi_neurips")

In [48]:
# results_celldreamer_neurips.groupby("ct").mean(0).mean(0)

In [49]:
# results_scDiff_neurips.groupby("ct").mean(0).mean(0)

In [50]:
# results_scvi_neurips.groupby("ct").mean(0).mean(0)

## HLCA

In [14]:
results_celldreamer_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/celldreamer_hlca_core")
results_scDiff_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/scdiff_hlca_core")
results_scvi_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/scvi_hlca_core")

In [15]:
results_celldreamer_hlca.groupby("ct").mean(0).mean(0)

Unnamed: 0              68.159420
1-Wasserstein_PCA        8.934355
2-Wasserstein_PCA        9.004859
Linear_MMD_PCA          75.675481
Poly_MMD_PCA         21488.341418
KNN identity             0.799119
KNN identity PCA         0.997990
precision                0.312188
recall                   0.714049
density                  0.277074
coverage                 0.562643
precision_PCA            0.247983
recall_PCA               0.049825
density_PCA              0.040411
coverage_PCA             0.020011
KNN category             0.175858
KNN category PCA         0.070032
global_f1                0.729281
dtype: float64

In [16]:
results_scDiff_hlca.groupby("ct").mean(0).mean(0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       12.415400
2-Wasserstein_PCA       12.563746
Linear_MMD_PCA         153.265746
Poly_MMD_PCA         36092.431258
KNN identity             0.675179
KNN identity PCA         0.999406
precision                0.838976
recall                   0.019630
density                  2.896572
coverage                 0.722110
precision_PCA            0.081854
recall_PCA               0.040732
density_PCA              0.011583
coverage_PCA             0.006338
KNN category             0.077639
KNN category PCA         0.017750
global_f1                0.529007
dtype: float64

In [17]:
results_scvi_hlca.groupby("ct").mean(0).mean(0)

Unnamed: 0              68.159420
1-Wasserstein_PCA        9.715635
2-Wasserstein_PCA        9.863900
Linear_MMD_PCA          89.961478
Poly_MMD_PCA         20045.147813
KNN identity             0.836272
KNN identity PCA         0.998003
precision                0.293756
recall                   0.692874
density                  0.287013
coverage                 0.501307
precision_PCA            0.251153
recall_PCA               0.031976
density_PCA              0.037243
coverage_PCA             0.016125
KNN category             0.086380
KNN category PCA         0.033722
global_f1                0.566466
dtype: float64

## Tabula Muris

In [18]:
results_celldreamer_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/celldreamer_tabula_muris")
results_scDiff_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/scdiff_tabula_muris")
results_scvi_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/scvi_tabula_muris")

In [19]:
results_celldreamer_tabula.groupby("ct").mean(0).mean(0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        5.497185
2-Wasserstein_PCA        5.654067
Linear_MMD_PCA          48.982557
Poly_MMD_PCA         29400.772749
KNN identity             0.786290
KNN identity PCA         0.997071
precision                0.228479
recall                   0.766821
density                  0.144179
coverage                 0.408669
precision_PCA            0.320494
recall_PCA               0.038283
density_PCA              0.078457
coverage_PCA             0.023515
KNN category             0.152590
KNN category PCA         0.100402
global_f1                0.822986
dtype: float64

In [20]:
results_scDiff_tabula.groupby("ct").mean(0).mean(0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        5.672982
2-Wasserstein_PCA        5.971829
Linear_MMD_PCA          47.145136
Poly_MMD_PCA         29875.455579
KNN identity             0.724058
KNN identity PCA         0.997935
precision                0.864466
recall                   0.018444
density                  3.872415
coverage                 0.732823
precision_PCA            0.368661
recall_PCA               0.023250
density_PCA              0.096775
coverage_PCA             0.018492
KNN category             0.115198
KNN category PCA         0.074284
global_f1                0.720903
dtype: float64

In [21]:
results_scvi_tabula.groupby("ct").mean(0).mean(0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        7.053684
2-Wasserstein_PCA        7.491583
Linear_MMD_PCA          58.284025
Poly_MMD_PCA         33107.867704
KNN identity             0.837327
KNN identity PCA         0.996392
precision                0.221756
recall                   0.834922
density                  0.166329
coverage                 0.308248
precision_PCA            0.411885
recall_PCA               0.011911
density_PCA              0.105155
coverage_PCA             0.024234
KNN category             0.103828
KNN category PCA         0.081443
global_f1                0.661092
dtype: float64

## Dentategyrus

In [22]:
results_celldreamer_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/celldreamer_dentategyrus")
results_scDiff_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/scdiff_dentategyrus")
results_scvi_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/scvi_dentategyrus")

In [23]:
results_celldreamer_dentategyrus.groupby("ct").mean(0).mean(0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        18.662224
2-Wasserstein_PCA        18.739739
Linear_MMD_PCA          342.775038
Poly_MMD_PCA         190140.036737
KNN identity              0.628234
KNN identity PCA          0.999245
precision                 0.467127
recall                    0.715846
density                   0.570774
coverage                  0.875912
precision_PCA             0.017435
recall_PCA                0.149869
density_PCA               0.001943
coverage_PCA              0.003429
KNN category              0.212285
KNN category PCA          0.121902
global_f1                 0.734901
dtype: float64

In [24]:
results_scDiff_dentategyrus.groupby("ct").mean(0).mean(0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        19.650490
2-Wasserstein_PCA        19.702225
Linear_MMD_PCA          381.834917
Poly_MMD_PCA         236798.933873
KNN identity              0.371846
KNN identity PCA          0.999869
precision                 0.992310
recall                    0.003341
density                   8.157343
coverage                  0.999191
precision_PCA             0.012856
recall_PCA                0.000241
density_PCA               0.001299
coverage_PCA              0.000328
KNN category              0.217608
KNN category PCA          0.109804
global_f1                 0.772298
dtype: float64

In [25]:
results_scvi_dentategyrus.groupby("ct").mean(0).mean(0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        19.777951
2-Wasserstein_PCA        19.854181
Linear_MMD_PCA          385.150649
Poly_MMD_PCA         207594.029111
KNN identity              0.634152
KNN identity PCA          0.999539
precision                 0.571208
recall                    0.747270
density                   0.758405
coverage                  0.975166
precision_PCA             0.032212
recall_PCA                0.000307
density_PCA               0.003352
coverage_PCA              0.000827
KNN category              0.186090
KNN category PCA          0.104092
global_f1                 0.711261
dtype: float64

## C. Elegans

In [43]:
# results_celldreamer_c_elegans = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/celldreamer_c_elegans")
# results_scDiff_c_elegans = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/scdiff_c_elegans")
# results_scvi_c_elegans = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/scvi_c_elegans")

In [44]:
# results_celldreamer_c_elegans.groupby("ct").mean(0).mean(0)

In [45]:
# results_scDiff_c_elegans.groupby("ct").mean(0).mean(0)

In [46]:
# results_scvi_c_elegans.groupby("ct").mean(0).mean(0)