# Read evaluation results for real vs. generated datasets

In [49]:
import scanpy as sc
from celldreamer.eval.compute_evaluation_metrics import process_labels, compute_evaluation_metrics
from scipy import sparse
import pandas as pd
import numpy as np

## PBMC3K

In [50]:
# Load the saved PBMC3K result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_pbmc3k,
    results_scDiff_pbmc3k,
    results_scvi_pbmc3k,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/celldreamer_pbmc3k",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/scdiff_pbmc3k",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/scvi_pbmc3k",
    ),
)

In [51]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_pbmc3k.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               8.500000
1-Wasserstein_PCA       14.641439
2-Wasserstein_PCA       14.773540
Linear_MMD_PCA         197.266388
Poly_MMD_PCA         95584.216146
KNN identity             0.668656
KNN identity PCA         0.995641
precision                0.528524
recall                   0.419747
density                  1.026494
coverage                 0.969991
precision_PCA            0.360238
recall_PCA               0.017053
density_PCA              0.042534
coverage_PCA             0.027360
KNN category             0.362435
KNN category PCA         0.342022
global_f1                0.482471
dtype: float64

In [52]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_pbmc3k.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        20.205438
2-Wasserstein_PCA        20.468624
Linear_MMD_PCA          384.211212
Poly_MMD_PCA         468134.206597
KNN identity              0.375386
KNN identity PCA          0.995012
precision                 0.754376
recall                    0.416368
density                   1.942962
coverage                  0.920271
precision_PCA             0.077549
recall_PCA                0.136786
density_PCA               0.007878
coverage_PCA              0.006988
KNN category              0.217319
KNN category PCA          0.232785
global_f1                 0.547374
dtype: float64

In [53]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_pbmc3k.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        15.503302
2-Wasserstein_PCA        15.616017
Linear_MMD_PCA          225.166798
Poly_MMD_PCA         126934.672526
KNN identity              0.534112
KNN identity PCA          0.997485
precision                 0.467447
recall                    0.613868
density                   0.718467
coverage                  0.936612
precision_PCA             0.298459
recall_PCA                0.014672
density_PCA               0.031904
coverage_PCA              0.018476
KNN category              0.433424
KNN category PCA          0.462517
global_f1                 0.569406
dtype: float64

## PBMC68K

In [54]:
# Load the saved PBMC68K result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_pbmc68k,
    results_scDiff_pbmc68k,
    results_scvi_pbmc68k,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/celldreamer_pbmc68k",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/scdiff_pbmc68k",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/scvi_pbmc68k",
    ),
)

In [55]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_pbmc68k.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       10.866892
2-Wasserstein_PCA       10.954879
Linear_MMD_PCA         104.925854
Poly_MMD_PCA         11288.944734
KNN identity             0.584389
KNN identity PCA         0.994051
precision                0.605742
recall                   0.386294
density                  1.528960
coverage                 0.998372
precision_PCA            0.099891
recall_PCA               0.125106
density_PCA              0.028323
coverage_PCA             0.066918
KNN category             0.036116
KNN category PCA         0.041856
global_f1                0.181790
dtype: float64

In [56]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_pbmc68k.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       11.333264
2-Wasserstein_PCA       11.440775
Linear_MMD_PCA         114.245439
Poly_MMD_PCA         13945.753400
KNN identity             0.358744
KNN identity PCA         0.990671
precision                0.861131
recall                   0.100939
density                  3.744794
coverage                 0.995253
precision_PCA            0.115121
recall_PCA               0.103048
density_PCA              0.044123
coverage_PCA             0.082194
KNN category             0.040983
KNN category PCA         0.048898
global_f1                0.171715
dtype: float64

In [57]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_pbmc68k.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       11.266849
2-Wasserstein_PCA       11.355505
Linear_MMD_PCA         116.049902
Poly_MMD_PCA         14518.493435
KNN identity             0.640510
KNN identity PCA         0.990665
precision                0.529436
recall                   0.538984
density                  0.986975
coverage                 0.998300
precision_PCA            0.099659
recall_PCA               0.183536
density_PCA              0.036279
coverage_PCA             0.080567
KNN category             0.046666
KNN category PCA         0.053542
global_f1                0.247091
dtype: float64

## NeurIPS

In [58]:
# Load the saved NeurIPS result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_neurips,
    results_scDiff_neurips,
    results_scvi_neurips,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/celldreamer_neurips",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/scdiff_neurips",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/scvi_neurips",
    ),
)

In [59]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_neurips.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               54.561404
1-Wasserstein_PCA        20.783177
2-Wasserstein_PCA        20.871373
Linear_MMD_PCA          412.835950
Poly_MMD_PCA         264639.850466
KNN identity              0.640987
KNN identity PCA          0.999827
precision                 0.461815
recall                    0.749885
density                   0.446031
coverage                  0.791249
precision_PCA             0.001805
recall_PCA                0.035039
density_PCA               0.000287
coverage_PCA              0.002381
KNN category              0.183638
KNN category PCA          0.137738
global_f1                 0.513352
dtype: float64

In [60]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_neurips.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               54.561404
1-Wasserstein_PCA        19.471005
2-Wasserstein_PCA        19.548508
Linear_MMD_PCA          340.596710
Poly_MMD_PCA         112432.314179
KNN identity              0.899768
KNN identity PCA          0.999435
precision                 0.766492
recall                    0.005035
density                   1.902711
coverage                  0.448581
precision_PCA             0.036456
recall_PCA                0.000175
density_PCA               0.004872
coverage_PCA              0.005010
KNN category              0.107092
KNN category PCA          0.017683
global_f1                 0.360248
dtype: float64

In [61]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_neurips.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               54.561404
1-Wasserstein_PCA        21.963241
2-Wasserstein_PCA        22.081605
Linear_MMD_PCA          459.160390
Poly_MMD_PCA         281347.473136
KNN identity              0.709490
KNN identity PCA          0.999837
precision                 0.427301
recall                    0.771091
density                   0.493752
coverage                  0.751475
precision_PCA             0.001077
recall_PCA                0.048434
density_PCA               0.000171
coverage_PCA              0.001153
KNN category              0.094968
KNN category PCA          0.078116
global_f1                 0.447228
dtype: float64

## HLCA

In [62]:
# Load the saved HLCA (core) result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_hlca,
    results_scDiff_hlca,
    results_scvi_hlca,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/celldreamer_hlca_core",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/scdiff_hlca_core",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/scvi_hlca_core",
    ),
)

In [63]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_hlca.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       11.714789
2-Wasserstein_PCA       11.800174
Linear_MMD_PCA          99.324557
Poly_MMD_PCA         37843.675969
KNN identity             0.799119
KNN identity PCA         0.998666
precision                0.312188
recall                   0.714049
density                  0.277074
coverage                 0.562643
precision_PCA            0.086514
recall_PCA               0.036534
density_PCA              0.012755
coverage_PCA             0.018259
KNN category             0.175858
KNN category PCA         0.140397
global_f1                0.729281
dtype: float64

In [64]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_hlca.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       16.891569
2-Wasserstein_PCA       17.003974
Linear_MMD_PCA         239.167356
Poly_MMD_PCA         62378.449937
KNN identity             0.675179
KNN identity PCA         0.999844
precision                0.838976
recall                   0.019630
density                  2.896572
coverage                 0.722110
precision_PCA            0.014361
recall_PCA               0.000000
density_PCA              0.001630
coverage_PCA             0.001605
KNN category             0.077639
KNN category PCA         0.019938
global_f1                0.529007
dtype: float64

In [65]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_hlca.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       12.721914
2-Wasserstein_PCA       12.882959
Linear_MMD_PCA         121.367426
Poly_MMD_PCA         37059.228194
KNN identity             0.836272
KNN identity PCA         0.998612
precision                0.293756
recall                   0.692874
density                  0.287013
coverage                 0.501307
precision_PCA            0.093992
recall_PCA               0.016694
density_PCA              0.013422
coverage_PCA             0.017302
KNN category             0.086380
KNN category PCA         0.058758
global_f1                0.566466
dtype: float64

## Tabula Muris

In [66]:
# Load the saved Tabula Muris result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_tabula,
    results_scDiff_tabula,
    results_scvi_tabula,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/celldreamer_tabula_muris",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/scdiff_tabula_muris",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/scvi_tabula_muris",
    ),
)

In [67]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_tabula.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        8.601209
2-Wasserstein_PCA        8.777106
Linear_MMD_PCA          86.535499
Poly_MMD_PCA         48384.760905
KNN identity             0.786290
KNN identity PCA         0.998199
precision                0.228479
recall                   0.766821
density                  0.144179
coverage                 0.408669
precision_PCA            0.167651
recall_PCA               0.017201
density_PCA              0.032465
coverage_PCA             0.019488
KNN category             0.152590
KNN category PCA         0.185623
global_f1                0.822986
dtype: float64

In [68]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_tabula.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        8.710917
2-Wasserstein_PCA        9.017732
Linear_MMD_PCA          85.739965
Poly_MMD_PCA         51165.477433
KNN identity             0.724058
KNN identity PCA         0.997720
precision                0.864466
recall                   0.018444
density                  3.872415
coverage                 0.732823
precision_PCA            0.292569
recall_PCA               0.018786
density_PCA              0.090786
coverage_PCA             0.029003
KNN category             0.115198
KNN category PCA         0.094224
global_f1                0.720903
dtype: float64

In [69]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_tabula.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              19.000000
1-Wasserstein_PCA       10.658042
2-Wasserstein_PCA       11.049614
Linear_MMD_PCA         100.283401
Poly_MMD_PCA         53728.491474
KNN identity             0.837327
KNN identity PCA         0.996962
precision                0.221756
recall                   0.834922
density                  0.166329
coverage                 0.308248
precision_PCA            0.234495
recall_PCA               0.017089
density_PCA              0.049139
coverage_PCA             0.024186
KNN category             0.103828
KNN category PCA         0.096174
global_f1                0.661092
dtype: float64

## Dentate gyrus

In [70]:
# Load the saved dentate gyrus result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_dentategyrus,
    results_scDiff_dentategyrus,
    results_scvi_dentategyrus,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/celldreamer_dentategyrus",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/scdiff_dentategyrus",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/scvi_dentategyrus",
    ),
)

In [71]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_dentategyrus.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        18.662221
2-Wasserstein_PCA        18.739736
Linear_MMD_PCA          342.774892
Poly_MMD_PCA         190139.858724
KNN identity              0.628234
KNN identity PCA          0.999245
precision                 0.467127
recall                    0.715846
density                   0.570774
coverage                  0.875912
precision_PCA             0.017435
recall_PCA                0.149869
density_PCA               0.001943
coverage_PCA              0.003429
KNN category              0.212285
KNN category PCA          0.121902
global_f1                 0.734901
dtype: float64

In [72]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_dentategyrus.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        19.650484
2-Wasserstein_PCA        19.702220
Linear_MMD_PCA          381.834711
Poly_MMD_PCA         236798.584728
KNN identity              0.371846
KNN identity PCA          0.999869
precision                 0.992310
recall                    0.003341
density                   8.157359
coverage                  0.999191
precision_PCA             0.012921
recall_PCA                0.000241
density_PCA               0.001305
coverage_PCA              0.000328
KNN category              0.217608
KNN category PCA          0.109804
global_f1                 0.772298
dtype: float64

In [73]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_dentategyrus.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        19.777947
2-Wasserstein_PCA        19.854177
Linear_MMD_PCA          385.150489
Poly_MMD_PCA         207593.836961
KNN identity              0.634152
KNN identity PCA          0.999539
precision                 0.571208
recall                    0.747270
density                   0.758405
coverage                  0.975166
precision_PCA             0.032212
recall_PCA                0.000307
density_PCA               0.003352
coverage_PCA              0.000827
KNN category              0.186090
KNN category PCA          0.104092
global_f1                 0.711261
dtype: float64

## C. elegans

In [74]:
# Load the saved C. elegans result tables, one CSV per generative model
# (CellDreamer, scDiff, scVI), in that order.
(
    results_celldreamer_c_elegans,
    results_scDiff_c_elegans,
    results_scvi_c_elegans,
) = map(
    pd.read_csv,
    (
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/celldreamer_c_elegans",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/scdiff_c_elegans",
        "/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/scvi_c_elegans",
    ),
)

In [75]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_celldreamer_c_elegans.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              39.500000
1-Wasserstein_PCA        6.518009
2-Wasserstein_PCA        6.705810
Linear_MMD_PCA          47.739747
Poly_MMD_PCA         11429.529593
KNN identity             0.814637
KNN identity PCA         0.984453
precision                0.341235
recall                   0.532569
density                  0.650531
coverage                 0.831439
precision_PCA            0.556448
recall_PCA               0.178957
density_PCA              0.131665
coverage_PCA             0.185308
KNN category             0.149434
KNN category PCA         0.077087
global_f1                0.359220
dtype: float64

In [76]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scDiff_c_elegans.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              39.500000
1-Wasserstein_PCA        5.800789
2-Wasserstein_PCA        5.974413
Linear_MMD_PCA          39.159740
Poly_MMD_PCA         11617.274658
KNN identity             0.596029
KNN identity PCA         0.979174
precision                0.864948
recall                   0.091663
density                  2.792390
coverage                 0.934051
precision_PCA            0.672273
recall_PCA               0.156636
density_PCA              0.174075
coverage_PCA             0.217021
KNN category             0.122225
KNN category PCA         0.056448
global_f1                0.355773
dtype: float64

In [77]:
# Macro-average: mean of every metric within each "ct" group, then the mean of
# those group means. Keywords are explicit because GroupBy.mean's first
# positional parameter is numeric_only, not axis.
# NOTE(review): "Unnamed: 0" looks like a saved row index, not a metric — confirm.
results_scvi_c_elegans.groupby("ct").mean(numeric_only=True).mean(axis=0)

Unnamed: 0              39.500000
1-Wasserstein_PCA        7.095104
2-Wasserstein_PCA        7.414576
Linear_MMD_PCA          51.646060
Poly_MMD_PCA         11440.373647
KNN identity             0.800537
KNN identity PCA         0.983384
precision                0.366265
recall                   0.578514
density                  0.618027
coverage                 0.792747
precision_PCA            0.555051
recall_PCA               0.189256
density_PCA              0.125461
coverage_PCA             0.167951
KNN category             0.133530
KNN category PCA         0.067718
global_f1                0.315274
dtype: float64