# Read real and generated datasets 

In [30]:
import scanpy as sc
from celldreamer.eval.compute_evaluation_metrics import process_labels, compute_evaluation_metrics
from scipy import sparse
import pandas as pd
import numpy as np

## PBMC3K

In [31]:
results_celldreamer_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/celldreamer_pbmc3k")
results_scDiff_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/scdiff_pbmc3k")
results_scvi_pbmc3k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc3k/scvi_pbmc3k")

In [32]:
results_celldreamer_pbmc3k.groupby("ct").mean(0).mean(0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        18.507745
2-Wasserstein_PCA        18.645718
Linear_MMD_PCA          206.440316
Poly_MMD_PCA         103262.722222
KNN identity              0.668656
KNN identity PCA          0.978350
precision                 0.528524
recall                    0.419747
density                   1.026494
coverage                  0.969991
precision_PCA             0.756991
recall_PCA                0.001852
density_PCA               0.459032
coverage_PCA              0.454184
KNN category              0.362435
KNN category PCA          0.360141
global_f1                 0.482471
dtype: float64

In [33]:
results_scDiff_pbmc3k.groupby("ct").mean(0).mean(0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        23.546097
2-Wasserstein_PCA        23.753073
Linear_MMD_PCA          411.148521
Poly_MMD_PCA         504912.364149
KNN identity              0.375386
KNN identity PCA          0.994276
precision                 0.754376
recall                    0.416368
density                   1.942962
coverage                  0.920271
precision_PCA             0.462234
recall_PCA                0.036325
density_PCA               0.157505
coverage_PCA              0.205276
KNN category              0.217319
KNN category PCA          0.227511
global_f1                 0.547374
dtype: float64

In [34]:
results_scvi_pbmc3k.groupby("ct").mean(0).mean(0)

Unnamed: 0                8.500000
1-Wasserstein_PCA        19.164504
2-Wasserstein_PCA        19.296253
Linear_MMD_PCA          235.495183
Poly_MMD_PCA         136190.481771
KNN identity              0.534112
KNN identity PCA          0.986657
precision                 0.467447
recall                    0.613868
density                   0.718467
coverage                  0.936612
precision_PCA             0.703626
recall_PCA                0.000000
density_PCA               0.402758
coverage_PCA              0.354359
KNN category              0.433424
KNN category PCA          0.367215
global_f1                 0.569406
dtype: float64

## PBMC68K

In [35]:
results_celldreamer_pbmc68k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/celldreamer_pbmc68k")
results_scDiff_pbmc68k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/scdiff_pbmc68k")
results_scvi_pbmc68k = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/pbmc68k/scvi_pbmc68k")

In [36]:
results_celldreamer_pbmc68k.groupby("ct").mean(0).mean(0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       16.738084
2-Wasserstein_PCA       16.843674
Linear_MMD_PCA         169.941071
Poly_MMD_PCA         28956.810836
KNN identity             0.584389
KNN identity PCA         0.996573
precision                0.605742
recall                   0.386294
density                  1.528960
coverage                 0.998372
precision_PCA            0.048303
recall_PCA               0.184313
density_PCA              0.020210
coverage_PCA             0.075751
KNN category             0.036116
KNN category PCA         0.046735
global_f1                0.181790
dtype: float64

In [37]:
results_scDiff_pbmc68k.groupby("ct").mean(0).mean(0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       16.896318
2-Wasserstein_PCA       17.035389
Linear_MMD_PCA         180.022656
Poly_MMD_PCA         33879.334418
KNN identity             0.358744
KNN identity PCA         0.989600
precision                0.861131
recall                   0.100939
density                  3.744794
coverage                 0.995253
precision_PCA            0.077783
recall_PCA               0.080499
density_PCA              0.057410
coverage_PCA             0.113228
KNN category             0.040983
KNN category PCA         0.047737
global_f1                0.171715
dtype: float64

In [38]:
results_scvi_pbmc68k.groupby("ct").mean(0).mean(0)

Unnamed: 0              13.000000
1-Wasserstein_PCA       17.048357
2-Wasserstein_PCA       17.167889
Linear_MMD_PCA         186.764956
Poly_MMD_PCA         36055.135344
KNN identity             0.640510
KNN identity PCA         0.992825
precision                0.529436
recall                   0.538984
density                  0.986975
coverage                 0.998300
precision_PCA            0.049443
recall_PCA               0.305103
density_PCA              0.036035
coverage_PCA             0.138192
KNN category             0.046666
KNN category PCA         0.063987
global_f1                0.247091
dtype: float64

## Neurips

In [39]:
results_celldreamer_neurips = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/celldreamer_neurips")
results_scDiff_neurips = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/scdiff_neurips")
results_scvi_neurips = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/neurips/scvi_neurips")

In [40]:
results_celldreamer_neurips.groupby("ct").mean(0).mean(0)

Unnamed: 0               54.561404
1-Wasserstein_PCA        20.783177
2-Wasserstein_PCA        20.871373
Linear_MMD_PCA          412.835950
Poly_MMD_PCA         264639.850466
KNN identity              0.640987
KNN identity PCA          0.999827
precision                 0.461815
recall                    0.749885
density                   0.446031
coverage                  0.791249
precision_PCA             0.001805
recall_PCA                0.035039
density_PCA               0.000287
coverage_PCA              0.002381
KNN category              0.183638
KNN category PCA          0.137738
global_f1                 0.513352
dtype: float64

In [41]:
results_scDiff_neurips.groupby("ct").mean(0).mean(0)

Unnamed: 0               54.561404
1-Wasserstein_PCA        19.471005
2-Wasserstein_PCA        19.548508
Linear_MMD_PCA          340.596710
Poly_MMD_PCA         112432.314179
KNN identity              0.899768
KNN identity PCA          0.999435
precision                 0.766492
recall                    0.005035
density                   1.902711
coverage                  0.448581
precision_PCA             0.036456
recall_PCA                0.000175
density_PCA               0.004872
coverage_PCA              0.005010
KNN category              0.107092
KNN category PCA          0.017683
global_f1                 0.360248
dtype: float64

In [42]:
results_scvi_neurips.groupby("ct").mean(0).mean(0)

Unnamed: 0               54.561404
1-Wasserstein_PCA        21.963241
2-Wasserstein_PCA        22.081605
Linear_MMD_PCA          459.160390
Poly_MMD_PCA         281347.473136
KNN identity              0.709490
KNN identity PCA          0.999837
precision                 0.427301
recall                    0.771091
density                   0.493752
coverage                  0.751475
precision_PCA             0.001077
recall_PCA                0.048434
density_PCA               0.000171
coverage_PCA              0.001153
KNN category              0.094968
KNN category PCA          0.078116
global_f1                 0.447228
dtype: float64

## HLCA

In [43]:
results_celldreamer_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/celldreamer_hlca_core")
results_scDiff_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/scdiff_hlca_core")
results_scvi_hlca = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/hlca_core/scvi_hlca_core")

In [44]:
results_celldreamer_hlca.groupby("ct").mean(0).mean(0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       11.714789
2-Wasserstein_PCA       11.800174
Linear_MMD_PCA          99.324557
Poly_MMD_PCA         37843.675969
KNN identity             0.799119
KNN identity PCA         0.998666
precision                0.312188
recall                   0.714049
density                  0.277074
coverage                 0.562643
precision_PCA            0.086514
recall_PCA               0.036534
density_PCA              0.012755
coverage_PCA             0.018259
KNN category             0.175858
KNN category PCA         0.140397
global_f1                0.729281
dtype: float64

In [45]:
results_scDiff_hlca.groupby("ct").mean(0).mean(0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       16.891569
2-Wasserstein_PCA       17.003974
Linear_MMD_PCA         239.167356
Poly_MMD_PCA         62378.449937
KNN identity             0.675179
KNN identity PCA         0.999844
precision                0.838976
recall                   0.019630
density                  2.896572
coverage                 0.722110
precision_PCA            0.014361
recall_PCA               0.000000
density_PCA              0.001630
coverage_PCA             0.001605
KNN category             0.077639
KNN category PCA         0.019938
global_f1                0.529007
dtype: float64

In [46]:
results_scvi_hlca.groupby("ct").mean(0).mean(0)

Unnamed: 0              68.159420
1-Wasserstein_PCA       12.721914
2-Wasserstein_PCA       12.882959
Linear_MMD_PCA         121.367426
Poly_MMD_PCA         37059.228194
KNN identity             0.836272
KNN identity PCA         0.998612
precision                0.293756
recall                   0.692874
density                  0.287013
coverage                 0.501307
precision_PCA            0.093992
recall_PCA               0.016694
density_PCA              0.013422
coverage_PCA             0.017302
KNN category             0.086380
KNN category PCA         0.058758
global_f1                0.566466
dtype: float64

## Tabula Muris

In [47]:
results_celldreamer_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/celldreamer_tabula_muris")
results_scDiff_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/scdiff_tabula_muris")
results_scvi_tabula = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/tabula_muris/scvi_tabula_muris")

In [48]:
results_celldreamer_tabula.groupby("ct").mean(0).mean(0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        8.601209
2-Wasserstein_PCA        8.777106
Linear_MMD_PCA          86.535499
Poly_MMD_PCA         48384.760905
KNN identity             0.786290
KNN identity PCA         0.998199
precision                0.228479
recall                   0.766821
density                  0.144179
coverage                 0.408669
precision_PCA            0.167651
recall_PCA               0.017201
density_PCA              0.032465
coverage_PCA             0.019488
KNN category             0.152590
KNN category PCA         0.185623
global_f1                0.822986
dtype: float64

In [49]:
results_scDiff_tabula.groupby("ct").mean(0).mean(0)

Unnamed: 0              19.000000
1-Wasserstein_PCA        8.710917
2-Wasserstein_PCA        9.017732
Linear_MMD_PCA          85.739965
Poly_MMD_PCA         51165.477433
KNN identity             0.724058
KNN identity PCA         0.997720
precision                0.864466
recall                   0.018444
density                  3.872415
coverage                 0.732823
precision_PCA            0.292569
recall_PCA               0.018786
density_PCA              0.090786
coverage_PCA             0.029003
KNN category             0.115198
KNN category PCA         0.094224
global_f1                0.720903
dtype: float64

In [50]:
results_scvi_tabula.groupby("ct").mean(0).mean(0)

Unnamed: 0              19.000000
1-Wasserstein_PCA       10.658042
2-Wasserstein_PCA       11.049614
Linear_MMD_PCA         100.283401
Poly_MMD_PCA         53728.491474
KNN identity             0.837327
KNN identity PCA         0.996962
precision                0.221756
recall                   0.834922
density                  0.166329
coverage                 0.308248
precision_PCA            0.234495
recall_PCA               0.017089
density_PCA              0.049139
coverage_PCA             0.024186
KNN category             0.103828
KNN category PCA         0.096174
global_f1                0.661092
dtype: float64

## Dentategyrus

In [51]:
results_celldreamer_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/celldreamer_dentategyrus")
results_scDiff_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/scdiff_dentategyrus")
results_scvi_dentategyrus = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/dentategyrus/scvi_dentategyrus")

In [52]:
results_celldreamer_dentategyrus.groupby("ct").mean(0).mean(0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        22.521032
2-Wasserstein_PCA        22.606862
Linear_MMD_PCA          447.444454
Poly_MMD_PCA         293682.330915
KNN identity              0.628234
KNN identity PCA          0.999439
precision                 0.467127
recall                    0.715846
density                   0.570774
coverage                  0.875912
precision_PCA             0.001071
recall_PCA                0.131068
density_PCA               0.000107
coverage_PCA              0.001005
KNN category              0.212285
KNN category PCA          0.141556
global_f1                 0.734901
dtype: float64

In [53]:
results_scDiff_dentategyrus.groupby("ct").mean(0).mean(0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        23.465292
2-Wasserstein_PCA        23.528260
Linear_MMD_PCA          502.444743
Poly_MMD_PCA         365375.890997
KNN identity              0.371846
KNN identity PCA          0.999902
precision                 0.992310
recall                    0.003341
density                   8.157343
coverage                  0.999191
precision_PCA             0.000066
recall_PCA                0.000000
density_PCA               0.000007
coverage_PCA              0.000066
KNN category              0.217608
KNN category PCA          0.124079
global_f1                 0.772298
dtype: float64

In [54]:
results_scvi_dentategyrus.groupby("ct").mean(0).mean(0)

Unnamed: 0               20.500000
1-Wasserstein_PCA        23.528413
2-Wasserstein_PCA        23.612565
Linear_MMD_PCA          497.845132
Poly_MMD_PCA         323506.075893
KNN identity              0.634152
KNN identity PCA          0.999902
precision                 0.571208
recall                    0.747270
density                   0.758405
coverage                  0.975166
precision_PCA             0.002558
recall_PCA                0.000000
density_PCA               0.000256
coverage_PCA              0.000262
KNN category              0.186090
KNN category PCA          0.111720
global_f1                 0.711261
dtype: float64

## C. Elegans

In [55]:
results_celldreamer_c_elegans = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/celldreamer_c_elegans")
results_scDiff_c_elegans = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/scdiff_c_elegans")
results_scvi_c_elegans = pd.read_csv("/home/icb/alessandro.palma/environment/celldreamer/experiments/results_cell_type/c_elegans/scvi_c_elegans")

In [56]:
results_celldreamer_c_elegans.groupby("ct").mean(0).mean(0)

Unnamed: 0              39.500000
1-Wasserstein_PCA        9.879584
2-Wasserstein_PCA       10.079601
Linear_MMD_PCA          74.191497
Poly_MMD_PCA         25013.776131
KNN identity             0.814637
KNN identity PCA         0.985401
precision                0.341235
recall                   0.532569
density                  0.650531
coverage                 0.831439
precision_PCA            0.464563
recall_PCA               0.183890
density_PCA              0.141900
coverage_PCA             0.293334
KNN category             0.149434
KNN category PCA         0.157362
global_f1                0.359220
dtype: float64

In [57]:
results_scDiff_c_elegans.groupby("ct").mean(0).mean(0)

Unnamed: 0              39.500000
1-Wasserstein_PCA        9.008274
2-Wasserstein_PCA        9.211179
Linear_MMD_PCA          64.325216
Poly_MMD_PCA         23888.387431
KNN identity             0.596029
KNN identity PCA         0.969001
precision                0.864948
recall                   0.091663
density                  2.792390
coverage                 0.934051
precision_PCA            0.684806
recall_PCA               0.171592
density_PCA              0.269599
coverage_PCA             0.409460
KNN category             0.122225
KNN category PCA         0.108202
global_f1                0.355773
dtype: float64

In [58]:
results_scvi_c_elegans.groupby("ct").mean(0).mean(0)

Unnamed: 0              39.500000
1-Wasserstein_PCA       10.744459
2-Wasserstein_PCA       11.023530
Linear_MMD_PCA          83.992075
Poly_MMD_PCA         25279.817668
KNN identity             0.800537
KNN identity PCA         0.981495
precision                0.366265
recall                   0.578514
density                  0.618027
coverage                 0.792747
precision_PCA            0.481837
recall_PCA               0.236960
density_PCA              0.134474
coverage_PCA             0.261887
KNN category             0.133530
KNN category PCA         0.126695
global_f1                0.315274
dtype: float64