In [4]:
import pandas as pd


def calculate_dataframe_mean(df):
    """Return a one-row frame holding the column means of the numeric rows of ``df``.

    Every column is coerced with ``pd.to_numeric(errors='coerce')``, so cells
    that cannot be parsed become NaN. Rows containing any NaN are then
    discarded, the ``fold`` column is removed, and the remaining columns are
    averaged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that contains a ``fold`` column (a missing ``fold`` column
        raises ``KeyError`` from ``DataFrame.drop``).

    Returns
    -------
    pandas.DataFrame
        Single-row frame (index ``0``) with one column per remaining input
        column.
    """
    coerced = df.apply(pd.to_numeric, errors='coerce')
    # Keep only rows in which every cell parsed as a number.
    complete_rows = coerced.dropna()
    metrics_only = complete_rows.drop(columns=['fold'])
    # Series of means -> single-column frame -> transpose to a single row.
    return metrics_only.mean().to_frame().T

In [5]:
from interpretable_ssl.trainers.scvi_trainer import *
from interpretable_ssl.trainers.scpoli_trainer import *
from interpretable_ssl.trainers.scpoli_original import *

Global seed set to 0
  new_rank_zero_deprecation(
  return new_rank_zero_deprecation(*args, **kwargs)
 captum (see https://github.com/pytorch/captum).


In [10]:
import pandas as pd
from copy import deepcopy

def drop_redundant_rows(df):
    """Drop rows of ``df`` that contain no numeric value, keeping ``trainer`` labels.

    All columns are coerced with ``pd.to_numeric(errors='coerce')``; rows in
    which every coerced cell is NaN are removed. The ``trainer`` column is
    then restored from the original frame (aligned on the index), because
    coercion turns non-numeric labels into NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``trainer`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with the surviving rows; ``df`` itself is not modified.
    """
    coerced = df.apply(pd.to_numeric, errors='coerce')
    has_numeric_value = coerced.dropna(how='all')
    # Restore the original labels for the rows that survived.
    return has_numeric_value.assign(trainer=df['trainer'])
    
def process_dataframe(df):
    """Average the numeric metric columns of ``df`` per trainer.

    The ``fold`` and ``trainer`` columns are set aside, the remaining columns
    are coerced with ``pd.to_numeric(errors='coerce')``, and rows in which
    every metric is NaN after coercion are dropped. The ``trainer`` labels are
    then re-attached (aligned on the index) and used as the grouping key.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``fold`` and ``trainer`` columns plus metric columns.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``trainer`` value (as the index) holding the mean
        of each metric column.
    """
    metric_columns = df.drop(columns=['fold', 'trainer'])
    numeric_metrics = metric_columns.apply(pd.to_numeric, errors='coerce')
    # Discard rows that carry no numeric metric at all.
    numeric_metrics = numeric_metrics.dropna(how='all')

    # Re-attach the labels so they can serve as the grouping key.
    labelled = numeric_metrics.assign(trainer=df['trainer'])
    return labelled.groupby('trainer').mean()

In [11]:
def compare_trainers_query_cross_val(trainers):
    """Run the cross-validated query evaluation for each trainer and summarise it.

    For every trainer, ``evaluate_custom_cross_val_models`` is called with the
    trainer's own ``query_scib_metrics`` as the evaluation callable, and the
    returned frame is tagged with the trainer's model name in a ``trainer``
    column. All tagged frames are concatenated once at the end.

    Parameters
    ----------
    trainers : iterable
        Trainer objects exposing ``get_model_name()``,
        ``evaluate_custom_cross_val_models(fn)`` and ``query_scib_metrics``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(process_dataframe(all_results), drop_redundant_rows(all_results))``:
        the per-trainer means first, then the individual rows with the
        all-non-numeric rows removed.
    """
    # Materialise once so a generator argument is not exhausted by the name pass.
    trainers = list(trainers)
    # Names are collected before any evaluation runs, matching the original
    # ordering (evaluation could change trainer state -- not visible from here).
    names = [trainer.get_model_name() for trainer in trainers]

    tagged_frames = []
    for trainer, name in zip(trainers, names):
        fold_metrics = trainer.evaluate_custom_cross_val_models(trainer.query_scib_metrics)
        # assign() returns a new frame, so the trainer's own result is not mutated.
        tagged_frames.append(fold_metrics.assign(trainer=name))

    # Concatenate once instead of growing a frame inside the loop; fall back to
    # an empty frame so an empty `trainers` behaves as it did before.
    if tagged_frames:
        all_results = pd.concat(tagged_frames, ignore_index=True)
    else:
        all_results = pd.DataFrame()

    # Defensive copy: the two summaries never share the same frame object.
    return process_dataframe(deepcopy(all_results)), drop_redundant_rows(all_results)

In [12]:
def compare_results(trainers, fine_tuning_epochs=50):
    """Collect ``query_scib_metrics`` results from each trainer into one frame.

    Each trainer has its ``fine_tuning_epochs`` attribute set, then
    ``query_scib_metrics(True)`` is called and the returned frame is tagged
    with the trainer's model name in a ``trainer`` column.

    Parameters
    ----------
    trainers : iterable
        Trainer objects exposing ``get_model_name()`` and
        ``query_scib_metrics``.
    fine_tuning_epochs : int, optional
        Value written to ``trainer.fine_tuning_epochs`` before evaluating.
        Defaults to 50, the value that was previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        ``drop_redundant_rows`` applied to the concatenated results.

    Notes
    -----
    Side effect: ``fine_tuning_epochs`` is overwritten on every trainer passed in.
    """
    # Materialise once so a generator argument is not exhausted by the name pass.
    trainers = list(trainers)
    # Names are read before fine_tuning_epochs is changed, matching the original
    # ordering (the name may depend on trainer attributes -- not visible from here).
    names = [trainer.get_model_name() for trainer in trainers]

    tagged_frames = []
    for trainer, name in zip(trainers, names):
        trainer.fine_tuning_epochs = fine_tuning_epochs
        # Positional True is kept as in the original call; presumably the
        # fine-tuning flag -- confirm against query_scib_metrics' signature.
        metrics = trainer.query_scib_metrics(True)
        # assign() returns a new frame, so the trainer's own result is not mutated.
        tagged_frames.append(metrics.assign(trainer=name))

    # Concatenate once instead of growing a frame inside the loop; fall back to
    # an empty frame so an empty `trainers` behaves as it did before.
    if tagged_frames:
        all_results = pd.concat(tagged_frames, ignore_index=True)
    else:
        all_results = pd.DataFrame()

    return drop_redundant_rows(all_results)

In [21]:
# Build one trainer per method; every trainer is given the same dataset object.
ds = ImmuneDataset()
scvi = ScviTrainer(dataset = ds)
scpoli = OriginalTrainer(ds)
# NOTE(review): the class name is spelled `SimClrTraner`; presumably this matches
# the definition in the star-imported trainer modules -- confirm before renaming.
simclr = SimClrTraner(ds)
barlow = ScpoliProtBarlowTrainer(ds)

# Second Barlow trainer, constructed identically and then given 32 prototypes.
barlow32 = ScpoliProtBarlowTrainer(ds)
barlow32.num_prototypes = 32

loading data


# Barlow results with batch size 1024



In [22]:
# Evaluate the Barlow trainer with its batch size set to 1024.
barlow.batch_size = 1024

# Cross-validated evaluation, passing the trainer's own query_scib_metrics as the evaluation callable.
df = barlow.evaluate_custom_cross_val_models(barlow.query_scib_metrics)
# process_dataframe groups on the 'trainer' column, so tag the rows with the model name first.
df['trainer'] = barlow.get_model_name()
# Last expression of the cell: the per-trainer mean of the metric columns is displayed.
process_dataframe(df)

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  442.29 - val_cvae_loss:  442.29
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 33 clusters.
 |████████████████████| 100.0%  - val_loss:  441.74 - val_cvae_loss:  441.74 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.81


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.65s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:11,  1.28s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:11,  1.28s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:15<01:09,  8.67s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:15<01:09,  8.67s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:15<00:35,  5.01s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:15<00:35,  5.01s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:15<00:18,  3.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  888.12 - val_cvae_loss:  888.12
Initializing unlabeled prototypes with Leiden with 

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.28s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.25it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.25it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.02s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.02s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.44s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.44s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  445.88 - val_cvae_loss:  445.88
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  446.69 - val_cvae_loss:  446.69 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.65


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.24s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.25it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.25it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:06<00:31,  3.92s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:06<00:31,  3.92s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:07<00:17,  2.44s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:07<00:17,  2.44s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:07<00:14,  2.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4435.20 - val_cvae_loss: 4435.20
Initializing unlabeled prototypes with Leiden with an unknown number of

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.39s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.06it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.06it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.58s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.58s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.02it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.02it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  884.32 - val_cvae_loss:  884.32
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss:  884.76 - val_cvae_loss:  884.76 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.61


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.71s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.51it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.51it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.74s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.74s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.13s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.13s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:13<00:10,  3.55s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:13<00:10,  3.55s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:14<00:00, 14.05s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  445.30 - val_cvae_loss:  445.30
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  446.79 - val_cvae_loss:  446.79 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.55


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.89s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.65it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.65it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.60s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.60s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.05s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.05s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4411.61 - val_cvae_loss: 4411.61
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 22 clusters.
 |████████████████████| 100.0%  - val_loss: 4385.37 - val_cvae_loss: 4385.37 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.83


Computing neighbors: 100%|██████████| 1/1 [00:01<00:00,  1.98s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 21.21it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:04,  1.63it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:04,  1.63it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.63it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:01,  3.54it/s, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:01,  3.54it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:01<00:01,  3.54it/s

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:02<00:01,  2.62it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:02<00:01,  2.62it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:02<00:00,  2.77s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  448.95 - val_cvae_loss:  448.95
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 32 clusters.
 |████████████████████| 100.0%  - val_loss:  449.39 - val_cvae_loss:  449.39 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.95


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.44s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.48it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.48it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.09s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.09s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.43s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.43s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:08,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:19<00:11,  3.71s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:19<00:11,  3.71s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:19<00:07,  3.71s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:19<00:00, 19.71s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4131.76 - val_cvae_loss: 4131.76
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 25 clusters.
 |████████████████████| 100.0%  - val_loss: 4146.72 - val_cvae_loss: 4146.72 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.58


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.81s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.42it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.42it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.58s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.58s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.02s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.02s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:08<00:04,  1.34s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:08<00:04,  1.34s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:08<00:00,  8.18s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4451.55 - val_cvae_loss: 4451.55
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss: 4453.09 - val_cvae_loss: 4453.09 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.57


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.03s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.44it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.44it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.14it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.14it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.69it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.69it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow-num-prot-16_hidden-64_bs-1024,0.624711,0.637121,0.459713,0.562171,0.978795,0.774145,0.228012,0.41362,0.845072,0.748697,0.601909,0.652502,0.632265


# Compare models with a higher batch size (bs = 512)

In [14]:
# Set every trainer to batch size 512 and print the model path each one reports afterwards.
trainers = [barlow, barlow32, simclr, scpoli, scvi]
for tr in trainers:
    tr.batch_size = 512
    print(tr.get_model_path())

/home/icb/fatemehs.hashemig/models//pbmc-immune/barlow-num-prot-16_hidden-64_bs-512.pth
/home/icb/fatemehs.hashemig/models//pbmc-immune/barlow-num-prot-32_hidden-64_bs-512.pth
/home/icb/fatemehs.hashemig/models//pbmc-immune/simclr-num-prot-16_hidden-64_bs-512.pth
/home/icb/fatemehs.hashemig/models//pbmc-immune/scpoli-original-latent_dim8_bs512.pth
/home/icb/fatemehs.hashemig/models//pbmc-immune/scvi-latent_dim8_bs512


In [15]:
# compare_trainers_query_cross_val returns a 2-tuple (per-trainer means, per-fold rows),
# so `df` is bound to that tuple here rather than to a single DataFrame.
df = compare_trainers_query_cross_val(trainers)
df

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



  c = torch.tensor(label_tensor, device=device).T
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  441.59 - val_cvae_loss:  441.59
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 30 clusters.
 |████████████████████| 100.0%  - val_loss:  440.63 - val_cvae_loss:  440.63 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.51


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.59s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A

Metrics:  10%|[34m█         [0m| 1/10 [00:14<02:12, 14.67s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:14<02:12, 14.67s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:17<01:01,  7.64s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:17<01:01,  7.64s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:18<00:31,  4.46s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:18<00:31,  4.46s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:19<00:18,  3.15s/it, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:19<00:18, 

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  883.74 - val_cvae_loss:  883.74
Initializing unlabeled prototypes with Leiden with 

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.50s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:23,  2.62s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:23,  2.62s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:05<00:20,  2.55s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:05<00:20,  2.55s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:12,  1.73s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:12,  1.73s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:06<00:08,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.41 - val_cvae_loss:  443.41
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  444.40 - val_cvae_loss:  444.40 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.58


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.86s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:19,  2.19s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:19,  2.19s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:15,  1.99s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:15,  1.99s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.37s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.37s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:07,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4719.28 - val_cvae_loss: 4719.28
Initializing unlabeled prototypes with Leiden with an unknown number of

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.63s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.64it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.64it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.62s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.62s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.00s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.00s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  881.20 - val_cvae_loss:  881.20
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 24 clusters.
 |████████████████████| 100.0%  - val_loss:  881.81 - val_cvae_loss:  881.81 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.39


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.86s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:17,  1.91s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:17,  1.91s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:17,  2.24s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:17,  2.24s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.44s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.44s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:07,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:22<00:15,  5.15s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:22<00:15,  5.15s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:22<00:10,  5.15s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:23<00:00, 23.33s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  444.58 - val_cvae_loss:  444.58
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  446.39 - val_cvae_loss:  446.39 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.40


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.24s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:12,  1.44s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:12,  1.44s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.66s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.66s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.04s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.04s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4451.57 - val_cvae_loss: 4451.57
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4425.74 - val_cvae_loss: 4425.74 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.65


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.11s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.49it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.49it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.05s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.05s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:07,  1.05s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.33it/s, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.33it/s,

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:02,  1.11it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:02,  1.11it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:06<00:01,  1.11it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:06<00:00,  6.87s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.10 - val_cvae_loss:  443.10
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  443.96 - val_cvae_loss:  443.96 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.63


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.19s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:21,  2.35s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:21,  2.35s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:15,  1.96s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:15,  1.96s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.37s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.37s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:07,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:25<00:17,  5.92s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:25<00:17,  5.92s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:25<00:11,  5.92s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:26<00:00, 26.02s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4147.40 - val_cvae_loss: 4147.40
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss: 4164.39 - val_cvae_loss: 4164.39 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.51


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.08s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.13it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.13it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:08,  1.11s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:08,  1.11s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.36it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.36it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:10<00:07,  2.38s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:10<00:07,  2.38s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:10<00:04,  2.38s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:11<00:00, 11.54s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4441.63 - val_cvae_loss: 4441.63
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss: 4445.72 - val_cvae_loss: 4445.72 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.48


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.13s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:12,  1.37s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:12,  1.37s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:12,  1.60s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:12,  1.60s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.00it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.00it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:06,  1.

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  441.25 - val_cvae_loss:  441.25
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  440.38 - val_cvae_loss:  440.38 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.50


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.39s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.18it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.18it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:13,  1.63s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:13,  1.63s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.17s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.17s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  883.60 - val_cvae_loss:  883.60
Initializing unlabeled prototypes with Leiden with 

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.24s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.26it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.26it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:17,  2.23s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:17,  2.23s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.55s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.55s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:06,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  444.09 - val_cvae_loss:  444.09
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  445.07 - val_cvae_loss:  445.07 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.64


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.71s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.48it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.48it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.71s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.71s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.22s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.22s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4535.73 - val_cvae_loss: 4535.73
Initializing unlabeled prototypes with Leiden with an unknown number of

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.50s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.46it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.46it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.28s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.28s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.21it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.21it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  880.13 - val_cvae_loss:  880.13
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  880.74 - val_cvae_loss:  880.74 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.43


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.84s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.56it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.56it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.72s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.72s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.10s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.10s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:13<00:07,  2.53s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:13<00:07,  2.53s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:13<00:00, 13.67s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.00 - val_cvae_loss:  443.00
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  445.16 - val_cvae_loss:  445.16 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.40


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.77s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.19it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.19it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.40s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.40s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.10it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.10it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:05,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4492.98 - val_cvae_loss: 4492.98
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 19 clusters.
 |████████████████████| 100.0%  - val_loss: 4464.64 - val_cvae_loss: 4464.64 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.52


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.00s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 21.58it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.33it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.33it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.33it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.33it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.33it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.58it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.58it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.91s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.96 - val_cvae_loss:  443.96
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  444.67 - val_cvae_loss:  444.67 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.70


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.93s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.54it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.54it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.09s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.09s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:17<00:13,  4.42s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:17<00:13,  4.42s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:17<00:08,  4.42s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:17<00:00, 17.96s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4284.45 - val_cvae_loss: 4284.45
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss: 4300.18 - val_cvae_loss: 4300.18 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.50


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.99s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.47it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.47it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.02it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.02it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:08<00:04,  1.48s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:08<00:04,  1.48s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:08<00:00,  8.70s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4439.00 - val_cvae_loss: 4439.00
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss: 4442.48 - val_cvae_loss: 4442.48 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.66


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.09s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  5.69it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  5.69it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.38it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.38it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  2.12it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  2.12it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:02,  2.

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  441.65 - val_cvae_loss:  441.65
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  440.78 - val_cvae_loss:  440.78 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.45


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.82s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.38it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.38it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.50s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.50s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.05s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.05s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  882.80 - val_cvae_loss:  882.80
Initializing unlabeled prototypes with Leiden with 

Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.83s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.26it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.26it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.72s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.72s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.40s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.40s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  444.20 - val_cvae_loss:  444.20
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  445.68 - val_cvae_loss:  445.68 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.39


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.05s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.13it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.13it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:18,  2.36s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:18,  2.36s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:11,  1.58s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:11,  1.58s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:06,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4335.67 - val_cvae_loss: 4335.67
Initializing unlabeled prototypes with Leiden with an unknown number of

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.29s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.88it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.88it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.17it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.17it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.81it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.81it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  876.98 - val_cvae_loss:  876.98
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 30 clusters.
 |████████████████████| 100.0%  - val_loss:  877.75 - val_cvae_loss:  877.75 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.36


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.33s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.45it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.45it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.76s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.76s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.14s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.14s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:15<00:08,  2.91s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:15<00:08,  2.91s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:15<00:05,  2.91s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:15<00:00, 15.50s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  444.00 - val_cvae_loss:  444.00
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss:  446.05 - val_cvae_loss:  446.05 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.33


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.85s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.43it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.43it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.06s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.06s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.38it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.38it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4442.55 - val_cvae_loss: 4442.55
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4418.82 - val_cvae_loss: 4418.82 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.60


Computing neighbors: 100%|██████████| 1/1 [00:01<00:00,  1.90s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 21.40it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.31it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.31it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.31it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.31it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.31it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.53it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.53it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.60s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  444.19 - val_cvae_loss:  444.19
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  445.62 - val_cvae_loss:  445.62 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.34


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.06s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.23it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.23it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.02s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.02s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.53s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.53s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:20<00:16,  5.34s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:20<00:16,  5.34s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:20<00:10,  5.34s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:20<00:00, 20.65s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4163.39 - val_cvae_loss: 4163.39
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss: 4177.58 - val_cvae_loss: 4177.58 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.52


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.67s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.44it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.44it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.39s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.39s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.06it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.06it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:03,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:07<00:04,  1.37s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:07<00:04,  1.37s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:07<00:00,  7.37s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4441.21 - val_cvae_loss: 4441.21
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss: 4444.14 - val_cvae_loss: 4444.14 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.63


Computing neighbors: 100%|██████████| 1/1 [00:03<00:00,  3.87s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  5.15it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  5.15it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.03s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.03s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.51it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.51it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:03,  1.

running custom cross validation evaluation


INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.92 - val_cvae_loss:  443.92
Initializing unlabeled prototypes w

Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.61s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.38it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.38it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.43s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.43s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.04it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.04it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'HSPCs', 'Monocyte progenitors', 'Erythroid progenitors', 'Erythrocytes', 'Plasma cells', 'CD10+ B cells'}
Therefore integer value of those labels is set to -1
T

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.54s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.23it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.23it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:18,  2.35s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:18,  2.35s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:11,  1.63s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:11,  1.63s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:06,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  448.46 - val_cvae_loss:  448.46
Initializing unlabeled prototypes w

Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.78s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.48it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.48it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.12s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.12s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.51s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.51s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'CD10+ B cells', 'Monocyte progenitors', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocyt

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.82s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.88it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.88it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:09,  1.23s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:09,  1.23s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.27it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.27it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  883.94 - val_cvae_loss:  883.94
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss:  885.30 - val_cvae_loss:  885.30 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.16


Computing neighbors: 100%|██████████| 1/1 [00:06<00:00,  6.02s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.55it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.55it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.61s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.61s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.10s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.10s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:15<00:10,  3.61s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:15<00:10,  3.61s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:15<00:00, 15.19s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  447.98 - val_cvae_loss:  447.98
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  450.52 - val_cvae_loss:  450.52 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.17


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.67s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.98it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.98it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.56s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.56s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.03s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.03s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4313.81 - val_cvae_loss: 4313.81
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4290.69 - val_cvae_loss: 4290.69 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.20


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.08s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 14.59it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.28it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.28it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.28it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.28it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.28it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.40it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.40it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.58s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  448.75 - val_cvae_loss:  448.75
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  450.50 - val_cvae_loss:  450.50 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.16


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.87s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.11it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.11it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.98s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.98s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.37s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.37s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:14,  4.93s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:14,  4.93s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:18<00:00, 19.00s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 3939.56 - val_cvae_loss: 3939.56
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 25 clusters.
 |████████████████████| 100.0%  - val_loss: 3960.14 - val_cvae_loss: 3960.14 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.16


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.22s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.12it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.12it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:13,  1.66s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:13,  1.66s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.04s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.04s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:07<00:04,  1.36s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:07<00:04,  1.36s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:07<00:00,  7.57s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4315.71 - val_cvae_loss: 4315.71
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 30 clusters.
 |████████████████████| 100.0%  - val_loss: 4316.58 - val_cvae_loss: 4316.58 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.14


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.26s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.88it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.88it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.50s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.50s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.08it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.08it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:03,  1.

running custom cross validation evaluation
Evaluating fold 1
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-1/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        



For instance checks, use `isinstance(X, (anndata.experimental.CSRDataset, anndata.experimental.CSCDataset))` instead.

For creation, use `anndata.experimental.sparse_dataset(X)` instead.

  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:38<00:00,  1.22it/s, loss=928, v_num=1]   

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, loss=928, v_num=1]
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.26s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.52it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.52it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.41s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.41s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.05it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.05it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:03,  1.

Evaluating fold 2
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-2/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [01:13<00:00,  1.14s/it, loss=1.21e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [01:13<00:00,  1.47s/it, loss=1.21e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.01s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.29it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.29it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.76s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.76s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:09,  1.31s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:09,  1.31s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:07,  1.

Evaluating fold 3
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-3/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:56<00:00,  1.28it/s, loss=866, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:56<00:00,  1.13s/it, loss=866, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.38s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.12it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.12it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.89s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.89s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.39s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.39s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:08,  1.

Evaluating fold 4
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-4/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:33<00:00,  1.30it/s, loss=3.11e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, loss=3.11e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.19s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.87it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.87it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.13it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.13it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.64it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.64it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 5
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-5/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:43<00:00,  1.39it/s, loss=1.1e+03, v_num=1] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, loss=1.1e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.55s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.64it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.64it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.07it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.07it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.48it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.48it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:13<00:07,  2.66s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:13<00:07,  2.66s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:13<00:00, 13.37s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-6/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:48<00:00,  1.03s/it, loss=624, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:48<00:00,  1.03it/s, loss=624, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.89s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.69it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.69it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.40s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.40s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.13it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.13it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:05,  1.

Evaluating fold 7
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-7/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:17<00:00,  2.68it/s, loss=6.96e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:17<00:00,  2.82it/s, loss=6.96e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.40s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 14.69it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.43it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.43it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.43it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:01,  3.11it/s, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:01,  3.11it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:01<00:01,  3.11it/s

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:03<00:01,  2.36it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:03<00:01,  2.36it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:03<00:00,  3.06s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-8/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [01:05<00:00,  1.08it/s, loss=994, v_num=1]     

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [01:05<00:00,  1.31s/it, loss=994, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.57s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.47it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.47it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:06<00:29,  3.66s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:06<00:29,  3.66s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:07<00:16,  2.36s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:07<00:16,  2.36s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:07<00:08,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:20<00:13,  4.66s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:20<00:13,  4.66s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:20<00:00, 20.68s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-9/[0m[95mmodel.pt[0m   
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:35<00:00,  1.28it/s, loss=3.81e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, loss=3.81e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.75s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.38it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.38it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.53s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.53s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.02it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.02it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:02,  1.09it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:02,  1.09it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:06<00:00,  6.20s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_bs512_ccross-val/fold-10/[0m[95mmodel.pt[0m  
         already downloaded                                                                                        


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:36<00:00,  1.21it/s, loss=3.43e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, loss=3.43e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.67s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  5.64it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  5.64it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.02s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.02s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.49it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.49it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

(                                     Isolated labels  KMeans NMI  KMeans ARI  \
 trainer                                                                        
 barlow-num-prot-16_hidden-64_bs-512         0.641688    0.644365    0.422731   
 barlow-num-prot-32_hidden-64_bs-512         0.646254    0.648551    0.436815   
 scpoli-original-latent_dim8_bs512           0.680112    0.701193    0.525359   
 scvi-latent_dim8_bs512                      0.646387    0.683340    0.485468   
 simclr-num-prot-16_hidden-64_bs-512         0.657351    0.675843    0.462521   
 
                                      Silhouette label     cLISI  \
 trainer                                                           
 barlow-num-prot-16_hidden-64_bs-512          0.575524  0.981635   
 barlow-num-prot-32_hidden-64_bs-512          0.581645  0.982618   
 scpoli-original-latent_dim8_bs512            0.623122  0.990888   
 scvi-latent_dim8_bs512                       0.583205  0.992197   
 simclr-num-prot-16_hid

In [19]:
# Split the two-element `df` into its parts.
# NOTE(review): `df` is defined in a cell outside this view; presumably it is the
# (per-trainer mean, per-fold rows) tuple returned by
# compare_trainers_query_cross_val — confirm against the defining cell.
res, fold_res = df

In [20]:
# Display the first element unpacked from `df` (rendered below as a table indexed by trainer).
res

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow-num-prot-16_hidden-64_bs-512,0.641688,0.644365,0.422731,0.575524,0.981635,0.78864,0.241616,0.409873,0.877861,0.713269,0.606252,0.653188,0.634414
barlow-num-prot-32_hidden-64_bs-512,0.646254,0.648551,0.436815,0.581645,0.982618,0.790548,0.236998,0.43599,0.867323,0.678996,0.601971,0.659177,0.636294
scpoli-original-latent_dim8_bs512,0.680112,0.701193,0.525359,0.623122,0.990888,0.771006,0.299171,0.488597,0.895195,0.719661,0.634726,0.704135,0.676371
scvi-latent_dim8_bs512,0.646387,0.68334,0.485468,0.583205,0.992197,0.81793,0.221461,0.372219,0.912276,0.66785,0.598347,0.67812,0.646211
simclr-num-prot-16_hidden-64_bs-512,0.657351,0.675843,0.462521,0.592092,0.987881,0.823351,0.267542,0.420972,0.890487,0.71241,0.622952,0.675138,0.654264


In [31]:
# Query-time evaluation with fine-tuning enabled: evaluate every trainer's
# cross-validation models and show the per-trainer mean of the metrics.
# NOTE(review): `scvi`, `scpoli`, `barlow`, `partial`, `deepcopy` and
# `process_dataframe` must already exist in the kernel namespace — confirm they
# are defined/imported in earlier cells so this cell survives Restart & Run All.
trainers = [scvi, scpoli, barlow]
names = [trainer.get_model_name() for trainer in trainers]

# Collect one frame per trainer and concatenate once at the end. Calling
# pd.concat inside the loop re-copies every previously gathered row on each
# iteration and starts from an empty DataFrame, which newer pandas versions
# warn about.
fold_frames = []
for trainer, name in zip(trainers, names):
    # Bind fine_tuning=True so the cross-validation driver can call the metric
    # function with its usual arguments.
    eval_fn = partial(trainer.query_scib_metrics, fine_tuning=True)

    df = trainer.evaluate_custom_cross_val_models(eval_fn)
    df['trainer'] = name  # tag rows so they can be grouped per trainer below

    fold_frames.append(df)

all_results_new = pd.concat(fold_frames, ignore_index=True)

# process_dataframe groups by 'trainer' and averages the numeric columns; a
# deep copy is passed so `all_results_new` keeps the raw per-fold rows.
process_dataframe(deepcopy(all_results_new))

running custom cross validation evaluation
Evaluating fold 1
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-1/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:53<00:00,  1.08s/it, loss=937, v_num=1]   

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:53<00:00,  1.06s/it, loss=937, v_num=1]
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.31s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  1.82it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  1.82it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.85s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.85s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.25s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.25s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:07,  1.

Evaluating fold 2
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-2/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:54<00:00,  1.03s/it, loss=1.21e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:54<00:00,  1.10s/it, loss=1.21e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.05s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.27it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.27it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.12s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.12s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:11,  1.63s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:11,  1.63s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:06,  1.

Evaluating fold 3
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-3/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:54<00:00,  1.34s/it, loss=880, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:54<00:00,  1.10s/it, loss=880, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.52s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.47it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.47it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.56s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.56s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:09,  1.34s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:09,  1.34s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 4
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-4/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:38<00:00,  1.39it/s, loss=3.33e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, loss=3.33e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.15s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.78it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.78it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.18it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.18it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.74it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.74it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 5
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-5/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:35<00:00,  1.62it/s, loss=1.11e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, loss=1.11e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.66s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.41it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.41it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.02it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.02it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.46it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.46it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:02,  2.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:11<00:09,  3.06s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:11<00:09,  3.06s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:11<00:00, 11.36s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-6/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:33<00:00,  1.83it/s, loss=626, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:33<00:00,  1.47it/s, loss=626, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.85s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.40it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.40it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.25it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.25it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.88it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.88it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 7
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-7/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:14<00:00,  4.67it/s, loss=7.2e+03, v_num=1] 

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:14<00:00,  3.53it/s, loss=7.2e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.16s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 20.58it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.29it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.29it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.29it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.29it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.29it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.45it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.45it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.87s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-8/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:55<00:00,  1.32s/it, loss=1.02e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:55<00:00,  1.11s/it, loss=1.02e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.13s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.39it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.39it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.52s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.52s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.11s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.11s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:14<00:10,  3.45s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:14<00:10,  3.45s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:14<00:00, 14.68s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-9/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:38<00:00,  1.66it/s, loss=3.46e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, loss=3.46e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.50s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.72it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.72it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.03s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.03s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.48it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.48it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:05<00:02,  1.05it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:05<00:02,  1.05it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:05<00:00,  5.76s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-10/[0m[95mmodel.pt[0m already
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:28<00:00,  1.45it/s, loss=3.65e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:28<00:00,  1.72it/s, loss=3.65e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.70s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.84it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.84it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.28s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.28s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.24it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.24it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:04,  1.

running custom cross validation evaluation


INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.95 - val_cvae_loss:  443.95
Initializing unlabeled prototypes w

Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.59s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  1.82it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  1.82it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.96s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.96s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.31s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.31s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
T

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.40s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.05it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.05it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.88s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.88s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.44s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.44s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  448.46 - val_cvae_loss:  448.46
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 33 clusters.
 |████████████████████| 100.0%  - val_loss:  450.27 - val_cvae_loss:  450.27 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.19


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.75s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.11it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.11it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.07s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.07s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:11,  1.63s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:11,  1.63s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:06,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte progenitors', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte p

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.66s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.85it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.85it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.05s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.05s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.37it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.37it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:02,  2.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  883.94 - val_cvae_loss:  883.94
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss:  885.30 - val_cvae_loss:  885.30 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.16


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.83s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.09it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.09it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:03,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:11<00:08,  3.00s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:11<00:08,  3.00s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:12<00:00, 12.04s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  447.98 - val_cvae_loss:  447.98
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  450.52 - val_cvae_loss:  450.52 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.17


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.69s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.42it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.42it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.30it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.30it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.92it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.92it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4313.81 - val_cvae_loss: 4313.81
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4290.69 - val_cvae_loss: 4290.69 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.20


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.06s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 20.40it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.32it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.32it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.32it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.32it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.32it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.46it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.46it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.59s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  448.75 - val_cvae_loss:  448.75
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  450.50 - val_cvae_loss:  450.50 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.16


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.78s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.35it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.35it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.53s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.53s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.23s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.23s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:16<00:12,  4.15s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:16<00:12,  4.15s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:16<00:08,  4.15s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:16<00:00, 16.25s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 3939.56 - val_cvae_loss: 3939.56
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 25 clusters.
 |████████████████████| 100.0%  - val_loss: 3960.14 - val_cvae_loss: 3960.14 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.16


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.11s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.91it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.91it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:09,  1.20s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:09,  1.20s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.20it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.20it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:03,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:05<00:03,  1.01s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:05<00:03,  1.01s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:05<00:00,  5.81s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4315.71 - val_cvae_loss: 4315.71
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 30 clusters.
 |████████████████████| 100.0%  - val_loss: 4316.58 - val_cvae_loss: 4316.58 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.14


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.23s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.60it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.60it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.45s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.45s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.10it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.10it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:05,  1.

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.09 - val_cvae_loss:  443.09
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  442.44 - val_cvae_loss:  442.44 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.37


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.58s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.57it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.57it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.36s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.36s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.06it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.06it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:05,  1.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  886.14 - val_cvae_loss:  886.14
Initializing unlabeled prototypes with Leiden with 

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.51s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.03it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.03it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.93s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.93s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.56s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.56s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:06,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  444.02 - val_cvae_loss:  444.02
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  445.70 - val_cvae_loss:  445.70 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.36


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.84s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.09it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.09it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.83s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.83s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.48s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.48s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte progenitors', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte progenitors', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 5053.40 - val_cvae_loss: 5053.40
Initializing unlabeled prototypes with Leiden with an unknown number of

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.15s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.63it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.63it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.12it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.12it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.72it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.72it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  878.08 - val_cvae_loss:  878.08
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  879.20 - val_cvae_loss:  879.20 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.31


Computing neighbors: 100%|██████████| 1/1 [00:06<00:00,  6.02s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.63it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.63it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.79s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.79s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.17s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.17s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:07,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:12<00:06,  2.27s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:12<00:06,  2.27s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:12<00:00, 12.63s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.39 - val_cvae_loss:  443.39
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 31 clusters.
 |████████████████████| 100.0%  - val_loss:  445.45 - val_cvae_loss:  445.45 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.35


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.10s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.23it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.23it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.33it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.33it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.82it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.82it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4531.76 - val_cvae_loss: 4531.76
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 21 clusters.
 |████████████████████| 100.0%  - val_loss: 4502.38 - val_cvae_loss: 4502.38 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.58


Computing neighbors: 100%|██████████| 1/1 [00:01<00:00,  1.87s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 14.73it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.55it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.55it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.55it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:01,  3.24it/s, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:01,  3.24it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:01<00:01,  3.24it/s

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:02<00:00,  3.39it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:02<00:00,  3.39it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:02<00:00,  2.35s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  442.66 - val_cvae_loss:  442.66
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  444.09 - val_cvae_loss:  444.09 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.40


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.05s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.45it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.45it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.97s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.97s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.36s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.36s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:14,  4.69s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:14,  4.69s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:18<00:09,  4.69s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:18<00:00, 18.17s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4430.21 - val_cvae_loss: 4430.21
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 26 clusters.
 |████████████████████| 100.0%  - val_loss: 4450.07 - val_cvae_loss: 4450.07 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.39


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.63s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.67it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.67it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.16it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.16it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.70it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.70it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:03,  1.07s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:03,  1.07s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:06<00:00,  6.10s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4499.46 - val_cvae_loss: 4499.46
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss: 4498.06 - val_cvae_loss: 4498.06 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.46


Computing neighbors: 100%|██████████| 1/1 [00:03<00:00,  3.84s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.94it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.94it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.42s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.42s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.11it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.11it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:05,  1.

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow-num-prot-16_hidden-64_bs-64,0.648321,0.653568,0.421368,0.580857,0.988747,0.835854,0.239628,0.403892,0.902028,0.65335,0.60695,0.658572,0.637923
scpoli-original-latent_dim8,0.680113,0.701175,0.525335,0.623122,0.990887,0.771017,0.299044,0.488385,0.895075,0.719558,0.634616,0.704126,0.676322
scvi-latent_dim8,0.646387,0.68334,0.485468,0.583205,0.992197,0.81793,0.221461,0.372219,0.912276,0.66785,0.598347,0.67812,0.646211


In [39]:
# Display the per-fold metric rows (with their trainer label), keeping only rows
# that have no missing value in any column.
drop_redundant_rows(all_results_new).dropna(axis=0, how="any")

Unnamed: 0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total,fold,trainer
0,0.574879,0.567107,0.318055,0.553871,0.980997,0.867174,0.2647806,0.334462,0.923864,0.567113,0.591479,0.598982,0.595981,1,scvi-latent_dim8
2,0.57663,0.663592,0.422228,0.587675,0.994731,0.881993,0.1093161,0.295316,0.906895,0.580447,0.554793,0.648971,0.6113,2,scvi-latent_dim8
4,0.535672,0.684667,0.48964,0.562875,0.990961,0.865762,0.2907567,0.387067,0.92592,0.582505,0.610402,0.652763,0.635819,3,scvi-latent_dim8
6,0.612788,0.659736,0.505247,0.537095,0.980942,0.86878,0.2910341,0.346154,0.873307,0.766447,0.629144,0.659162,0.647155,4,scvi-latent_dim8
8,0.697203,0.697533,0.489407,0.629673,0.998996,0.805828,0.1246041,0.366469,0.921159,0.781943,0.600001,0.702562,0.661538,5,scvi-latent_dim8
10,0.770944,0.645046,0.396346,0.574807,0.992759,0.87977,0.430909,0.353923,0.925416,0.303439,0.578691,0.67598,0.637065,6,scvi-latent_dim8
12,0.610893,0.69345,0.532274,0.562259,0.987673,0.729241,1.192093e-07,0.629156,0.894742,0.724734,0.595575,0.67731,0.644616,7,scvi-latent_dim8
14,0.759332,0.728149,0.520497,0.628723,0.998441,0.878441,0.334147,0.434773,0.920327,0.78151,0.669839,0.727028,0.704153,8,scvi-latent_dim8
16,0.676925,0.740664,0.597282,0.628061,0.999733,0.530231,0.0,0.2507,0.934639,0.747924,0.492699,0.728533,0.634199,9,scvi-latent_dim8
18,0.648604,0.753461,0.583708,0.567009,0.996738,0.872081,0.3690579,0.32417,0.896496,0.842442,0.660849,0.709904,0.690282,10,scvi-latent_dim8


In [32]:
# Display the mean of every metric grouped by trainer; the helper is handed an
# independent deep copy so the combined results frame itself is never touched.
process_dataframe(all_results_new.copy(deep=True))

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow-num-prot-16_hidden-64_bs-64,0.648321,0.653568,0.421368,0.580857,0.988747,0.835854,0.239628,0.403892,0.902028,0.65335,0.60695,0.658572,0.637923
scpoli-original-latent_dim8,0.680113,0.701175,0.525335,0.623122,0.990887,0.771017,0.299044,0.488385,0.895075,0.719558,0.634616,0.704126,0.676322
scvi-latent_dim8,0.646387,0.68334,0.485468,0.583205,0.992197,0.81793,0.221461,0.372219,0.912276,0.66785,0.598347,0.67812,0.646211


In [33]:
# Instantiate ScpoliProtBarlowTrainer on `ds` (`ds` is not defined in this cell;
# it must already exist in the kernel).
barlow_32 = ScpoliProtBarlowTrainer(ds)
# Override the number of prototypes to 32 for this run.
# NOTE(review): the attribute is assigned after construction — confirm the trainer
# reads num_prototypes lazily (model build / model name) so the override takes effect.
barlow_32.num_prototypes = 32

In [44]:
# Pre-bind fine_tuning=True so the evaluator can call query_scib_metrics with
# that keyword already set.
# NOTE(review): no import of `partial` is visible in the cells shown — presumably
# functools.partial arrives through one of the star imports; confirm on a fresh kernel.
eval_fn = partial(barlow_32.query_scib_metrics, fine_tuning=True)

# Run the custom cross-validation evaluation for barlow_32 with the bound metric
# function and keep the result in `df`.
# NOTE(review): presumably a DataFrame of per-fold metrics — confirm against the trainer.
df = barlow_32.evaluate_custom_cross_val_models(eval_fn)

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  442.94 - val_cvae_loss:  442.94
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 24 clusters.
 |████████████████████| 100.0%  - val_loss:  442.58 - val_cvae_loss:  442.58 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.36


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.99s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:19,  2.12s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:19,  2.12s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:19<01:26, 10.85s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:19<01:26, 10.85s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:19<00:43,  6.19s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:19<00:43,  6.19s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:19<00:22,  3.

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  885.26 - val_cvae_loss:  885.26
Initializing unlabeled prototypes with Leiden with 

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.46s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.03it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.03it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:19,  2.48s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:04<00:19,  2.48s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:11,  1.71s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:11,  1.71s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:06,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.71 - val_cvae_loss:  443.71
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 30 clusters.
 |████████████████████| 100.0%  - val_loss:  445.30 - val_cvae_loss:  445.30 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.37


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.54s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.10it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:08,  1.10it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:17,  2.16s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:17,  2.16s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.46s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.46s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:08,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte progenitors', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte progenitors', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4820.93 - val_cvae_loss: 4820.93
Initializing unlabeled prototypes with Leiden with an unknown number of

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.16s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.14it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.14it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.62s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.62s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:06,  1.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  879.51 - val_cvae_loss:  879.51
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 29 clusters.
 |████████████████████| 100.0%  - val_loss:  880.90 - val_cvae_loss:  880.90 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.29


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.31s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.01it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.01it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.93s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.93s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.27s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.27s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:07,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:15<00:08,  2.80s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:15<00:08,  2.80s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:15<00:05,  2.80s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:15<00:00, 15.30s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  442.49 - val_cvae_loss:  442.49
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss:  444.95 - val_cvae_loss:  444.95 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.32


Computing neighbors: 100%|██████████| 1/1 [00:06<00:00,  6.06s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.41it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.41it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.57s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.57s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:03,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4488.53 - val_cvae_loss: 4488.53
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4460.61 - val_cvae_loss: 4460.61 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.34


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.12s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 19.64it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.15it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.15it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.15it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:02,  2.15it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:01<00:02,  2.15it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:01<00:00,  6

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  6.97it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  4.53it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.81s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss:  443.51 - val_cvae_loss:  443.51
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 27 clusters.
 |████████████████████| 100.0%  - val_loss:  445.06 - val_cvae_loss:  445.06 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.33


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.75s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.40it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.40it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.97s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:15,  1.97s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.41s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.41s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:14,  4.83s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:14,  4.83s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:18<00:09,  4.83s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:18<00:00, 18.80s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4602.40 - val_cvae_loss: 4602.40
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 28 clusters.
 |████████████████████| 100.0%  - val_loss: 4631.22 - val_cvae_loss: 4631.22 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.34


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.57s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.27it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.27it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.84s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:14,  1.84s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.22s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:08,  1.22s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:04,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:08<00:04,  1.40s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:08<00:04,  1.40s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:08<00:02,  1.40s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:08<00:00,  8.16s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4541.82 - val_cvae_loss: 4541.82
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 31 clusters.
 |████████████████████| 100.0%  - val_loss: 4535.54 - val_cvae_loss: 4535.54 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.61


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.42s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.55it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.55it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.49s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.06it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:06,  1.06it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:05,  1.

In [47]:
# NOTE(review): `df` is not created in this cell; it is whatever frame the kernel
# currently holds, so this cell depends on earlier (out-of-order) execution.
# Label every row with the model name; process_dataframe groups by this column.
df['trainer'] = 'barlow32'
# Last expression of the cell: per-trainer mean of the numeric metric columns
# (process_dataframe drops 'fold'/'trainer', coerces to numeric, then groups by trainer).
process_dataframe(df)

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow32,0.652015,0.680492,0.477119,0.587336,0.990164,0.840041,0.256472,0.413043,0.904417,0.701767,0.623148,0.677425,0.655714


# recheck barlow performance (no cross validation)

In [15]:
# Build the three trainers compared in this section, all on the same dataset.
# NOTE(review): `ds` is not defined in this cell; it must already exist in the kernel.
barlow = ScpoliProtBarlowTrainer(ds)
# The experiment name appears as the prefix of the barlow model name in the results
# table further down; presumably it also keeps this re-check separate from earlier
# barlow runs on disk — confirm against the trainer's path logic.
barlow.experiment_name = 'barlow-recheck'

scvi_ = ScviTrainer(dataset = ds)
scpoli_ = OriginalTrainer(ds)

trainers = [scvi_, scpoli_, barlow]
# Model names are used as the 'trainer' label when results are collected below.
names = [trainer.get_model_name() for trainer in trainers]

In [21]:
# Collect one metrics frame per trainer and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every previously
# collected row on each iteration and feeds an empty frame into concat.
result_frames = []
for trainer, name in zip(trainers, names):
    # Number of fine-tuning epochs used for this re-check run.
    trainer.fine_tuning_epochs = 50
    # NOTE(review): the positional True is presumably the fine_tuning flag
    # (another cell passes fine_tuning=True by keyword) — confirm.
    df = trainer.query_scib_metrics(True)
    # Tag the rows so results from different trainers stay distinguishable.
    df['trainer'] = name
    result_frames.append(df)

# pd.concat raises ValueError on an empty list; keep the original behaviour of
# ending up with an empty frame when there are no trainers.
all_results_new = (
    pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()
)

[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/scvi-latent_dim8/[0m[95mmodel.pt[0m already downloaded         


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:15<00:00,  3.25it/s, loss=7.14e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:15<00:00,  3.33it/s, loss=7.14e+03, v_num=1]
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.17s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 14.00it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.27it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.27it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.27it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.27it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:00,  6.03it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:00,  6

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  4.57it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  4.57it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.62s/it]atch correction: pcr_comparison]    [A

                                                                                         [AINFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4331.25 - val_cvae_loss: 4331.25
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 19 clusters.
 |████████████████████| 100.0%  - val_loss: 4313.89 - val_cvae_loss: 4313.89 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.22


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.07s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 20.77it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.31it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.31it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.31it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:01,  4.70it/s, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:01,  4.70it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:01,  4.70it/s

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  4.78it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  4.78it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.63s/it]atch correction: pcr_comparison]    [A

                                                                                         [A

Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4528.57 - val_cvae_loss: 4528.57
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4499.09 - val_cvae_loss: 4499.09 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.39


Computing neighbors: 100%|██████████| 1/1 [00:01<00:00,  1.90s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 21.20it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.23it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.23it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:03,  2.23it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.23it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.23it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:01<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.31it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.31it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.62s/it]atch correction: pcr_comparison][A

                                                                                         [A

In [25]:
# Display the collected metrics, dropping rows in which no column parses as a
# number; drop_redundant_rows re-attaches the 'trainer' label afterwards.
drop_redundant_rows(all_results_new)

Unnamed: 0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total,trainer
0,0.612536,0.700514,0.513212,0.563858,0.986493,0.688066,1.192093e-07,0.636134,0.908591,0.688592,0.584277,0.675323,0.638904,scvi-latent_dim8
2,0.662764,0.677367,0.473438,0.622212,0.988258,0.623953,1.192093e-07,0.622111,0.887424,0.5537,0.537437,0.684808,0.62586,scpoli-original-latent_dim8
4,0.6179,0.72478,0.548696,0.576359,0.981188,0.693845,1.192093e-07,0.615567,0.924107,0.590495,0.564803,0.689785,0.639792,barlow-recheck-num-prot-16_hidden-64_bs-64


# classification metrics (no cross validation)

In [27]:
def linear_classification(self):
    """Train a linear classifier on a trainer's latent representation.

    Takes the trainer as its only argument (named ``self``, although this is a
    plain function rather than a method).

    Args:
        self: Object exposing ``get_ref_query_latent`` and
            ``dataset.adata.obs.cell_type``.

    Returns:
        Whatever ``train_linear_classifier`` returns for the latent matrix and
        the cell-type labels.
    """
    # The call returns a triple; only its third element is used here.
    # NOTE(review): the positional True is presumably the fine-tuning flag — confirm.
    latent_triple = self.get_ref_query_latent(True)
    _unused_first, _unused_second, embedding = latent_triple

    # Labels are read after the latent call, matching the original statement order.
    cell_types = self.dataset.adata.obs.cell_type.values
    return train_linear_classifier(embedding, cell_types)

In [28]:
# Collect one classification-result frame per trainer and concatenate once.
# Growing a DataFrame with pd.concat inside the loop re-copies every previously
# collected row on each iteration and feeds an empty frame into concat.
classification_frames = []
for trainer, name in zip(trainers, names):
    # Fewer fine-tuning epochs than the scib run earlier in this section (10 vs 50).
    trainer.fine_tuning_epochs = 10
    df = linear_classification(trainer)
    # Tag the rows so results from different trainers stay distinguishable.
    df['trainer'] = name
    classification_frames.append(df)

# pd.concat raises ValueError on an empty list; keep the original behaviour of
# ending up with an empty frame when there are no trainers.
all_results_new = (
    pd.concat(classification_frames, ignore_index=True)
    if classification_frames
    else pd.DataFrame()
)

[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/scvi-latent_dim8/[0m[95mmodel.pt[0m already downloaded         


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 10/10: 100%|██████████| 10/10 [00:03<00:00,  3.31it/s, loss=7.09e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 10/10: 100%|██████████| 10/10 [00:03<00:00,  3.10it/s, loss=7.09e+03, v_num=1]
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             


INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss: 4441.87 - val_cvae_loss: 4441.87


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss: 4834.74 - val_cvae_loss: 4834.74


In [29]:
# Display the concatenated classification results (rows of all trainers, labelled
# by the 'trainer' column).
all_results_new

Unnamed: 0,CD10+ B cells,CD14+ Monocytes,CD16+ Monocytes,CD20+ B cells,CD4+ T cells,CD8+ T cells,Erythrocytes,Erythroid progenitors,HSPCs,Megakaryocyte progenitors,Monocyte progenitors,Monocyte-derived dendritic cells,NK cells,NKT cells,Plasma cells,Plasmacytoid dendritic cells,accuracy,macro avg,weighted avg,trainer
0,0.95122,0.961334,0.881443,0.998249,0.872758,0.696246,0.932692,0.890244,0.776699,0.836364,0.73494,0.868852,0.879518,0.792279,0.96,1.0,0.889138,0.877052,0.88514,scvi-latent_dim8
1,0.95122,0.977641,0.859296,0.991304,0.950045,0.466819,0.97,0.784946,0.842105,0.851852,0.709302,0.787129,0.795207,0.785064,0.923077,0.977011,0.889138,0.851376,0.889138,scvi-latent_dim8
2,0.95122,0.969419,0.870229,0.994764,0.909763,0.558904,0.95098,0.834286,0.808081,0.844037,0.721893,0.825974,0.83524,0.788655,0.941176,0.988372,0.889138,0.862062,0.884811,scvi-latent_dim8
3,41.0,1297.0,199.0,575.0,2202.0,437.0,300.0,93.0,95.0,54.0,86.0,202.0,459.0,549.0,26.0,87.0,0.889138,6702.0,6702.0,scvi-latent_dim8
4,0.952381,0.98986,0.81982,0.998252,0.895976,0.738462,0.954545,0.792079,0.828829,0.926829,0.881579,0.903743,0.84188,0.833977,0.0,0.761062,0.902268,0.819955,0.897248,scpoli-original-latent_dim8
5,0.97561,0.978412,0.914573,0.993043,0.9505,0.549199,0.98,0.860215,0.968421,0.703704,0.77907,0.836634,0.858388,0.786885,0.0,0.988506,0.902268,0.820197,0.902268,scpoli-original-latent_dim8
6,0.963855,0.984102,0.864608,0.995641,0.922433,0.629921,0.967105,0.824742,0.893204,0.8,0.82716,0.868895,0.850054,0.809747,0.0,0.86,0.902268,0.816342,0.897942,scpoli-original-latent_dim8
7,41.0,1297.0,199.0,575.0,2202.0,437.0,300.0,93.0,95.0,54.0,86.0,202.0,459.0,549.0,26.0,87.0,0.902268,6702.0,6702.0,scpoli-original-latent_dim8
8,0.96875,0.95302,0.846154,0.991319,0.842084,0.62069,0.953488,0.888889,0.892473,0.777778,0.888889,0.852632,0.868182,0.805447,1.0,0.966292,0.878693,0.882255,0.870579,barlow-recheck-num-prot-16_hidden-64_bs-64
9,0.756098,0.985351,0.829146,0.993043,0.954133,0.28833,0.956667,0.774194,0.873684,0.907407,0.651163,0.80198,0.832244,0.754098,1.0,0.988506,0.878693,0.834128,0.878693,barlow-recheck-num-prot-16_hidden-64_bs-64


# check using less fine-tuning (fine_tuning_epochs = 5)

In [11]:
from functools import partial

# NOTE(review): `scvi` and `scpoli` are not defined in this cell; they must
# already exist in the kernel.
trainers = [scvi, scpoli]
names = [trainer.get_model_name() for trainer in trainers]

# Collect one cross-validation frame per trainer and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies every previously
# collected row on each iteration and feeds an empty frame into concat.
cross_val_frames = []
for trainer, name in zip(trainers, names):
    # Shortened fine-tuning budget: the point of this experiment.
    trainer.fine_tuning_epochs = 5
    # Bind fine_tuning=True so the cross-validation driver can call the metric
    # function without knowing about that flag.
    eval_fn = partial(trainer.query_scib_metrics, fine_tuning=True)

    df = trainer.evaluate_custom_cross_val_models(eval_fn)
    # Tag the rows so process_dataframe can group them by trainer.
    df['trainer'] = name
    cross_val_frames.append(df)

# `trainers` is a non-empty literal above, so the list is never empty here.
all_results_new = pd.concat(cross_val_frames, ignore_index=True)
# Last expression of the cell: per-trainer mean of the numeric metric columns.
process_dataframe(all_results_new)

running custom cross validation evaluation
Evaluating fold 1
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-1/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:11<00:00,  1.57s/it, loss=993, v_num=1]     

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:11<00:00,  2.24s/it, loss=993, v_num=1]
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             


Computing neighbors: 100%|██████████| 1/1 [00:26<00:00, 26.14s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:19<02:57, 19.75s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:19<02:57, 19.75s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:22<01:17,  9.66s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:22<01:17,  9.66s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:23<00:38,  5.55s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:23<00:38,  5.55s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:24<00:22,  3.

Evaluating fold 2
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-2/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:05<00:00,  1.01it/s, loss=1.22e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:05<00:00,  1.03s/it, loss=1.22e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.03s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:15,  1.71s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:15,  1.71s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.02s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:16,  2.02s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.51s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:10,  1.51s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:07,  1.

Evaluating fold 3
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-3/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:06<00:00,  1.35s/it, loss=890, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:06<00:00,  1.32s/it, loss=890, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.46s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:09,  1.07s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:09,  1.07s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.45s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.45s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.09s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.09s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 4
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-4/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.43it/s, loss=3.29e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.43it/s, loss=3.29e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.52s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.32it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.32it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.26s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.26s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.25it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:05,  1.25it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

Evaluating fold 5
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-5/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:05<00:00,  1.18s/it, loss=1.12e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:05<00:00,  1.18s/it, loss=1.12e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.61s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:10,  1.16s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:10,  1.16s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.53s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.53s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.03s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.03s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:13,  4.38s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:18<00:13,  4.38s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:18<00:08,  4.38s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:19<00:00, 19.68s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 6
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-6/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.29it/s, loss=631, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.28it/s, loss=631, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.86s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.52it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.52it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.52s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:12,  1.52s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.01it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.01it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:06,  1.

Evaluating fold 7
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-7/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:01<00:00,  3.55it/s, loss=7.57e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:01<00:00,  3.65it/s, loss=7.57e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.20s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.16it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:04,  2.16it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.41it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.41it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.41it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:02,  2.21it/s, Bio conservation: clisi_knn][A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:02,  2.21it/s,

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:05<00:02,  1.09it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:05<00:02,  1.09it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:05<00:01,  1.09it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:06<00:00,  6.28s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 8
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-8/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:06<00:00,  1.23s/it, loss=1e+03, v_num=1]   

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:06<00:00,  1.24s/it, loss=1e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.55s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:22,  2.54s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:02<00:22,  2.54s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:05<00:20,  2.51s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:05<00:20,  2.51s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:11,  1.67s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:11,  1.67s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:06<00:07,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:25<00:18,  6.06s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:25<00:18,  6.06s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:25<00:12,  6.06s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:26<00:00, 26.62s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 9
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-9/[0m[95mmodel.pt[0m already 
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.32it/s, loss=3.63e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.28it/s, loss=3.63e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.56s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.24it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:07,  1.24it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.45s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:11,  1.45s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.05it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:06,  1.05it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:12<00:07,  2.56s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:12<00:07,  2.56s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:12<00:05,  2.56s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:13<00:00, 13.22s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)


Evaluating fold 10
[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/[0m[35m/scvi-latent_dim8_ccross-val/fold-10/[0m[95mmodel.pt[0m already
         downloaded                                                                                                


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [MIG-887fd1e0-585a-56c2-81f3-94f71fe4e1e3]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.36it/s, loss=3.53e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 5/5: 100%|██████████| 5/5 [00:03<00:00,  1.33it/s, loss=3.53e+03, v_num=1]
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        
[34mINFO    [0m AnnData object appears to be a copy. Attempting to transfer setup.                                        


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.21s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.63it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:05,  1.63it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.04it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.04it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.63it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.63it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:03,  1.

running custom cross validation evaluation
Evaluating fold 1
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss:  443.12 - val_cvae_loss:  443.12


  c = torch.tensor(label_tensor, device=device).T
Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.83s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.57it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.57it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.20it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:06,  1.20it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.60it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.60it/s, Bio conservation: clisi_knn]       [A
Metrics

Evaluating fold 2
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Plasma cells', 'HSPCs'}
Therefore integer value of those labels is set to -1
The missing labels are: {'CD10+ B cells', 'Monocyte progenitors', 'Plasma cells', 'Erythroid progenitors', 'HSPCs', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
T

Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.62s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:09,  1.02s/it, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:01<00:09,  1.02s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.74s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.74s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.31s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:04<00:09,  1.31s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

Evaluating fold 3
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss:  450.10 - val_cvae_loss:  450.10


Computing neighbors: 100%|██████████| 1/1 [00:07<00:00,  7.84s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.46it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.46it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.29s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:02<00:10,  1.29s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:07,  1.01s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:03<00:05,  1.

Evaluating fold 4
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte progenitors', 'Erythrocytes'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Erythrocytes', 'Monocyte progenitors', 'CD10+ B cells', 'Erythroid progenitors'}
Therefore integer value of those labels is set to -1
The missing labels are: {'Monocyte p

Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.90s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.60it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.60it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.01s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:08,  1.01s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.45it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.45it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:02,  2.

Evaluating fold 5
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss:  901.88 - val_cvae_loss:  901.88


Computing neighbors: 100%|██████████| 1/1 [00:06<00:00,  6.16s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.42it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:03,  2.42it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.01it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.01it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.43it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:02<00:04,  1.43it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:02<00:02,  2.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:11<00:09,  3.23s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:11<00:09,  3.23s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:11<00:00, 11.93s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 6
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss:  454.61 - val_cvae_loss:  454.61


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.28s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.06it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.06it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.34it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:05,  1.34it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.97it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:03,  1.97it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Evaluating fold 7
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss: 4453.24 - val_cvae_loss: 4453.24


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.12s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 21.50it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.38it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:03,  2.38it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:02,  2.38it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:02,  2.38it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:02,  2.38it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  7

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  7.64it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  7.64it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.93s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 8
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss:  449.82 - val_cvae_loss:  449.82


Computing neighbors: 100%|██████████| 1/1 [00:08<00:00,  8.24s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.37it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:06,  1.37it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.65s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:03<00:13,  1.65s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:09,  1.33s/it, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:03<00:09,  1.33s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:04<00:05,  1.

[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:19<00:15,  5.07s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:19<00:15,  5.07s/it, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:19<00:10,  5.07s/it, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:19<00:00, 19.38s/it]atch correction: pcr_comparison][A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 9
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss: 4001.24 - val_cvae_loss: 4001.24


Computing neighbors: 100%|██████████| 1/1 [00:05<00:00,  5.09s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.71it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  3.71it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.13it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.13it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.68it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.68it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m HSPCs consists of a single batch or is too small. Skip.                                                   
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               
[34mINFO    [0m Plasma cells consists of a single batch or is too small. Skip.                                            



Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:03,  1.27s/it, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:03,  1.27s/it, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:07<00:00,  7.00s/it]atch correction: pcr_comparison]    [A

  overall_df = overall_df.append(fold_df, ignore_index=True)
INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Evaluating fold 10
Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████████| 100.0%  - val_loss: 4376.90 - val_cvae_loss: 4376.90


Computing neighbors: 100%|██████████| 1/1 [00:04<00:00,  4.23s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.23it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:02,  4.23it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.11it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:01<00:07,  1.11it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.72it/s, Bio conservation: silhouette_label][A
Metrics:  30%|[34m███       [0m| 3/10 [00:01<00:04,  1.72it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:01<00:03,  1.

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
scpoli-original-latent_dim8,0.679777,0.699521,0.518735,0.622976,0.990951,0.770398,0.298799,0.487457,0.895026,0.715642,0.633465,0.702392,0.674821
scvi-latent_dim8,0.646377,0.682785,0.484271,0.583196,0.992193,0.817921,0.221473,0.372711,0.912141,0.667839,0.598417,0.677764,0.646025


# check with 100 epochs, barlow training folds not finished

In [11]:
# Average the metrics per trainer using only the rows whose fold value is below 5.
process_dataframe(deepcopy(all_results_new.loc[all_results_new["fold"] < 5]))

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow-num-prot-16_hidden-64_bs-64,0.570349,0.623349,0.372065,0.551221,0.981922,0.877867,0.297556,0.372879,0.888788,0.746649,0.636748,0.619781,0.626568
scpoli-original-latent_dim8,0.625722,0.655648,0.438375,0.595093,0.985167,0.827733,0.371286,0.512998,0.885838,0.823416,0.684254,0.660001,0.669702
scvi-latent_dim8,0.574992,0.643776,0.433792,0.560379,0.986908,0.870927,0.238971,0.34075,0.907496,0.624128,0.596455,0.639969,0.622563


# custom cross validation 50 epochs

In [24]:
# Evaluate the custom cross-validation models of every trainer on the query
# scIB metrics, label each result with its trainer name, and average per trainer.
trainers = [scvi, scpoli, barlow]
names = [trainer.get_model_name() for trainer in trainers]

# Collect one labelled frame per trainer and concatenate a single time.
# Growing a frame with pd.concat inside the loop re-copies all earlier rows on
# every iteration and starts from an empty DataFrame, which recent pandas
# versions deprecate when inferring the result dtypes.
per_trainer_frames = []
for trainer, name in zip(trainers, names):
    df = trainer.evaluate_custom_cross_val_models(trainer.query_scib_metrics)
    # .assign returns a labelled copy, so the frame handed back by the trainer
    # is not modified in place.
    per_trainer_frames.append(df.assign(trainer=name))

# pd.concat raises on an empty list; keep the original "empty frame" result
# when there are no trainers.
all_results_new = (
    pd.concat(per_trainer_frames, ignore_index=True)
    if per_trainer_frames
    else pd.DataFrame()
)
process_dataframe(deepcopy(all_results_new))

Unnamed: 0_level_0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total
trainer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
barlow-num-prot-16_hidden-64_bs-64,0.645673,0.670994,0.464293,0.585861,0.990595,0.841663,0.258208,0.413697,0.912443,0.691157,0.623433,0.671483,0.652263
scpoli-original-latent_dim8,0.6695,0.717453,0.581726,0.626976,0.987958,0.764335,0.332197,0.562585,0.880954,0.743357,0.656686,0.716722,0.692708
scvi-latent_dim8,0.643422,0.68285,0.488541,0.585997,0.991952,0.802524,0.230281,0.388897,0.902866,0.706712,0.606256,0.678552,0.649634


# compare barlow on query, no cross validation

In [37]:
# Build one trainer of each kind on the same dataset and evaluate each of them
# once on the query scIB metrics (no cross-validation).
ds = ImmuneDataset()
scvi_ = ScviTrainer(dataset = ds)
barlow_ = ScpoliProtBarlowTrainer(ds)
scpoli_ = OriginalTrainer(ds)

trainers = [scvi_, scpoli_, barlow_]
names = [trainer.get_model_name() for trainer in trainers]

# Collect one labelled frame per trainer and concatenate a single time.
# Growing a frame with pd.concat inside the loop re-copies all earlier rows on
# every iteration and starts from an empty DataFrame, which recent pandas
# versions deprecate when inferring the result dtypes.
per_trainer_frames = []
for trainer, name in zip(trainers, names):
    df = trainer.query_scib_metrics()
    # .assign returns a labelled copy, so the frame handed back by the trainer
    # is not modified in place.
    per_trainer_frames.append(df.assign(trainer=name))

# pd.concat raises on an empty list; keep the original "empty frame" result
# when there are no trainers.
all_results_new = (
    pd.concat(per_trainer_frames, ignore_index=True)
    if per_trainer_frames
    else pd.DataFrame()
)

[34mINFO    [0m File [35m/home/icb/fatemehs.hashemig/models/[0m[35m/pbmc-immune/scvi-latent_dim8/[0m[95mmodel.pt[0m already downloaded         


  self.pid = os.fork()
Multiprocessing is handled by SLURM.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  self.pid = os.fork()
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
SLURM auto-requeueing enabled. Setting signal handlers.


Epoch 50/50: 100%|██████████| 50/50 [00:14<00:00,  6.63it/s, loss=7.14e+03, v_num=1]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:14<00:00,  3.55it/s, loss=7.14e+03, v_num=1]
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             
[34mINFO    [0m Input AnnData not setup with scvi-tools. attempting to transfer AnnData setup                             


Computing neighbors: 100%|██████████| 1/1 [00:02<00:00,  2.01s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.65it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:01,  4.65it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:05<00:27,  3.41s/it, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:05<00:27,  3.41s/it, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:05<00:23,  3.41s/it, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:05<00:20,  3.41s/it, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:06<00:05,  1.

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:02,  1.29it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:06<00:02,  1.29it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:06<00:01,  1.29it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:07<00:00,  7.17s/it]atch correction: pcr_comparison][A

                                                                                         [A

Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4335.18 - val_cvae_loss: 4335.18
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 18 clusters.
 |████████████████████| 100.0%  - val_loss: 4316.55 - val_cvae_loss: 4316.55 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.21


Computing neighbors: 100%|██████████| 1/1 [00:01<00:00,  1.86s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 29.68it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:02,  3.02it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:02,  3.02it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:02,  3.02it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:01,  3.02it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:01,  3.02it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:00,  9

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  9.52it/s, Batch correction: graph_connectivity][A
Metrics:  80%|[34m████████  [0m| 8/10 [00:01<00:00,  9.52it/s, Batch correction: pcr_comparison]    [A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.23s/it]atch correction: pcr_comparison][A

                                                                                         [A

Embedding dictionary:
 	Num conditions: [3]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 



INFO:scarches.trainers.scpoli.trainer:GPU available: True, GPU used: True


Embedding dictionary:
 	Num conditions: [5]
 	Embedding dim: [10]
Encoder Architecture:
	Input Layer in, out and cond: 4000 64 10
	Mean/Var Layer in/out: 64 8
Decoder Architecture:
	First Layer in, out and cond:  8 64 10
	Output Layer in/out:  64 4000 

Initializing dataloaders
Starting training
 |████████████████----| 80.0%  - val_loss: 4495.09 - val_cvae_loss: 4495.09
Initializing unlabeled prototypes with Leiden with an unknown number of  clusters.
Clustering succesful. Found 20 clusters.
 |████████████████████| 100.0%  - val_loss: 4467.01 - val_cvae_loss: 4467.01 - val_prototype_loss:    0.00 - val_unlabeled_loss:    0.38


Computing neighbors: 100%|██████████| 1/1 [00:01<00:00,  1.69s/it]
Embeddings:   0%|[32m          [0m| 0/1 [00:00<?, ?it/s]
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s][A
Metrics:   0%|[34m          [0m| 0/10 [00:00<?, ?it/s, Bio conservation: isolated_labels][A
Metrics:  10%|[34m█         [0m| 1/10 [00:00<00:00, 30.03it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:02,  3.02it/s, Bio conservation: nmi_ari_cluster_labels_kmeans][A
Metrics:  20%|[34m██        [0m| 2/10 [00:00<00:02,  3.02it/s, Bio conservation: silhouette_label]             [A
Metrics:  30%|[34m███       [0m| 3/10 [00:00<00:02,  3.02it/s, Bio conservation: clisi_knn]       [A
Metrics:  40%|[34m████      [0m| 4/10 [00:00<00:01,  3.02it/s, Batch correction: silhouette_batch][A
Metrics:  50%|[34m█████     [0m| 5/10 [00:00<00:01,  3.02it/s, Batch correction: ilisi_knn]       [A
Metrics:  60%|[34m██████    [0m| 6/10 [00:00<00:01,  3

[34mINFO    [0m CD20+ B cells consists of a single batch or is too small. Skip.                                           
[34mINFO    [0m CD4+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m CD8+ T cells consists of a single batch or is too small. Skip.                                            
[34mINFO    [0m Megakaryocyte progenitors consists of a single batch or is too small. Skip.                               
[34mINFO    [0m Monocyte-derived dendritic cells consists of a single batch or is too small. Skip.                        
[34mINFO    [0m NK cells consists of a single batch or is too small. Skip.                                                
[34mINFO    [0m NKT cells consists of a single batch or is too small. Skip.                                               



Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  6.36it/s, Batch correction: kbet_per_label][A
Metrics:  70%|[34m███████   [0m| 7/10 [00:01<00:00,  6.36it/s, Batch correction: graph_connectivity][A
Embeddings: 100%|[32m██████████[0m| 1/1 [00:01<00:00,  1.23s/it]atch correction: pcr_comparison]    [A

                                                                                         [A

In [38]:
# Show the raw combined results, one block of rows per trainer, before any
# numeric filtering or per-trainer averaging is applied.
all_results_new

Unnamed: 0,Isolated labels,KMeans NMI,KMeans ARI,Silhouette label,cLISI,Silhouette batch,iLISI,KBET,Graph connectivity,PCR comparison,Batch correction,Bio conservation,Total,trainer
0,0.612536,0.700514,0.513212,0.563858,0.986493,0.688066,0.0,0.636134,0.908591,0.688592,0.584277,0.675323,0.638904,scvi-latent_dim8
1,Bio conservation,Bio conservation,Bio conservation,Bio conservation,Bio conservation,Batch correction,Batch correction,Batch correction,Batch correction,Batch correction,Aggregate score,Aggregate score,Aggregate score,scvi-latent_dim8
2,0.662704,0.677848,0.473281,0.621808,0.988315,0.624368,0.0,0.622111,0.888645,0.550187,0.537062,0.684791,0.6257,scpoli-original-latent_dim8
3,Bio conservation,Bio conservation,Bio conservation,Bio conservation,Bio conservation,Batch correction,Batch correction,Batch correction,Batch correction,Batch correction,Aggregate score,Aggregate score,Aggregate score,scpoli-original-latent_dim8
4,0.616918,0.738735,0.582646,0.576947,0.980729,0.656629,0.0,0.605909,0.932919,0.560846,0.551261,0.699195,0.640021,barlow-num-prot-16_hidden-64_bs-64
5,Bio conservation,Bio conservation,Bio conservation,Bio conservation,Bio conservation,Batch correction,Batch correction,Batch correction,Batch correction,Batch correction,Aggregate score,Aggregate score,Aggregate score,barlow-num-prot-16_hidden-64_bs-64
