In [1]:
# Install the FutureWarning filter before the imports below: the filter only
# affects warnings raised after it is set, so it has to precede them to cover
# anything emitted while those packages are imported.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import multigrate as mtg
import scanpy as sc

# Record the package versions of this run next to the results.
sc.logging.print_versions()

# Metrics

## Gayoso 2020

In [2]:
# Load the saved multigrate result for the Gayoso dataset.
latent = sc.read('../data/integrated/multigrate/gayoso-multigrate.h5ad')
# Bare name as the last expression: the notebook displays the object's summary.
latent

AnnData object with n_obs × n_vars = 30293 × 20
    obs: 'batch_indices', 'n_genes', 'percent_mito', 'leiden_subclusters', 'cell_types', 'tissue', 'batch', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts'
    uns: 'cell_types_colors', 'modality_colors', 'neighbors', 'tissue_colors', 'umap'
    obsm: 'X_umap'
    obsp: 'connectivities', 'distances'

In [3]:
# Expose X under obsm['latent']; presumably this is the entry the metrics call
# selects with embed='latent' -- confirm against mtg.metrics.
latent.obsm['latent'] = latent.X

In [4]:
# Score the Gayoso embedding. The first positional argument is None
# (presumably the slot for an un-integrated AnnData -- confirm), and the
# isolated-label F1 and PCR-batch flags are switched off.
# `save` presumably names the CSV the score table is written to -- confirm.
metrics = mtg.metrics.metrics(
    None,
    latent,
    batch_key='batch_indices',
    label_key='cell_types',
    isolated_label_f1=False,
    pcr_batch=False,
    embed='latent',
    save='gayoso-multigrate.csv',
    method='multigrate',
)
# Display the resulting score table.
metrics

Clustering...
ASW label/batch...
Graph connectivity...
ASW label...
NMI cluster/label...
ARI cluster/label...
Isolated label silhouette...


Unnamed: 0,score
ASW_label/batch,0.925038
graph_conn,0.804275
ASW_label,0.5089
NMI_cluster/label,0.656237
ARI_cluster/label,0.596656
isolated_label_silhouette,0.588113


## Kotliarov 2020

In [5]:
# Load the saved multigrate result for the Kotliarov dataset.
latent = sc.read('../data/integrated/multigrate/kotliarov-multigrate.h5ad')
# Bare name as the last expression: the notebook displays the object's summary.
latent

AnnData object with n_obs × n_vars = 52117 × 20
    obs: 'batch', 'cluster_level2', 'cluster_level3', 'sample', 'cell_type', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
    uns: 'cell_type_colors', 'cluster_level2_colors', 'cluster_level3_colors', 'modality_colors', 'neighbors', 'umap'
    obsm: 'X_umap'
    obsp: 'connectivities', 'distances'

In [6]:
# Expose X under obsm['latent']; presumably this is the entry the metrics call
# selects with embed='latent' -- confirm against mtg.metrics.
latent.obsm['latent'] = latent.X

In [7]:
# Score the Kotliarov embedding. The first positional argument is None
# (presumably the slot for an un-integrated AnnData -- confirm), and the
# isolated-label F1 and PCR-batch flags are switched off.
# `save` presumably names the CSV the score table is written to -- confirm.
metrics = mtg.metrics.metrics(
    None,
    latent,
    batch_key='batch',
    label_key='cell_type',
    isolated_label_f1=False,
    pcr_batch=False,
    embed='latent',
    save='kotliarov-multigrate.csv',
    method='multigrate',
)
# Display the resulting score table.
metrics

Clustering...
ASW label/batch...
Graph connectivity...
ASW label...
NMI cluster/label...
ARI cluster/label...
Isolated label silhouette...


Unnamed: 0,score
ASW_label/batch,0.913678
graph_conn,0.998576
ASW_label,0.697682
NMI_cluster/label,0.938181
ARI_cluster/label,0.940018
isolated_label_silhouette,0.578582


## Hao 2020

In [8]:
# Load the saved multigrate result for the Hao dataset.
latent = sc.read('../data/integrated/multigrate/hao-multigrate.h5ad')
# Bare name as the last expression: the notebook displays the object's summary.
latent

AnnData object with n_obs × n_vars = 161764 × 20
    obs: 'nCount_ADT', 'nFeature_ADT', 'nCount_RNA', 'nFeature_RNA', 'orig.ident', 'lane', 'donor', 'time', 'celltype.l1', 'celltype.l2', 'celltype.l3', 'Phase', 'cell_type'
    uns: 'Phase_colors', 'cell_type_colors', 'celltype.l1_colors', 'celltype.l2_colors', 'celltype.l3_colors', 'modality_colors', 'neighbors', 'umap'
    obsm: 'X_umap'
    obsp: 'connectivities', 'distances'

In [9]:
# Expose X under obsm['latent']; presumably this is the entry the metrics call
# selects with embed='latent' -- confirm against mtg.metrics.
latent.obsm['latent'] = latent.X

In [10]:
# Score the Hao embedding, using the 'donor' column as the batch key. The
# first positional argument is None (presumably the slot for an un-integrated
# AnnData -- confirm), and the isolated-label F1 and PCR-batch flags are off.
# `save` presumably names the CSV the score table is written to -- confirm.
metrics = mtg.metrics.metrics(
    None,
    latent,
    batch_key='donor',
    label_key='cell_type',
    isolated_label_f1=False,
    pcr_batch=False,
    embed='latent',
    save='hao-multigrate.csv',
    method='multigrate',
)
# Display the resulting score table.
metrics

Clustering...
ASW label/batch...
Graph connectivity...
ASW label...
NMI cluster/label...
ARI cluster/label...
Isolated label silhouette...


Unnamed: 0,score
ASW_label/batch,0.866077
graph_conn,0.872918
ASW_label,0.69321
NMI_cluster/label,0.918143
ARI_cluster/label,0.926838
isolated_label_silhouette,0.591423


## 10xpbmc10k 2020

In [11]:
# Load the saved multigrate result for the 10x PBMC 10k dataset.
latent = sc.read('../data/integrated/multigrate/10xpbmc10k-multigrate.h5ad')
# Bare name as the last expression: the notebook displays the object's summary.
latent

AnnData object with n_obs × n_vars = 10000 × 20
    obs: 'modality', 'barcode', 'cell_type', 'batch'
    uns: 'cell_type_colors', 'modality_colors', 'neighbors', 'umap'
    obsm: 'X_umap'
    obsp: 'connectivities', 'distances'

In [12]:
# Expose X under obsm['latent']; presumably this is the entry the metrics call
# selects with embed='latent' -- confirm against mtg.metrics.
latent.obsm['latent'] = latent.X

In [13]:
# Score the 10x PBMC 10k embedding without a batch key. The flags
# isolated_label_f1, isolated_label_asw, pcr_batch and asw_batch are all
# switched off here -- presumably because those metrics need a batch
# annotation (confirm against mtg.metrics).
# `save` presumably names the CSV the score table is written to -- confirm.
metrics = mtg.metrics.metrics(
    None,
    latent,
    batch_key=None,
    label_key='cell_type',
    isolated_label_f1=False,
    isolated_label_asw=False,
    pcr_batch=False,
    asw_batch=False,
    embed='latent',
    save='10xpbmc10k-multigrate.csv',
    method='multigrate',
)
# Display the resulting score table.
metrics

Clustering...
Graph connectivity...
ASW label...
NMI cluster/label...
ARI cluster/label...


Unnamed: 0,score
graph_conn,0.992794
ASW_label,0.62667
NMI_cluster/label,0.817137
ARI_cluster/label,0.744744


## Chen 2019

In [14]:
# Load the saved multigrate result for the Chen dataset.
latent = sc.read('../data/integrated/multigrate/chen-multigrate.h5ad')
# Bare name as the last expression: the notebook displays the object's summary.
latent

AnnData object with n_obs × n_vars = 4793 × 20
    obs: 'Batch', 'Barcode', 'cell_type', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
    uns: 'cell_type_colors', 'modality_colors', 'neighbors', 'umap'
    obsm: 'X_umap'
    obsp: 'connectivities', 'distances'

In [15]:
# Expose X under obsm['latent']; presumably this is the entry the metrics call
# selects with embed='latent' -- confirm against mtg.metrics.
latent.obsm['latent'] = latent.X

In [16]:
# Score the Chen embedding without a batch key. The flags isolated_label_f1,
# isolated_label_asw, pcr_batch and asw_batch are all switched off here --
# presumably because those metrics need a batch annotation (confirm against
# mtg.metrics).
# `save` presumably names the CSV the score table is written to -- confirm.
metrics = mtg.metrics.metrics(
    None,
    latent,
    batch_key=None,
    label_key='cell_type',
    isolated_label_f1=False,
    isolated_label_asw=False,
    pcr_batch=False,
    asw_batch=False,
    embed='latent',
    save='chen-multigrate.csv',
    method='multigrate',
)
# Display the resulting score table.
metrics

Clustering...
Graph connectivity...
ASW label...
NMI cluster/label...
ARI cluster/label...


Unnamed: 0,score
graph_conn,0.775878
ASW_label,0.508659
NMI_cluster/label,0.435501
ARI_cluster/label,0.316429


## Cao 2018

In [17]:
# Load the saved multigrate result for the Cao dataset.
latent = sc.read('../data/integrated/multigrate/cao-multigrate.h5ad')
# Bare name as the last expression: the notebook displays the object's summary.
latent

AnnData object with n_obs × n_vars = 7362 × 20
    obs: 'source', 'replicate', 'experiment', 'tsne_1', 'tsne_2', 'cell_type', 'n_genes', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'total_counts_mt', 'log1p_total_counts_mt', 'pct_counts_mt'
    uns: 'cell_type_colors', 'modality_colors', 'neighbors', 'umap'
    obsm: 'X_umap'
    obsp: 'connectivities', 'distances'

In [18]:
# Expose X under obsm['latent']; presumably this is the entry the metrics call
# selects with embed='latent' -- confirm against mtg.metrics.
latent.obsm['latent'] = latent.X

In [19]:
# Score the Cao embedding, using the 'replicate' column as the batch key. The
# first positional argument is None (presumably the slot for an un-integrated
# AnnData -- confirm), and the isolated-label F1 and PCR-batch flags are off.
# `save` presumably names the CSV the score table is written to -- confirm.
metrics = mtg.metrics.metrics(
    None,
    latent,
    batch_key='replicate',
    label_key='cell_type',
    isolated_label_f1=False,
    pcr_batch=False,
    embed='latent',
    save='cao-multigrate.csv',
    method='multigrate',
)
# Display the resulting score table.
metrics

Clustering...
ASW label/batch...
Graph connectivity...
ASW label...
NMI cluster/label...
ARI cluster/label...
Isolated label silhouette...


Unnamed: 0,score
ASW_label/batch,0.939367
graph_conn,0.736478
ASW_label,0.569671
NMI_cluster/label,0.623155
ARI_cluster/label,0.567548
isolated_label_silhouette,0.545964
