In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import scanpy as sc
import pandas as pd
import sklearn.metrics as metrics

Import true and predicted Lymphoma data and preprocess

In [None]:
# ---------------------------------------------------------------------------
# Lymphoma: load ground truth and the predictions of scButterfly, BABEL and
# Polarbear, compute the embeddings that are computed in this notebook, and
# attach a coarse 'Cell Type Plot' label to every object.
# ---------------------------------------------------------------------------

# Ground truth.
true_RNA = sc.read_h5ad("/workspace/scButterfly/data/lymphoma/lymphoma_RNA_pp.h5ad")  # all genes
true_ATAC = sc.read_h5ad("/workspace/scButterfly/data/lymphoma/lymphoma_ATAC_pp.h5ad")  # all peaks

# Keep only peaks with a non-zero entry in at least `min_fraction` of the cells.
peak_cell_counts = np.array((true_ATAC.X > 0).sum(axis=0)).flatten()
cell_fraction = peak_cell_counts / true_ATAC.n_obs
min_fraction = 0.005
keep_peaks = cell_fraction >= min_fraction
# Slicing an AnnData yields a view; copy it so that the PCA / neighbours / UMAP
# calls below write into a real object instead of triggering an implicit copy.
true_ATAC = true_ATAC[:, keep_peaks].copy()

# scButterfly predictions.
pred_RNA_sb = sc.read_h5ad("/workspace/scButterfly/data/lymphoma//predicted/pred_RNA_lymphoma2.h5ad")
pred_ATAC_sb = sc.read_h5ad("/workspace/scButterfly/data/lymphoma//predicted/pred_ATAC_lymphoma.h5ad")

# BABEL predictions.
pred_RNA_babel = sc.read_h5ad("/workspace/babel/mymodel/atac_rna_test_preds_pp.h5ad")  # hv (original note; presumably highly variable genes only)
pred_ATAC_babel = sc.read_h5ad("/workspace/babel/mymodel/rna_atac_test_preds.h5ad")

# Same PCA -> kNN graph -> UMAP recipe for every ATAC object.
# NOTE(review): no embedding is computed here for true_RNA, pred_RNA_sb or
# pred_RNA_babel; the plotting cells presumably rely on a UMAP stored in the
# loaded files - confirm.
for atac_adata in (true_ATAC, pred_ATAC_sb, pred_ATAC_babel):
    sc.pp.pca(atac_adata, n_comps=50)
    sc.pp.neighbors(atac_adata, n_neighbors=15, use_rep='X_pca')
    sc.tl.umap(atac_adata, random_state=22)

# BABEL's ATAC prediction gets its cell-type labels from the ground truth.
pred_ATAC_babel.obs = pd.concat([pred_ATAC_babel.obs, true_ATAC.obs.loc[pred_ATAC_babel.obs_names,'Cell Types']], axis=1)

# Ground truth restricted to the cells each method predicted.
true_RNA_sb = true_RNA[pred_RNA_sb.obs_names].copy()
true_ATAC_sb = true_ATAC[pred_ATAC_sb.obs_names].copy()
true_ATAC_babel = true_ATAC[pred_ATAC_babel.obs_names].copy()

# Polarbear RNA prediction: a plain-text matrix plus a separate barcode file;
# gene names and cell-type labels are copied over from the ground truth.
pred_RNA_polar = sc.read_text('/workspace/Polarbear/data/output_first_run/outputfirst_train_test_rnanorm_pred.txt')
test_barcodes = pd.read_csv('/workspace/Polarbear/data/output_first_run/outputfirst_train_test_barcodes.txt', sep='\t', header=None)
pred_RNA_polar.obs_names = test_barcodes[0].values
pred_RNA_polar.obs = pd.concat([pred_RNA_polar.obs, true_RNA.obs.loc[pred_RNA_polar.obs_names,'Cell Types']], axis=1)
pred_RNA_polar.var_names = true_RNA.var_names
pred_RNA_polar.var = pd.concat([pred_RNA_polar.var, true_RNA.var[['gene_ids', 'feature_types', 'genome']]], axis=1)
# NOTE(review): the BMMC cell applies sc.pp.log1p before selecting highly
# variable genes on the Polarbear output; this cell does not - confirm whether
# that difference is intended.
sc.pp.highly_variable_genes(pred_RNA_polar, n_top_genes=3000)
sc.pp.pca(pred_RNA_polar, n_comps=50)
sc.pp.neighbors(pred_RNA_polar, n_neighbors=15, use_rep='X_pca')
sc.tl.umap(pred_RNA_polar, random_state=22)

# Fine-grained annotation -> coarse label shown in the figures.
group_mapping = {
    'Tumor B': 'Tumour B-cells',
    'T': 'T-cells',
    'T cycling': 'T-cells',
    'Mono': 'Monocytes',
    'low GEX mix': 'Mixed cells',
    'Tumor B cycling': 'Tumour B-cells',
    'B': 'B-cells',
    'Mono/B mix': 'Mixed cells',
    # The original key carries a leading space. Both spellings are accepted so
    # that a 'B/T mix' annotation is not silently mapped to NaN.
    ' B/T mix': 'Mixed cells',
    'B/T mix': 'Mixed cells',
    # NOTE(review): every other "mix" label maps to 'Mixed cells' - confirm
    # that 'Monocytes' is intended here.
    'Mono/T mix': 'Monocytes',
    'unknown mix': 'Mixed cells',
    'pDC': 'Dendritic cells',
    'Stromal cells': 'Stromal cells',
    'Fibroblasts': 'Fibroblasts'
}

# Labels missing from group_mapping become NaN in 'Cell Type Plot'.
for labelled_adata in (
    true_RNA_sb,
    true_ATAC_sb,
    pred_RNA_sb,
    pred_ATAC_sb,
    pred_RNA_babel,
    pred_ATAC_babel,
    pred_RNA_polar,
):
    labelled_adata.obs['Cell Type Plot'] = labelled_adata.obs['Cell Types'].map(group_mapping)

Import true and predicted BMMC data and preprocess

In [None]:
# ---------------------------------------------------------------------------
# BMMC: load ground truth and the predictions of scButterfly and Polarbear,
# compute PCA / neighbours / UMAP for each object handled here, and attach a
# coarse 'Cell Type Plot' label to every object.
# ---------------------------------------------------------------------------

# Ground truth.
true_RNA_bmmc = sc.read_h5ad("/workspace/scButterfly/data/bmmc/ds_RNA_bmmc.h5ad")  # all genes
true_ATAC_bmmc = sc.read_h5ad("/workspace/scButterfly/data/bmmc/ds_ATAC_bmmc.h5ad")  # all peaks

# Keep only peaks with a non-zero entry in at least `min_fraction` of the cells.
peak_cell_counts = np.array((true_ATAC_bmmc.X > 0).sum(axis=0)).flatten()
cell_fraction = peak_cell_counts / true_ATAC_bmmc.n_obs
min_fraction = 0.005
keep_peaks = cell_fraction >= min_fraction
# Slicing an AnnData yields a view; copy it so that the PCA / neighbours / UMAP
# calls below write into a real object instead of triggering an implicit copy.
true_ATAC_bmmc = true_ATAC_bmmc[:, keep_peaks].copy()

# scButterfly predictions.
pred_RNA_bmmc_sb = sc.read_h5ad("/workspace/scButterfly/data/bmmc/pred_RNA_bmmc.h5ad")
pred_ATAC_bmmc_sb = sc.read_h5ad("/workspace/scButterfly/data/bmmc/pred_ATAC_bmmc.h5ad")

# Same PCA -> kNN graph -> UMAP recipe for the three objects above.
# NOTE(review): true_RNA_bmmc gets no embedding here; the RNA plotting cell
# presumably relies on a UMAP stored in the loaded file - confirm.
for bmmc_adata in (true_ATAC_bmmc, pred_RNA_bmmc_sb, pred_ATAC_bmmc_sb):
    sc.pp.pca(bmmc_adata, n_comps=50)
    sc.pp.neighbors(bmmc_adata, n_neighbors=15, use_rep='X_pca')
    sc.tl.umap(bmmc_adata, random_state=22)

# Ground truth restricted to the cells scButterfly predicted.
true_RNA_bmmc_test = true_RNA_bmmc[pred_RNA_bmmc_sb.obs_names].copy()
true_ATAC_bmmc_test = true_ATAC_bmmc[pred_ATAC_bmmc_sb.obs_names].copy()

# Polarbear outputs are plain-text matrices; one barcode file names the rows of
# both the RNA prediction and the ATAC embedding, so it is read only once.
test_barcodes = pd.read_csv('/workspace/Polarbear/data/output_bmmc_run/output_bmmc_train_test_barcodes.txt', sep='\t', header=None)

# Polarbear RNA prediction: gene names and cell types come from the ground truth.
pred_RNA_bmmc_polar = sc.read_text('/workspace/Polarbear/data/output_bmmc_run/output_bmmc_train_test_rnanorm_pred.txt')
pred_RNA_bmmc_polar.obs_names = test_barcodes[0].values
pred_RNA_bmmc_polar.obs = pd.concat([pred_RNA_bmmc_polar.obs, true_RNA_bmmc.obs.loc[pred_RNA_bmmc_polar.obs_names,'cell_type']], axis=1)
pred_RNA_bmmc_polar.var_names = true_RNA_bmmc.var_names
pred_RNA_bmmc_polar.var = pd.concat([pred_RNA_bmmc_polar.var, true_RNA_bmmc.var[['gene_id', 'feature_types']]], axis=1)
sc.pp.log1p(pred_RNA_bmmc_polar)
sc.pp.highly_variable_genes(pred_RNA_bmmc_polar, n_top_genes=3000)
sc.pp.pca(pred_RNA_bmmc_polar, n_comps=50)
sc.pp.neighbors(pred_RNA_bmmc_polar, n_neighbors=15, use_rep='X_pca')
sc.tl.umap(pred_RNA_bmmc_polar, random_state=22)

# Polarbear ATAC embedding (the file holds an embedding, not peak predictions).
pred_ATAC_polar_embed = sc.read_text('/workspace/Polarbear/data/output_bmmc_run/output_bmmc_train_test_atac_embedding_on_atacVAE.txt')
pred_ATAC_polar_embed.obs_names = test_barcodes[0].values
pred_ATAC_polar_embed.obs = pd.concat([pred_ATAC_polar_embed.obs, true_ATAC_bmmc.obs.loc[pred_ATAC_polar_embed.obs_names,'cell_type']], axis=1)
# NOTE(review): highly-variable "gene" selection is applied to embedding
# dimensions here - confirm this step is intended.
sc.pp.highly_variable_genes(pred_ATAC_polar_embed, n_top_genes=3000)
sc.pp.pca(pred_ATAC_polar_embed, n_comps=10)
sc.pp.neighbors(pred_ATAC_polar_embed, n_neighbors=15, use_rep='X_pca')
sc.tl.umap(pred_ATAC_polar_embed, random_state=22)

# Fine-grained annotation -> coarse label shown in the figures.
group_mapping_bmmc = {
    'B1 B': 'B1 B-cells',
    'CD4+ T activated': 'CD4+ T-cells',
    'CD4+ T naive': 'CD4+ T-cells',
    'CD8+ T': 'CD8+ T-cells',
    'CD8+ T naive': 'CD8+ T-cells',
    # The leading space of the original label (' CD14+ Monocytes') is removed:
    # it appeared verbatim in legends and changed the category sort order.
    'CD14+ Mono': 'CD14+ Monocytes',
    'CD16+ Mono': 'CD16+ Monocytes',
    'Erythroblast': 'Erythroblasts',
    'G/M prog': 'Progenitor cells',
    'HSC': 'Hematopoietic Stem Cells',
    'ID2-hi myeloid prog': 'Progenitor cells',
    'ILC': 'Innate Lymphoid Cells',
    'Lymph prog': 'Progenitor cells',
    'MK/E prog': 'Progenitor cells',
    'NK': 'NK cells',
    'Naive CD20+ B': 'CD20+ B-cells',
    'Normoblast': 'Normoblasts',
    'Plasma cell': 'Plasma cells',
    'Proerythroblast': 'Proerythroblasts',
    'Transitional B': 'CD20+ B-cells',
    'cDC2': 'Dendritic cells',
    'pDC': 'Dendritic cells',
}

# Labels missing from group_mapping_bmmc become NaN in 'Cell Type Plot'.
for labelled_adata in (
    true_RNA_bmmc_test,
    true_ATAC_bmmc_test,
    pred_RNA_bmmc_sb,
    pred_ATAC_bmmc_sb,
    pred_RNA_bmmc_polar,
    pred_ATAC_polar_embed,
):
    labelled_adata.obs['Cell Type Plot'] = labelled_adata.obs['cell_type'].map(group_mapping_bmmc)

Define colour palette for UMAPs

In [None]:
# Sixteen hex colours handed to scanpy as the categorical palette of every UMAP
# figure. The first eight are annotated with the lymphoma group the original
# author paired them with (presumably the 'Cell Type Plot' categories in
# alphabetical order - confirm); the remaining eight carry no group annotation.
paired_colors = [
    # colours annotated with a lymphoma group
    '#fb9a99',  # light red     -> B cells
    '#33a02c',  # green         -> Dendritic cells
    '#6a3d9a',  # purple        -> Fibroblasts
    '#ff7f00',  # orange        -> Mixed cells
    '#b2df8a',  # light green   -> Monocytes
    '#a6cee3',  # light blue    -> Stromal cells
    '#fdbf6f',  # light orange  -> T cells
    '#cab2d6',  # light purple  -> Tumour B cells
    # additional colours without a group annotation
    '#1f78b4',  # blue
    '#e31a1c',  # red
    '#b15928',  # brown
    '#ffff99',  # soft yellow
    '#8dd3c7',  # turquoise
    '#bc80bd',  # mauve / dusty purple
    '#fb8072',  # coral / warm salmon
    '#80b1d3',  # steel blue
]

Visualize Lymphoma RNA UMAPs

In [None]:
# 2x2 grid of lymphoma RNA UMAPs: ground truth plus one panel per prediction
# method, with a single shared legend to the right of the grid.
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
axs = axs.flatten()

palette = paired_colors

rna_panels = [true_RNA_sb, pred_RNA_babel, pred_RNA_sb, pred_RNA_polar]
titles = ['True RNA', 'Predicted BABEL', 'Predicted scButterfly', 'Predicted Polarbear']

for ax, panel_adata, title in zip(axs, rna_panels, titles):
    # Per-panel legends are suppressed; one figure-level legend is added below.
    sc.pl.umap(
        panel_adata,
        color='Cell Type Plot',
        palette=palette,
        ax=ax,
        show=False,
        frameon=False,
        legend_loc=None,
    )
    ax.set_title(title, fontsize=16, fontname='Times New Roman')
    ax.axis('off')

# Legend entries are taken from the ground-truth panel plotted above.
cell_types = true_RNA_sb.obs['Cell Type Plot'].cat.categories
colors = true_RNA_sb.uns['Cell Type Plot_colors']

handles = []
for label, color in zip(cell_types, colors):
    # Square marker without a connecting line = colour swatch.
    handles.append(
        plt.Line2D([], [], marker="s", linestyle="", color=color, label=label, markersize=8)
    )

legend_options = dict(
    loc='center right',
    bbox_to_anchor=(1.02, 0.5),
    ncol=1,
    frameon=False,
    labelspacing=1.5,
    handletextpad=0.5,
    fontsize=16,
)
fig.legend(handles=handles, **legend_options)

plt.tight_layout()
# Reserve the right-hand 15% of the figure width for the legend.
plt.subplots_adjust(right=0.85)
plt.savefig('/workspace/Benchmarking/figures/umap_rna_lymphoma.svg', format='svg', bbox_inches='tight')
plt.show()

Visualize Lymphoma ATAC UMAPs

In [None]:
# 2x2 grid of lymphoma ATAC UMAPs: ground truth, BABEL and scButterfly in the
# first three panels; the fourth panel is used only to hold the legend.
fig, axs = plt.subplots(2, 2, figsize=(14, 10))
axs = axs.flatten()

palette = paired_colors

atac_panels = [true_ATAC_sb, pred_ATAC_babel, pred_ATAC_sb]
titles = ['True ATAC', 'Predicted BABEL', 'Predicted scButterfly']

for ax, panel_adata, title in zip(axs[:3], atac_panels, titles):
    # Per-panel legends are suppressed; one shared legend is added below.
    sc.pl.umap(panel_adata, color='Cell Type Plot', palette=palette, ax=ax, show=False, frameon=False, legend_loc=None)
    ax.set_title(title, fontsize=16)
    ax.axis('off')

# Legend entries come from the ground-truth object plotted in this cell. The
# previous code read them from `true_RNA_test`, a name that is never defined in
# this notebook, so the cell failed with a NameError on a fresh kernel.
cell_types = true_ATAC_sb.obs['Cell Type Plot'].cat.categories
colors = true_ATAC_sb.uns['Cell Type Plot_colors']

handles = [
    # Square marker without a connecting line = colour swatch.
    plt.Line2D([], [], marker="s", linestyle="", color=color, label=label, markersize=8)
    for label, color in zip(cell_types, colors)
]

# The empty fourth panel hosts the legend.
axs[3].axis('off')
legend = axs[3].legend(
    handles=handles,
    loc='center',
    frameon=False,
    labelspacing=1.3,
    handletextpad=0.5,
    ncol=2,
    fontsize=16
)

plt.tight_layout()
plt.subplots_adjust(right=0.85)
plt.savefig('/workspace/Benchmarking/figures/umap_atac_lymphoma.svg', format='svg', bbox_inches='tight')
plt.show()

Visualize BMMC RNA UMAPs

In [None]:
# 2x2 grid of BMMC UMAPs: true RNA, the scButterfly and Polarbear RNA
# predictions, and Polarbear's ATAC embedding, with one shared legend.
fig, axs = plt.subplots(2, 2, figsize=(14, 10))
axs = axs.flatten()

palette = paired_colors

bmmc_panels = [true_RNA_bmmc_test, pred_RNA_bmmc_sb, pred_RNA_bmmc_polar, pred_ATAC_polar_embed]
titles = ['True RNA', 'Predicted scButterfly', 'Predicted Polarbear', 'ATAC Embeddings Polarbear']

for ax, panel_adata, title in zip(axs, bmmc_panels, titles):
    # Per-panel legends are suppressed; one figure-level legend is added below.
    sc.pl.umap(
        panel_adata,
        color='Cell Type Plot',
        palette=palette,
        ax=ax,
        show=False,
        frameon=False,
        legend_loc=None,
    )
    ax.set_title(title, fontsize=16)
    ax.axis('off')

# Legend entries are taken from the ground-truth panel plotted above.
cell_types = true_RNA_bmmc_test.obs['Cell Type Plot'].cat.categories
colors = true_RNA_bmmc_test.uns['Cell Type Plot_colors']

handles = []
for label, color in zip(cell_types, colors):
    # Square marker without a connecting line = colour swatch.
    handles.append(
        plt.Line2D([], [], marker="s", linestyle="", color=color, label=label, markersize=8)
    )

legend_options = dict(
    loc='center right',
    bbox_to_anchor=(1.1, 0.5),
    ncol=1,
    frameon=False,
    labelspacing=1.5,
    handletextpad=0.5,
    fontsize=16,
)
fig.legend(handles=handles, **legend_options)

plt.tight_layout()
# Reserve the right-hand 15% of the figure width for the legend.
plt.subplots_adjust(right=0.85)
plt.savefig('/workspace/Benchmarking/figures/umap_rna_bmmc.svg', format='svg', bbox_inches='tight')
plt.show()

Visualize BMMC ATAC UMAPs

In [None]:
# Side-by-side BMMC ATAC UMAPs (ground truth vs. scButterfly prediction) with a
# two-column legend placed to the right of the panels.
fig, axs = plt.subplots(1, 2, figsize=(9, 4))
axs = axs.flatten()

palette = paired_colors

atac_panels = [true_ATAC_bmmc_test, pred_ATAC_bmmc_sb]
titles = ['True ATAC', 'Predicted scButterfly']

for ax, panel_adata, title in zip(axs, atac_panels, titles):
    # Per-panel legends are suppressed; one figure-level legend is added below.
    sc.pl.umap(
        panel_adata,
        color='Cell Type Plot',
        palette=palette,
        ax=ax,
        show=False,
        frameon=False,
        legend_loc=None,
    )
    ax.set_title(title, fontsize=16)
    ax.axis('off')

# Legend entries are taken from the ground-truth panel plotted above.
cell_types = true_ATAC_bmmc_test.obs['Cell Type Plot'].cat.categories
colors = true_ATAC_bmmc_test.uns['Cell Type Plot_colors']

handles = []
for label, color in zip(cell_types, colors):
    # Square marker without a connecting line = colour swatch.
    handles.append(
        plt.Line2D([], [], marker="s", linestyle="", color=color, label=label, markersize=8)
    )
labels = list(cell_types)

legend_options = dict(
    loc='center right',
    bbox_to_anchor=(1.64, 0.5),
    ncol=2,
    frameon=False,
    fontsize=16,
)
fig.legend(handles=handles, labels=labels, **legend_options)

plt.tight_layout()
plt.subplots_adjust(right=0.85)
plt.savefig('/workspace/Benchmarking/figures/umap_atac_bmmc.svg', format='svg', bbox_inches='tight')
plt.show()

Calculate the classical clustering metrics ARI, AMI, NMI and HOM for Lymphoma predictions

In [None]:
# Keep only the cells that carry a 'Cell Types' annotation (each result is an
# independent copy of the prediction object), then cluster every copy with
# Leiden at resolution 0.5; the labels are stored in obs['leiden_0.5'].
pred_RNA_sb_c = pred_RNA_sb[pred_RNA_sb.obs['Cell Types'].notna()].copy()
pred_ATAC_sb_c = pred_ATAC_sb[pred_ATAC_sb.obs['Cell Types'].notna()].copy()
pred_RNA_babel_c = pred_RNA_babel[pred_RNA_babel.obs['Cell Types'].notna()].copy()
pred_ATAC_babel_c = pred_ATAC_babel[pred_ATAC_babel.obs['Cell Types'].notna()].copy()
pred_RNA_polar_c = pred_RNA_polar[pred_RNA_polar.obs['Cell Types'].notna()].copy()

for annotated_adata in (
    pred_RNA_sb_c,
    pred_ATAC_babel_c,
    pred_ATAC_sb_c,
    pred_RNA_babel_c,
    pred_RNA_polar_c,
):
    sc.tl.leiden(annotated_adata, resolution=0.5, key_added='leiden_0.5')

In [None]:
# Agreement between the annotated cell types and the Leiden clusters for every
# lymphoma prediction: adjusted Rand index (ARI), adjusted mutual information
# (AMI), normalised mutual information (NMI) and homogeneity (HOM).
lymphoma_predictions = [
    ('RNA_sb', pred_RNA_sb_c),
    ('ATAC_sb', pred_ATAC_sb_c),
    ('RNA_babel', pred_RNA_babel_c),
    ('ATAC_babel', pred_ATAC_babel_c),
    ('RNA_polar', pred_RNA_polar_c),
]

for method_name, clustered_adata in lymphoma_predictions:
    annotated = clustered_adata.obs['Cell Types']
    clusters = clustered_adata.obs['leiden_0.5']
    ARI = metrics.adjusted_rand_score(annotated, clusters)
    AMI = metrics.adjusted_mutual_info_score(annotated, clusters)
    NMI = metrics.normalized_mutual_info_score(annotated, clusters)
    HOM = metrics.homogeneity_score(annotated, clusters)
    # `%` binds tighter than `+`, so only the score template is formatted.
    print(method_name + ': ARI: %.3f, \tAMI: %.3f, \tNMI: %.3f, \tHOM: %.3f' % (ARI, AMI, NMI, HOM))

Calculate the classical clustering metrics ARI, AMI, NMI and HOM for BMMC predictions

In [None]:
# Keep only the cells that carry a 'cell_type' annotation (each result is an
# independent copy of the prediction object), then cluster every copy with
# Leiden at resolution 0.5; the labels are stored in obs['leiden_0.5'].
pred_RNA_bmmc_sb_c = pred_RNA_bmmc_sb[pred_RNA_bmmc_sb.obs['cell_type'].notna()].copy()
pred_ATAC_bmmc_sb_c = pred_ATAC_bmmc_sb[pred_ATAC_bmmc_sb.obs['cell_type'].notna()].copy()
pred_RNA_bmmc_polar_c = pred_RNA_bmmc_polar[pred_RNA_bmmc_polar.obs['cell_type'].notna()].copy()

for annotated_adata in (pred_ATAC_bmmc_sb_c, pred_RNA_bmmc_polar_c, pred_RNA_bmmc_sb_c):
    sc.tl.leiden(annotated_adata, resolution=0.5, key_added='leiden_0.5')

In [None]:
# Agreement between the annotated cell types and the Leiden clusters for every
# BMMC prediction: adjusted Rand index (ARI), adjusted mutual information
# (AMI), normalised mutual information (NMI) and homogeneity (HOM).
bmmc_predictions = [
    ('RNA_sb', pred_RNA_bmmc_sb_c),
    ('ATAC_sb', pred_ATAC_bmmc_sb_c),
    # This row holds the Polarbear RNA prediction; it was previously printed
    # under the label 'RNA_babel', although no BABEL prediction is scored here.
    ('RNA_polar', pred_RNA_bmmc_polar_c),
]

for method_name, clustered_adata in bmmc_predictions:
    annotated = clustered_adata.obs['cell_type']
    clusters = clustered_adata.obs['leiden_0.5']
    ARI = metrics.adjusted_rand_score(annotated, clusters)
    AMI = metrics.adjusted_mutual_info_score(annotated, clusters)
    NMI = metrics.normalized_mutual_info_score(annotated, clusters)
    HOM = metrics.homogeneity_score(annotated, clusters)
    # `%` binds tighter than `+`, so only the score template is formatted.
    print(method_name + ': ARI: %.3f, \tAMI: %.3f, \tNMI: %.3f, \tHOM: %.3f' % (ARI, AMI, NMI, HOM))