In [1]:
import pandas as pd
import scanpy as sc
import numpy as np
import h5py

import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
from matplotlib.pyplot import plot,savefig
from sklearn import metrics

import warnings
warnings.filterwarnings("ignore")
from read_count import read_data

import seaborn as sns

In [2]:
def plot_cluster(df, method_name, n, y_true, ax, tsne_list=None):
    """
    Draw one method's clustering as a t-SNE scatter on `ax` and print its scores.

    df: result container of the method. For 'Seurat' and 'CIDR' it is indexed
        with 'cluster', 'tSNE_1' and 'tSNE_2'; for every other method it is
        indexed with 'Clusters'.
    method_name: name of the method; selects how labels and coordinates are
        read and is echoed in the printed summary.
    n: n-th method in [scScope, scDeepCluster, DESC, graph-sc, SCCAF, ADClust, scAce],
        i.e. the position of the method's coordinates in the t-SNE list.
        Ignored for 'Seurat' and 'CIDR', whose coordinates come from `df`.
    y_true: reference labels the predicted labels are scored against.
    ax: matplotlib Axes that receives the scatter plot.
    tsne_list: optional list of per-method t-SNE coordinates. When omitted,
        the notebook-level `tsne_all` is used, which keeps existing calls working.

    Prints ARI and NMI rounded to two decimals; the panel title shows the
    number of distinct predicted labels K and the ARI. Returns None.
    """

    if method_name in ['Seurat', 'CIDR']:
        # These results carry their own t-SNE coordinates next to the labels.
        y_pred = np.array(df['cluster'])
        tsne_1 = np.array(df['tSNE_1']).reshape(len(y_pred), 1)
        tsne_2 = np.array(df['tSNE_2']).reshape(len(y_pred), 1)
        tsne = np.concatenate((tsne_1, tsne_2), axis=1)

    else:
        # The global is only looked up when no explicit list was supplied.
        coords = tsne_all if tsne_list is None else tsne_list
        tsne = coords[n]

        if method_name == 'scAce':
            # Presumably the archive stores labels for several stages and the
            # last entry of the last stage is the final result -- TODO confirm.
            y_pred = df['Clusters'][-1][-1]
        elif method_name in ['SCCAF', 'ADClust']:
            # Presumably the last entry is the final labelling -- TODO confirm.
            y_pred = df['Clusters'][-1]
        else:
            y_pred = df['Clusters']

    y_pred = np.asarray(y_pred, dtype='int').squeeze()
    ari = np.round(metrics.adjusted_rand_score(y_pred, y_true), 2)
    nmi = np.round(metrics.normalized_mutual_info_score(y_pred, y_true), 2)
    print('Method: {}, ARI={}, NMI={}'.format(method_name, ari, nmi))

    # The expression matrix is only a placeholder that gives AnnData one row
    # per cell; zeros avoid consuming global random state for unused values.
    adata = sc.AnnData(pd.DataFrame(np.zeros((len(y_pred), 1))))
    adata.obs['pred'] = y_pred
    # Categorical strings make scanpy colour the labels as discrete groups.
    adata.obs['pred'] = adata.obs['pred'].astype(str).astype('category')

    adata.obsm['X_tsne'] = tsne

    K = len(np.unique(y_pred))

    # 'none' is scanpy's spelling for "draw no legend".
    sc.pl.tsne(adata, color=['pred'], ax=ax, show=False, legend_loc='none', size=20)
    ax.set_title('K={}, ARI={}'.format(K, ari), fontsize=15, family='Arial')
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

In [3]:
# One 25x13 inch figure split into 6 stacked sub-figures, each holding a
# single row of 9 panels.
fig = plt.figure(figsize=(25, 13))
sub_figs = fig.subfigures(6, 1)

# Gather the per-row Axes into one array so a panel is addressed as axs[row][col].
axs = np.array([row_fig.subplots(1, 9) for row_fig in sub_figs])

## Human1

In [4]:
# Read the reference labels stored under 'Y' as an integer array.
# The file is opened read-only inside a context manager so the handle is
# released even if reading 'Y' raises.
with h5py.File('dataset/Human1.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

In [5]:
# CSV results: first row is the header, first column is the row index.
cidr = pd.read_csv('results/default/Human1/CIDR_wo_sample.csv', index_col=0, header=0)
seurat = pd.read_csv('results/default/Human1/Seurat_wo_sample.csv', index_col=0, header=0)

# NPZ results, one archive per method.
scace = np.load('results/default/Human1/scAce_wo_sample.npz')
adclust = np.load('results/default/Human1/ADClust_wo_sample.npz')
sccaf = np.load('results/default/Human1/SCCAF_wo_sample.npz')
graphsc = np.load('results/default/Human1/graphsc_wo_sample.npz')
desc = np.load('results/default/Human1/DESC_wo_sample.npz')
scd = np.load('results/default/Human1/scDeepCluster_wo_sample.npz')
scscope = np.load('results/default/Human1/scScope_wo_sample.npz')

In [6]:
# Order matters: plot_cluster addresses the t-SNE results by position.
methods = [scscope, scd, desc, graphsc, sccaf, adclust, scace]

# For the last two entries (adclust, scace) only the final element of
# 'Embedding' is kept -- presumably those archives store a sequence of
# embeddings (TODO confirm); the others contribute 'Embedding' as stored.
first_tail = len(methods) - 2
embedding = [
    method['Embedding'][-1] if i >= first_tail else method['Embedding']
    for i, method in enumerate(methods)
]

# Run scanpy's t-SNE on each embedding with a fixed seed and keep the coordinates.
tsne_all = []
for emb in embedding:
    adata = sc.AnnData(emb)
    sc.tl.tsne(adata, random_state=0)
    tsne_all.append(np.array(adata.obsm['X_tsne']))

         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.


In [7]:
# (result, method name, index into tsne_all or None), in panel order left to right.
panels = [
    (scscope, 'scScope', 0),
    (cidr, 'CIDR', None),
    (sccaf, 'SCCAF', 4),
    (seurat, 'Seurat', None),
    (scd, 'scDeepCluster', 1),
    (desc, 'DESC', 2),
    (adclust, 'ADClust', 5),
    (graphsc, 'graph-sc', 3),
    (scace, 'scAce', 6),
]
# Row 0 of the axes grid.
for col, (result, name, tsne_idx) in enumerate(panels):
    plot_cluster(result, name, tsne_idx, y_true, axs[0][col])

Method: scScope, ARI=0.32, NMI=0.61
Method: CIDR, ARI=0.3, NMI=0.44
Method: SCCAF, ARI=0.42, NMI=0.73
Method: Seurat, ARI=0.62, NMI=0.8
Method: scDeepCluster, ARI=0.54, NMI=0.76
Method: DESC, ARI=0.94, NMI=0.92
Method: ADClust, ARI=0.88, NMI=0.83
Method: graph-sc, ARI=0.89, NMI=0.88
Method: scAce, ARI=0.88, NMI=0.87


In [8]:
# Display the figure with the panels drawn so far.
fig

<Figure size 2500x1300 with 54 Axes>

## Human2

In [9]:
# Read the reference labels stored under 'Y' as an integer array.
# The file is opened read-only inside a context manager so the handle is
# released even if reading 'Y' raises.
with h5py.File('dataset/Human2.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

In [10]:
# CSV results: first row is the header, first column is the row index.
cidr = pd.read_csv('results/default/Human2/CIDR_wo_sample.csv', index_col=0, header=0)
seurat = pd.read_csv('results/default/Human2/Seurat_wo_sample.csv', index_col=0, header=0)

# NPZ results, one archive per method.
scace = np.load('results/default/Human2/scAce_wo_sample.npz')
adclust = np.load('results/default/Human2/ADClust_wo_sample.npz')
sccaf = np.load('results/default/Human2/SCCAF_wo_sample.npz')
graphsc = np.load('results/default/Human2/graphsc_wo_sample.npz')
desc = np.load('results/default/Human2/DESC_wo_sample.npz')
scd = np.load('results/default/Human2/scDeepCluster_wo_sample.npz')
scscope = np.load('results/default/Human2/scScope_wo_sample.npz')

In [11]:
# Order matters: plot_cluster addresses the t-SNE results by position.
methods = [scscope, scd, desc, graphsc, sccaf, adclust, scace]

# For the last two entries (adclust, scace) only the final element of
# 'Embedding' is kept -- presumably those archives store a sequence of
# embeddings (TODO confirm); the others contribute 'Embedding' as stored.
first_tail = len(methods) - 2
embedding = [
    method['Embedding'][-1] if i >= first_tail else method['Embedding']
    for i, method in enumerate(methods)
]

# Run scanpy's t-SNE on each embedding with a fixed seed and keep the coordinates.
tsne_all = []
for emb in embedding:
    adata = sc.AnnData(emb)
    sc.tl.tsne(adata, random_state=0)
    tsne_all.append(np.array(adata.obsm['X_tsne']))

         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.


In [12]:
# (result, method name, index into tsne_all or None), in panel order left to right.
panels = [
    (scscope, 'scScope', 0),
    (cidr, 'CIDR', None),
    (sccaf, 'SCCAF', 4),
    (seurat, 'Seurat', None),
    (scd, 'scDeepCluster', 1),
    (desc, 'DESC', 2),
    (adclust, 'ADClust', 5),
    (graphsc, 'graph-sc', 3),
    (scace, 'scAce', 6),
]
# Row 1 of the axes grid.
for col, (result, name, tsne_idx) in enumerate(panels):
    plot_cluster(result, name, tsne_idx, y_true, axs[1][col])

Method: scScope, ARI=0.45, NMI=0.7
Method: CIDR, ARI=0.56, NMI=0.65
Method: SCCAF, ARI=0.54, NMI=0.77
Method: Seurat, ARI=0.57, NMI=0.78
Method: scDeepCluster, ARI=0.55, NMI=0.77
Method: DESC, ARI=0.57, NMI=0.8
Method: ADClust, ARI=0.8, NMI=0.78
Method: graph-sc, ARI=0.71, NMI=0.82
Method: scAce, ARI=0.89, NMI=0.87


## Human3

In [13]:
# Read the reference labels stored under 'Y' as an integer array.
# The file is opened read-only inside a context manager so the handle is
# released even if reading 'Y' raises.
with h5py.File('dataset/Human3.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

In [14]:
# CSV results: first row is the header, first column is the row index.
cidr = pd.read_csv('results/default/Human3/CIDR_wo_sample.csv', index_col=0, header=0)
seurat = pd.read_csv('results/default/Human3/Seurat_wo_sample.csv', index_col=0, header=0)

# NPZ results, one archive per method.
scace = np.load('results/default/Human3/scAce_wo_sample.npz')
adclust = np.load('results/default/Human3/ADClust_wo_sample.npz')
sccaf = np.load('results/default/Human3/SCCAF_wo_sample.npz')
graphsc = np.load('results/default/Human3/graphsc_wo_sample.npz')
desc = np.load('results/default/Human3/DESC_wo_sample.npz')
scd = np.load('results/default/Human3/scDeepCluster_wo_sample.npz')
scscope = np.load('results/default/Human3/scScope_wo_sample.npz')

In [15]:
# Order matters: plot_cluster addresses the t-SNE results by position.
methods = [scscope, scd, desc, graphsc, sccaf, adclust, scace]

# For the last two entries (adclust, scace) only the final element of
# 'Embedding' is kept -- presumably those archives store a sequence of
# embeddings (TODO confirm); the others contribute 'Embedding' as stored.
first_tail = len(methods) - 2
embedding = [
    method['Embedding'][-1] if i >= first_tail else method['Embedding']
    for i, method in enumerate(methods)
]

# Run scanpy's t-SNE on each embedding with a fixed seed and keep the coordinates.
tsne_all = []
for emb in embedding:
    adata = sc.AnnData(emb)
    sc.tl.tsne(adata, random_state=0)
    tsne_all.append(np.array(adata.obsm['X_tsne']))

         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.


In [16]:
# (result, method name, index into tsne_all or None), in panel order left to right.
panels = [
    (scscope, 'scScope', 0),
    (cidr, 'CIDR', None),
    (sccaf, 'SCCAF', 4),
    (seurat, 'Seurat', None),
    (scd, 'scDeepCluster', 1),
    (desc, 'DESC', 2),
    (adclust, 'ADClust', 5),
    (graphsc, 'graph-sc', 3),
    (scace, 'scAce', 6),
]
# Row 2 of the axes grid.
for col, (result, name, tsne_idx) in enumerate(panels):
    plot_cluster(result, name, tsne_idx, y_true, axs[2][col])

Method: scScope, ARI=0.3, NMI=0.58
Method: CIDR, ARI=0.64, NMI=0.66
Method: SCCAF, ARI=0.6, NMI=0.8
Method: Seurat, ARI=0.62, NMI=0.8
Method: scDeepCluster, ARI=0.48, NMI=0.75
Method: DESC, ARI=0.91, NMI=0.9
Method: ADClust, ARI=0.85, NMI=0.84
Method: graph-sc, ARI=0.85, NMI=0.84
Method: scAce, ARI=0.92, NMI=0.91


## Mouse1

In [17]:
# Load the dataset; only the `obs` table is used in this cell.
mat, obs, var, uns = read_data('dataset/Mouse1.h5', sparsify=False, skip_exprs=False)

# Encode the "cell_type1" names as integer codes: each code is the position
# of the cell's name within the sorted unique names in `cell_type`.
cell_name = np.array(obs["cell_type1"])
cell_type, y_true = np.unique(cell_name, return_inverse=True)
cell_label = y_true

In [18]:
# CSV results: first row is the header, first column is the row index.
cidr = pd.read_csv('results/default/Mouse1/CIDR_wo_sample.csv', index_col=0, header=0)
seurat = pd.read_csv('results/default/Mouse1/Seurat_wo_sample.csv', index_col=0, header=0)

# NPZ results, one archive per method.
scace = np.load('results/default/Mouse1/scAce_wo_sample.npz')
adclust = np.load('results/default/Mouse1/ADClust_wo_sample.npz')
sccaf = np.load('results/default/Mouse1/SCCAF_wo_sample.npz')
graphsc = np.load('results/default/Mouse1/graphsc_wo_sample.npz')
desc = np.load('results/default/Mouse1/DESC_wo_sample.npz')
scd = np.load('results/default/Mouse1/scDeepCluster_wo_sample.npz')
scscope = np.load('results/default/Mouse1/scScope_wo_sample.npz')

In [19]:
# Order matters: plot_cluster addresses the t-SNE results by position.
methods = [scscope, scd, desc, graphsc, sccaf, adclust, scace]

# For the last two entries (adclust, scace) only the final element of
# 'Embedding' is kept -- presumably those archives store a sequence of
# embeddings (TODO confirm); the others contribute 'Embedding' as stored.
first_tail = len(methods) - 2
embedding = [
    method['Embedding'][-1] if i >= first_tail else method['Embedding']
    for i, method in enumerate(methods)
]

# Run scanpy's t-SNE on each embedding with a fixed seed and keep the coordinates.
tsne_all = []
for emb in embedding:
    adata = sc.AnnData(emb)
    sc.tl.tsne(adata, random_state=0)
    tsne_all.append(np.array(adata.obsm['X_tsne']))

         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.


In [20]:
# (result, method name, index into tsne_all or None), in panel order left to right.
panels = [
    (scscope, 'scScope', 0),
    (cidr, 'CIDR', None),
    (sccaf, 'SCCAF', 4),
    (seurat, 'Seurat', None),
    (scd, 'scDeepCluster', 1),
    (desc, 'DESC', 2),
    (adclust, 'ADClust', 5),
    (graphsc, 'graph-sc', 3),
    (scace, 'scAce', 6),
]
# Row 3 of the axes grid.
for col, (result, name, tsne_idx) in enumerate(panels):
    plot_cluster(result, name, tsne_idx, y_true, axs[3][col])

Method: scScope, ARI=0.05, NMI=0.13
Method: CIDR, ARI=0.05, NMI=0.12
Method: SCCAF, ARI=0.56, NMI=0.74
Method: Seurat, ARI=0.64, NMI=0.78
Method: scDeepCluster, ARI=0.78, NMI=0.81
Method: DESC, ARI=0.64, NMI=0.8
Method: ADClust, ARI=0.45, NMI=0.58
Method: graph-sc, ARI=0.6, NMI=0.71
Method: scAce, ARI=0.92, NMI=0.9


## Mouse2

In [21]:
# Load the dataset; only the `obs` table is used in this cell.
mat, obs, var, uns = read_data('dataset/Mouse2.h5', sparsify=False, skip_exprs=False)

# Encode the "cell_type1" names as integer codes: each code is the position
# of the cell's name within the sorted unique names in `cell_type`.
cell_name = np.array(obs["cell_type1"])
cell_type, y_true = np.unique(cell_name, return_inverse=True)
cell_label = y_true

In [22]:
# CSV results: first row is the header, first column is the row index.
cidr = pd.read_csv('results/default/Mouse2/CIDR_wo_sample.csv', index_col=0, header=0)
seurat = pd.read_csv('results/default/Mouse2/Seurat_wo_sample.csv', index_col=0, header=0)

# NPZ results, one archive per method.
scace = np.load('results/default/Mouse2/scAce_wo_sample.npz')
adclust = np.load('results/default/Mouse2/ADClust_wo_sample.npz')
sccaf = np.load('results/default/Mouse2/SCCAF_wo_sample.npz')
graphsc = np.load('results/default/Mouse2/graphsc_wo_sample.npz')
desc = np.load('results/default/Mouse2/DESC_wo_sample.npz')
scd = np.load('results/default/Mouse2/scDeepCluster_wo_sample.npz')
scscope = np.load('results/default/Mouse2/scScope_wo_sample.npz')

In [23]:
# Order matters: plot_cluster addresses the t-SNE results by position.
methods = [scscope, scd, desc, graphsc, sccaf, adclust, scace]

# For the last two entries (adclust, scace) only the final element of
# 'Embedding' is kept -- presumably those archives store a sequence of
# embeddings (TODO confirm); the others contribute 'Embedding' as stored.
first_tail = len(methods) - 2
embedding = [
    method['Embedding'][-1] if i >= first_tail else method['Embedding']
    for i, method in enumerate(methods)
]

# Run scanpy's t-SNE on each embedding with a fixed seed and keep the coordinates.
tsne_all = []
for emb in embedding:
    adata = sc.AnnData(emb)
    sc.tl.tsne(adata, random_state=0)
    tsne_all.append(np.array(adata.obsm['X_tsne']))

         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.


In [24]:
# (result, method name, index into tsne_all or None), in panel order left to right.
panels = [
    (scscope, 'scScope', 0),
    (cidr, 'CIDR', None),
    (sccaf, 'SCCAF', 4),
    (seurat, 'Seurat', None),
    (scd, 'scDeepCluster', 1),
    (desc, 'DESC', 2),
    (adclust, 'ADClust', 5),
    (graphsc, 'graph-sc', 3),
    (scace, 'scAce', 6),
]
# Row 4 of the axes grid.
for col, (result, name, tsne_idx) in enumerate(panels):
    plot_cluster(result, name, tsne_idx, y_true, axs[4][col])

Method: scScope, ARI=0.53, NMI=0.74
Method: CIDR, ARI=0.64, NMI=0.67
Method: SCCAF, ARI=0.44, NMI=0.73
Method: Seurat, ARI=0.51, NMI=0.78
Method: scDeepCluster, ARI=0.77, NMI=0.87
Method: DESC, ARI=0.44, NMI=0.75
Method: ADClust, ARI=0.92, NMI=0.89
Method: graph-sc, ARI=0.97, NMI=0.95
Method: scAce, ARI=0.99, NMI=0.99


## Mouse3

In [25]:
# Read the reference labels stored under 'Y' as an integer array.
# The file is opened read-only inside a context manager so the handle is
# released even if reading 'Y' raises.
with h5py.File('dataset/Mouse3.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

In [26]:
# CSV results: first row is the header, first column is the row index.
cidr = pd.read_csv('results/default/Mouse3/CIDR_wo_sample.csv', index_col=0, header=0)
seurat = pd.read_csv('results/default/Mouse3/Seurat_wo_sample.csv', index_col=0, header=0)

# NPZ results, one archive per method.
scace = np.load('results/default/Mouse3/scAce_wo_sample.npz')
adclust = np.load('results/default/Mouse3/ADClust_wo_sample.npz')
sccaf = np.load('results/default/Mouse3/SCCAF_wo_sample.npz')
graphsc = np.load('results/default/Mouse3/graphsc_wo_sample.npz')
desc = np.load('results/default/Mouse3/DESC_wo_sample.npz')
scd = np.load('results/default/Mouse3/scDeepCluster_wo_sample.npz')
scscope = np.load('results/default/Mouse3/scScope_wo_sample.npz')

In [27]:
# Order matters: plot_cluster addresses the t-SNE results by position.
methods = [scscope, scd, desc, graphsc, sccaf, adclust, scace]

# For the last two entries (adclust, scace) only the final element of
# 'Embedding' is kept -- presumably those archives store a sequence of
# embeddings (TODO confirm); the others contribute 'Embedding' as stored.
first_tail = len(methods) - 2
embedding = [
    method['Embedding'][-1] if i >= first_tail else method['Embedding']
    for i, method in enumerate(methods)
]

# Run scanpy's t-SNE on each embedding with a fixed seed and keep the coordinates.
tsne_all = []
for emb in embedding:
    adata = sc.AnnData(emb)
    sc.tl.tsne(adata, random_state=0)
    tsne_all.append(np.array(adata.obsm['X_tsne']))

         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.
         Falling back to preprocessing with `sc.pp.pca` and default params.


In [28]:
# (result, method name, index into tsne_all or None), in panel order left to right.
panels = [
    (scscope, 'scScope', 0),
    (cidr, 'CIDR', None),
    (sccaf, 'SCCAF', 4),
    (seurat, 'Seurat', None),
    (scd, 'scDeepCluster', 1),
    (desc, 'DESC', 2),
    (adclust, 'ADClust', 5),
    (graphsc, 'graph-sc', 3),
    (scace, 'scAce', 6),
]
# Row 5 of the axes grid.
for col, (result, name, tsne_idx) in enumerate(panels):
    plot_cluster(result, name, tsne_idx, y_true, axs[5][col])

Method: scScope, ARI=0.14, NMI=0.34
Method: CIDR, ARI=0.68, NMI=0.66
Method: SCCAF, ARI=0.82, NMI=0.85
Method: Seurat, ARI=0.51, NMI=0.72
Method: scDeepCluster, ARI=0.47, NMI=0.59
Method: DESC, ARI=0.9, NMI=0.9
Method: ADClust, ARI=0.72, NMI=0.71
Method: graph-sc, ARI=0.78, NMI=0.76
Method: scAce, ARI=0.98, NMI=0.97


In [29]:
# Display the figure with the panels drawn so far.
fig

<Figure size 2500x1300 with 54 Axes>

In [30]:
# Save through the Figure object itself rather than pyplot's "current figure",
# so the output does not depend on which figure pyplot considers active
# when this cell runs.
fig.savefig('Figures/FigureS1A.svg', dpi=300, format='svg', bbox_inches='tight')