In [1]:
import pandas as pd
import scanpy as sc
import numpy as np
import h5py

import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
from matplotlib.pyplot import plot,savefig
from sklearn import metrics

import warnings
warnings.filterwarnings("ignore")
from read_count import read_data

import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
from sklearn import metrics
from collections import Counter

In [2]:
def _contingency_counts(y_true, y_pred):
    '''
    Count cells for every (true class, predicted cluster) pair.

    Classes and clusters are the distinct values actually present in each
    vector (via ``np.unique``), so labels need not be contiguous or start at 0.

    Returns a 2-D integer array with one row per distinct value of ``y_true``
    and one column per distinct value of ``y_pred``, both in sorted order.
    '''
    true_values, true_pos = np.unique(y_true, return_inverse=True)
    pred_values, pred_pos = np.unique(y_pred, return_inverse=True)
    counts = np.zeros((len(true_values), len(pred_values)), dtype=int)
    # ufunc.at accumulates correctly when the same (row, column) pair repeats,
    # which plain fancy-index assignment would not.
    np.add.at(counts, (true_pos.ravel(), pred_pos.ravel()), 1)
    return counts


def plot_merge(df, method_name, n, y_true, ax):
    '''
    Draw a heatmap of true classes (rows) against predicted clusters (columns)
    for one merging iteration of one method.

    Parameters
    ----------
    df : mapping
        Saved results of one method (e.g. the object returned by ``np.load``).
        ``'Clusters'`` is read for SCCAF and for the default branch,
        ``'Clusters_merge'`` for SCCAF and ADClust.
    method_name : str
        ``'SCCAF'`` or ``'ADClust'``; any other value selects the layout used
        for scAce in this notebook (``Clusters[0]`` for ``n == 0``, otherwise
        the last entry of ``Clusters[n]``).
    n : int
        n-th iteration of merging (0-based). The panel title shows ``n + 1``.
    y_true : array-like
        Ground-truth label of every cell.
    ax : int
        Subplot specification forwarded to ``plt.subplot`` (e.g. ``141``).

    Raises
    ------
    ValueError
        If the predicted and true label vectors differ in length.
    '''
    ax = plt.subplot(ax)

    if method_name == 'SCCAF':
        clusters = df['Clusters']
        # Same number of clusters before and after means nothing was merged,
        # so the final assignment is plotted instead of a merge step.
        if len(np.unique(clusters[0])) == len(np.unique(clusters[-1])):
            y_pred = clusters[-1]
        else:
            y_pred = df['Clusters_merge'][:, n]
    elif method_name == 'ADClust':
        y_pred = df['Clusters_merge'][n]
    else:
        stages = df['Clusters']
        y_pred = stages[n] if n == 0 else stages[n][-1]

    y_pred = np.array(y_pred, dtype=int).ravel()
    y_true = np.asarray(y_true).ravel()
    if y_pred.shape[0] != y_true.shape[0]:
        raise ValueError(
            'y_true has {} cells but the predicted labels have {}'.format(
                y_true.shape[0], y_pred.shape[0]))

    # Built from the label values that are present, so clusters whose ids are
    # not 0..k-1 (e.g. after merging) are still counted in the right column.
    counts = _contingency_counts(y_true, y_pred)
    c_mat = pd.DataFrame(counts)

    # Display classes and clusters as 1..k regardless of their raw ids.
    c_mat.columns = np.arange(counts.shape[1]) + 1
    c_mat.index = np.arange(counts.shape[0]) + 1

    # Draw on the axes created above rather than on the implicit current axes.
    sns.heatmap(c_mat, cmap="PuBu", linewidths=0.5, linecolor="white", ax=ax)

    label_y = ax.get_yticklabels()
    plt.setp(label_y, rotation=0, horizontalalignment='right', family='Arial')
    label_x = ax.get_xticklabels()
    plt.setp(label_x, rotation=0)
    ax.set_title('Iteration {}'.format(n+1), fontsize=15, family='Arial')

# Human pancreas

In [3]:
# Saved clustering results of the three methods on the human pancreas data.
sccaf = np.load('results/default/Human/SCCAF_wo_sample.npz')
# scAce's 'Clusters' is indexed per iteration as Clusters[n][-1], i.e. it is
# stored as an object array; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True. Only use this on trusted, locally generated files.
scace = np.load('results/default/Human/scAce_wo_sample.npz', allow_pickle=True)
adclust = np.load('results/default/Human/ADClust_wo_sample.npz')

In [4]:
# Ground-truth labels live in the 'Y' dataset. Open read-only and let the
# context manager close the file even if reading fails.
with h5py.File('dataset/Human_p.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

## SCCAF

In [5]:
sccaf['Clusters'].shape

(2, 3605)

In [6]:
sccaf['Clusters_merge'].astype(int).squeeze()

array([ 0,  5,  0, ..., 12,  8, 12])

In [7]:
# SCCAF on human pancreas: one heatmap filling the whole figure.
plt.figure(figsize=(3.6, 2))
plot_merge(df=sccaf, method_name='SCCAF', n=0, y_true=y_true, ax=111)

In [8]:
plt.savefig('Figures/FigureS8A_human.svg', dpi=300, format='svg', bbox_inches='tight')

## ADClust

In [9]:
adclust['Clusters_merge'].shape

(7, 3605)

In [10]:
# ADClust on human pancreas: merging iterations 0..6 side by side (1x7 grid).
plt.figure(figsize=(25, 2))
for panel, merge_iter in enumerate(range(7), start=171):
    plot_merge(adclust, 'ADClust', merge_iter, y_true, panel)

In [11]:
plt.savefig('Figures/FigureS8B_human.svg', dpi=300, format='svg', bbox_inches='tight')

## scAce

In [12]:
scace['Clusters'].shape

(5,)

In [13]:
# scAce on human pancreas: iterations 0..3 side by side (1x4 grid).
plt.figure(figsize=(14.3, 2))
for panel, merge_iter in enumerate(range(4), start=141):
    plot_merge(scace, 'scAce', merge_iter, y_true, panel)

In [14]:
plt.savefig('Figures/FigureS8C_human.svg', dpi=300, format='svg', bbox_inches='tight')

# Human PBMC

In [15]:
# Saved clustering results of the three methods on the human PBMC data.
sccaf = np.load('results/default/PBMC/SCCAF_wo_sample.npz')
# scAce's 'Clusters' is indexed per iteration as Clusters[n][-1], i.e. it is
# stored as an object array; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True. Only use this on trusted, locally generated files.
scace = np.load('results/default/PBMC/scAce_wo_sample.npz', allow_pickle=True)
adclust = np.load('results/default/PBMC/ADClust_wo_sample.npz')

In [16]:
# Ground-truth labels live in the 'Y' dataset. Open read-only and let the
# context manager close the file even if reading fails.
with h5py.File('dataset/Human_PBMC.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

In [17]:
sccaf['Clusters_merge'].astype(int).squeeze()

array([1, 1, 1, ..., 6, 4, 1])

In [18]:
# SCCAF on human PBMC: one heatmap filling the whole figure.
plt.figure(figsize=(3.6, 2))
plot_merge(df=sccaf, method_name='SCCAF', n=0, y_true=y_true, ax=111)

In [19]:
plt.savefig('Figures/FigureS8A_pbmc.svg', dpi=300, format='svg', bbox_inches='tight')

In [20]:
adclust['Clusters_merge'].shape

(4, 4271)

In [21]:
# ADClust on human PBMC: merging iterations 0..3 side by side (1x4 grid).
plt.figure(figsize=(14.3, 2))
for panel, merge_iter in enumerate(range(4), start=141):
    plot_merge(adclust, 'ADClust', merge_iter, y_true, panel)

In [22]:
plt.savefig('Figures/FigureS8B_pbmc.svg', dpi=300, format='svg', bbox_inches='tight')

In [23]:
scace['Clusters'].shape

(3,)

In [24]:
# scAce on human PBMC: iterations 0 and 1 side by side (1x2 grid).
plt.figure(figsize=(7.144, 2))
for panel, merge_iter in enumerate(range(2), start=121):
    plot_merge(scace, 'scAce', merge_iter, y_true, panel)

In [25]:
plt.savefig('Figures/FigureS8C_pbmc.svg', dpi=300, format='svg', bbox_inches='tight')

# Human kidney

In [26]:
# Saved clustering results of the three methods on the human kidney data.
sccaf = np.load('results/default/kidney/SCCAF_wo_sample.npz')
# scAce's 'Clusters' is indexed per iteration as Clusters[n][-1], i.e. it is
# stored as an object array; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True. Only use this on trusted, locally generated files.
scace = np.load('results/default/kidney/scAce_wo_sample.npz', allow_pickle=True)
adclust = np.load('results/default/kidney/ADClust_wo_sample.npz')

In [27]:
# Ground-truth labels live in the 'Y' dataset. Open read-only and let the
# context manager close the file even if reading fails.
with h5py.File('dataset/Human_k.h5', 'r') as data_mat:
    y_true = np.array(data_mat['Y'], dtype='int')

In [28]:
sccaf['Clusters_merge'].astype(int).squeeze()

array([ 9,  4, 15, ..., 28,  1, 21])

In [29]:
# SCCAF on human kidney: one heatmap filling the whole figure.
plt.figure(figsize=(3.6, 2))
plot_merge(df=sccaf, method_name='SCCAF', n=0, y_true=y_true, ax=111)

In [30]:
plt.savefig('Figures/FigureS8A_kidney.svg', dpi=300, format='svg', bbox_inches='tight')

In [31]:
adclust['Clusters_merge'].shape

(7, 5685)

In [32]:
# ADClust on human kidney: adclust['Clusters_merge'] holds 7 merging
# iterations for this dataset, so draw all 7 on a 1x7 grid; the 25-inch-wide
# canvas is the size used for the 7-panel human pancreas figure.
# Previously only iterations 0..5 were drawn on a 1x6 grid, leaving the last
# one out. NOTE(review): confirm the omission was not intentional.
plt.figure(figsize=(25, 2))
for panel, merge_iter in enumerate(range(7), start=171):
    plot_merge(adclust, 'ADClust', merge_iter, y_true, panel)

In [33]:
plt.savefig('Figures/FigureS8B_kidney.svg', dpi=300, format='svg', bbox_inches='tight')

In [34]:
scace['Clusters'].shape

(4,)

In [35]:
# scAce on human kidney: iterations 0..2 side by side (1x3 grid).
plt.figure(figsize=(10.7, 2))
for panel, merge_iter in enumerate(range(3), start=131):
    plot_merge(scace, 'scAce', merge_iter, y_true, panel)

In [36]:
plt.savefig('Figures/FigureS8C_kidney.svg', dpi=300, format='svg', bbox_inches='tight')

# Mouse ES

In [37]:
# Saved clustering results of the three methods on the mouse ES (Klein) data.
sccaf = np.load('results/default/Klein/SCCAF_wo_sample.npz')
# scAce's 'Clusters' is indexed per iteration as Clusters[n][-1], i.e. it is
# stored as an object array; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True. Only use this on trusted, locally generated files.
scace = np.load('results/default/Klein/scAce_wo_sample.npz', allow_pickle=True)
adclust = np.load('results/default/Klein/ADClust_wo_sample.npz')

In [38]:
# Mouse ES: read the dataset and turn the 'cell_type1' annotation into
# integer codes (positions in the sorted array of distinct names, cell_type).
mat, obs, var, uns = read_data(
    'dataset/Mouse_E.h5',
    sparsify=False,
    skip_exprs=False,
)
cell_name = np.array(obs["cell_type1"])
cell_type, y_true = np.unique(cell_name, return_inverse=True)
cell_label = y_true

In [39]:
sccaf['Clusters_merge'].astype(int)

array([[2, 0],
       [2, 0],
       [2, 0],
       ...,
       [0, 1],
       [0, 1],
       [0, 1]])

In [40]:
# SCCAF on mouse ES: merge steps 0 and 1 side by side (1x2 grid).
plt.figure(figsize=(7.14, 2))
for panel, merge_iter in enumerate(range(2), start=121):
    plot_merge(sccaf, 'SCCAF', merge_iter, y_true, panel)

In [41]:
plt.savefig('Figures/FigureS8A_klein.svg', dpi=300, format='svg', bbox_inches='tight')

In [42]:
adclust['Clusters_merge'].shape

(4, 2717)

In [43]:
# ADClust on mouse ES: merging iterations 0..3 side by side (1x4 grid).
plt.figure(figsize=(10.7, 2))
for panel, merge_iter in enumerate(range(4), start=141):
    plot_merge(adclust, 'ADClust', merge_iter, y_true, panel)

In [44]:
plt.savefig('Figures/FigureS8B_klein.svg', dpi=300, format='svg', bbox_inches='tight')

In [45]:
scace['Clusters'].shape

(4,)

In [46]:
# scAce on mouse ES: iterations 0..2 side by side (1x3 grid).
plt.figure(figsize=(10.7, 2))
for panel, merge_iter in enumerate(range(3), start=131):
    plot_merge(scace, 'scAce', merge_iter, y_true, panel)

In [47]:
plt.savefig('Figures/FigureS8C_klein.svg', dpi=300, format='svg', bbox_inches='tight')

# Mouse hypothalamus

In [48]:
# Saved clustering results of the three methods on the mouse hypothalamus
# (Chen) data.
sccaf = np.load('results/default/Chen/SCCAF_wo_sample.npz')
# scAce's 'Clusters' is indexed per iteration as Clusters[n][-1], i.e. it is
# stored as an object array; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True. Only use this on trusted, locally generated files.
scace = np.load('results/default/Chen/scAce_wo_sample.npz', allow_pickle=True)
adclust = np.load('results/default/Chen/ADClust_wo_sample.npz')

In [49]:
# Mouse hypothalamus: read the dataset; y_true_1 holds the integer code of
# each cell's 'cell_type1' annotation (position in the sorted cell_type array).
mat, obs, var, uns = read_data(
    'dataset/Mouse_h.h5',
    sparsify=False,
    skip_exprs=False,
)
cell_name = np.array(obs["cell_type1"])
cell_type, y_true_1 = np.unique(cell_name, return_inverse=True)
# Dense expression matrix, used to build the AnnData object for filtering.
x = np.array(mat.toarray())

In [50]:
# Derive a second label vector (y_true_2) restricted to the cells that survive
# scanpy's gene/cell filtering. It is the one passed to the SCCAF and scAce
# plots below, while ADClust is plotted against the unfiltered y_true_1.
# NOTE(review): presumably SCCAF/scAce were run on the filtered cells - confirm.
adata = sc.AnnData(x)
# Attach the labels to the cells so they are filtered together with them.
adata.obs['celltype'] = y_true_1
# Both calls are used for their in-place effect on adata (return values are
# discarded); genes are filtered first, then cells.
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.filter_cells(adata, min_genes=200)
y_true_2 = np.array(adata.obs['celltype']).squeeze()

In [51]:
sccaf['Clusters_merge'].astype(int)

array([[18],
       [ 4],
       [ 7],
       ...,
       [18],
       [10],
       [ 9]])

In [52]:
# SCCAF on mouse hypothalamus, compared against the filtered labels y_true_2.
plt.figure(figsize=(3.6, 2))
plot_merge(df=sccaf, method_name='SCCAF', n=0, y_true=y_true_2, ax=111)

In [53]:
plt.savefig('Figures/FigureS8A_chen.svg', dpi=300, format='svg', bbox_inches='tight')

In [54]:
adclust['Clusters_merge'].shape

(11, 12089)

In [55]:
# ADClust on mouse hypothalamus, first figure: merging iterations 0..4
# (1x5 grid), compared against the unfiltered labels y_true_1.
plt.figure(figsize=(28.55, 2))
for panel, merge_iter in enumerate(range(5), start=151):
    plot_merge(adclust, 'ADClust', merge_iter, y_true_1, panel)

In [56]:
plt.savefig('Figures/FigureS8B_chen_1.svg', dpi=300, format='svg', bbox_inches='tight')

In [57]:
# ADClust on mouse hypothalamus, second figure: merging iterations 5..10
# (1x6 grid), compared against the unfiltered labels y_true_1.
plt.figure(figsize=(34.26, 2))
for panel, merge_iter in enumerate(range(5, 11), start=161):
    plot_merge(adclust, 'ADClust', merge_iter, y_true_1, panel)

In [58]:
plt.savefig('Figures/FigureS8B_chen_2.svg', dpi=300, format='svg', bbox_inches='tight')

In [59]:
scace['Clusters'].shape

(6,)

In [60]:
# scAce on mouse hypothalamus: iterations 0..4 (1x5 grid), compared against
# the filtered labels y_true_2.
plt.figure(figsize=(28.55, 2))
for panel, merge_iter in enumerate(range(5), start=151):
    plot_merge(scace, 'scAce', merge_iter, y_true_2, panel)

In [61]:
plt.savefig('Figures/FigureS8C_chen.svg', dpi=300, format='svg', bbox_inches='tight')

# Turtle brain

In [62]:
# Saved clustering results of the three methods on the turtle brain data.
sccaf = np.load('results/default/Turtle/SCCAF_wo_sample.npz')
# scAce's 'Clusters' is indexed per iteration as Clusters[n][-1], i.e. it is
# stored as an object array; NumPy >= 1.16.3 refuses to load those unless
# allow_pickle=True. Only use this on trusted, locally generated files.
scace = np.load('results/default/Turtle/scAce_wo_sample.npz', allow_pickle=True)
adclust = np.load('results/default/Turtle/ADClust_wo_sample.npz')

In [63]:
# Turtle brain: read the dataset; y_true holds the integer code of each
# cell's 'cell_type1' annotation (position in the sorted cell_type array).
mat, obs, var, uns = read_data(
    'dataset/Turtle_b.h5',
    sparsify=False,
    skip_exprs=False,
)
cell_name = np.array(obs["cell_type1"])
cell_type, y_true = np.unique(cell_name, return_inverse=True)
# NOTE(review): x is not used by any later cell visible in this notebook;
# the dense conversion may be droppable - confirm before removing.
x = np.array(mat.toarray())

In [64]:
sccaf['Clusters_merge'].astype(int)

array([[17],
       [ 1],
       [17],
       ...,
       [ 0],
       [ 0],
       [10]])

In [65]:
# SCCAF on turtle brain: one heatmap filling the whole figure.
plt.figure(figsize=(3.6, 2))
plot_merge(df=sccaf, method_name='SCCAF', n=0, y_true=y_true, ax=111)

In [66]:
plt.savefig('Figures/FigureS8A_turtle.svg', dpi=300, format='svg', bbox_inches='tight')

In [67]:
adclust['Clusters_merge'].shape

(6, 18664)

In [68]:
# ADClust on turtle brain: merging iterations 0..5 side by side (1x6 grid).
plt.figure(figsize=(34.26, 2))
for panel, merge_iter in enumerate(range(6), start=161):
    plot_merge(adclust, 'ADClust', merge_iter, y_true, panel)

In [69]:
plt.savefig('Figures/FigureS8B_turtle.svg', dpi=300, format='svg', bbox_inches='tight')

In [70]:
scace['Clusters'].shape

(3,)

In [71]:
# scAce on turtle brain: iterations 0 and 1 side by side (1x2 grid).
plt.figure(figsize=(7.14, 2))
for panel, merge_iter in enumerate(range(2), start=121):
    plot_merge(scace, 'scAce', merge_iter, y_true, panel)

In [72]:
plt.savefig('Figures/FigureS8C_turtle.svg', dpi=300, format='svg', bbox_inches='tight')