In [1]:
from pathlib import Path

import anndata as ad
import numpy as np
import pandas as pd
import scanpy as sc
import scib

In [2]:
# Execute the companion notebook so that the names it defines become available
# here; presumably this is where silhouette_batch_custom (used in the scoring
# loop) comes from -- TODO confirm against custom_silhouette_functions.ipynb.
%run ./custom_silhouette_functions.ipynb

[0;31mSignature:[0m
[0msilhouette_samples_custom[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mX[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlabels[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmetric[0m[0;34m=[0m[0;34m'euclidean'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbetween_cluster_distances[0m[0;34m=[0m[0;34m'nearest'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Compute the average silhouette score for the dataset X with the given labels.

Parameters:
X : array-like, shape (n_samples, n_features)
    Feature array.
labels : array-like, shape (n_samples,)
    Labels of each point.
    
metric : metric for distance calculation, default:"euclidean", alternatives, e.g., "cosine"

between_cluster_distances: one out of "mean_other", "furthest", "nearest"


Returns:
score : float
    The average silhouette score.
[0;31mFile:[0m      /tmp/7409106.1.all.q/ipykernel_3226237/4094074416.py
[0;31mType:[0m      functio

In [3]:
import warnings
# Suppress these two warning categories for the rest of the session so they do
# not drown out the metric output produced by the scoring loop.
for ignored_category in (FutureWarning, ad.OldFormatWarning):
    warnings.simplefilter(action='ignore', category=ignored_category)

In [4]:
# Names of the embeddings to evaluate. Each one is loaded from
# 'embeddings/<name>.embedding.h5ad' and becomes one column of the score table.
scenarios = [
    'Liam_x1_minimal_example',
    'Liam_x5_minimal_example',
    'naiveIntegration_minimal_example',
    'noIntegration_minimal_example',
]

In [5]:
# Seed NumPy's global RNG once, before any scenario is evaluated.
np.random.seed(61)

# Keyword arguments shared by every batch-silhouette call.
batch_asw_kwargs = dict(
    batch_key='batch',
    group_key='cell_type',
    embed='embedding',
    verbose=False,
)

# (score name, scoring function, variant-specific kwargs).
# The order of this list fixes the row order of the resulting score table.
batch_asw_variants = [
    ('asw_batch', scib.me.silhouette_batch, {}),
    ('asw_batch_mean_other', silhouette_batch_custom, {'between_cluster_distances': 'mean_other'}),
    ('asw_batch_furthest', silhouette_batch_custom, {'between_cluster_distances': 'furthest'}),
]

# (score-name suffix, metric kwargs). The empty dict deliberately passes no
# `metric` argument, so each scoring function falls back to its own default.
metric_variants = [
    ('', {}),
    ('_cosine', {'metric': 'cosine'}),
]

# Collect computed scores as {scenario: {score name: value}}; a nested dict is
# simple to convert to a pd.DataFrame afterwards.
score_dict = {}
for scenario in scenarios:
    # Register the scenario before loading so partial results stay inspectable.
    scenario_scores = score_dict[scenario] = {}

    adata = ad.read_h5ad('embeddings/{}.embedding.h5ad'.format(scenario))
    # The stored matrix X is used as the embedding for all metrics below.
    adata.obsm['embedding'] = adata.X

    # Neighbourhood graph on the embedding (needed by the graph-based scores).
    sc.pp.neighbors(adata, use_rep='embedding')

    # --- Level of evaluation: batch/sample ---------------------------------
    # Batch silhouette: scib's implementation plus the custom 'mean_other' and
    # 'furthest' variants, each with the default metric and with cosine.
    for score_name, scorer, variant_kwargs in batch_asw_variants:
        for suffix, metric_kwargs in metric_variants:
            scenario_scores[score_name + suffix] = scorer(
                adata,
                **batch_asw_kwargs,
                **variant_kwargs,
                **metric_kwargs,
            )

    # Graph iLISI and cLISI on the variable 'batch'.
    ilisi_score, clisi_score = scib.me.lisi.lisi_graph(
        adata,
        batch_key='batch',
        label_key='cell_type',
        type_='knn',
    )
    scenario_scores['iLISI_batch'] = ilisi_score
    scenario_scores['cLISI_full'] = clisi_score

    # CiLISI: iLISI computed separately within each cell type, then combined
    # into a cell-count-weighted mean.
    weighted_ilisi = []
    n_cells_total = 0
    for cell_type in adata.obs['cell_type'].unique():
        cell_type_adata = adata[adata.obs['cell_type'] == cell_type]
        n_cells = cell_type_adata.shape[0]
        cell_type_ilisi = scib.metrics.ilisi_graph(cell_type_adata, batch_key='batch', type_='knn')
        weighted_ilisi.append(cell_type_ilisi * n_cells)
        n_cells_total += n_cells
        print(cell_type, cell_type_ilisi)
    print(weighted_ilisi)
    # NOTE(review): nansum drops NaN per-cell-type scores from the numerator,
    # but those cell types' cells are still counted in n_cells_total, so a NaN
    # score is effectively treated as 0. Confirm this weighting is intended.
    cilisi = np.nansum(weighted_ilisi) / n_cells_total
    print(cilisi)
    scenario_scores['CiLISI_batch'] = cilisi

    # --- Level of evaluation: cell type label ------------------------------
    # Label silhouette with the default metric and with cosine distance.
    for suffix, metric_kwargs in metric_variants:
        scenario_scores['asw_label' + suffix] = scib.me.silhouette(
            adata,
            group_key='cell_type',
            embed='embedding',
            **metric_kwargs,
        )

    # NMI: the clustering resolution is optimised with NMI as the criterion
    # immediately before the NMI score is computed.
    scib.metrics.cluster_optimal_resolution(
        adata,
        label_key='cell_type',
        cluster_key='cluster',
        metric=scib.me.nmi,
    )
    scenario_scores['nmi'] = scib.me.nmi(
        adata,
        group1='cluster',
        group2='cell_type',
    )

    # ARI: the clustering is re-optimised with ARI as the criterion (reusing
    # the same 'cluster' key) before the ARI score is computed.
    scib.metrics.cluster_optimal_resolution(
        adata,
        label_key='cell_type',
        cluster_key='cluster',
        metric=scib.me.ari,
    )
    scenario_scores['ari'] = scib.me.ari(adata, cluster_key="cluster", label_key="cell_type")

  from .autonotebook import tqdm as notebook_tqdm


Chunk 334 does not have enough neighbors. Skipping...
Chunk 443 does not have enough neighbors. Skipping...
Chunk 1498 does not have enough neighbors. Skipping...
Chunk 1668 does not have enough neighbors. Skipping...
Chunk 1759 does not have enough neighbors. Skipping...
Chunk 2069 does not have enough neighbors. Skipping...
Chunk 2094 does not have enough neighbors. Skipping...
Chunk 2210 does not have enough neighbors. Skipping...
Chunk 2229 does not have enough neighbors. Skipping...
Chunk 2275 does not have enough neighbors. Skipping...
Naive CD20+ B 0.30846278625752016
Chunk 1259 does not have enough neighbors. Skipping...
Chunk 1562 does not have enough neighbors. Skipping...
CD14+ Mono 0.5422078389298246
Chunk 43 does not have enough neighbors. Skipping...
Chunk 396 does not have enough neighbors. Skipping...
Chunk 582 does not have enough neighbors. Skipping...
Chunk 4294 does not have enough neighbors. Skipping...
Chunk 5866 does not have enough neighbors. Skipping...
Chunk 5

  ilisi = (ilisi - 1) / (nbatches - 1)


Chunk 193 does not have enough neighbors. Skipping...
Chunk 391 does not have enough neighbors. Skipping...
Proerythroblast 0.23569247904438403
Chunk 1 does not have enough neighbors. Skipping...
Chunk 2 does not have enough neighbors. Skipping...
Chunk 3 does not have enough neighbors. Skipping...
Chunk 4 does not have enough neighbors. Skipping...
Chunk 5 does not have enough neighbors. Skipping...
Chunk 6 does not have enough neighbors. Skipping...
Chunk 7 does not have enough neighbors. Skipping...
Chunk 8 does not have enough neighbors. Skipping...
Chunk 9 does not have enough neighbors. Skipping...
Chunk 10 does not have enough neighbors. Skipping...
Chunk 11 does not have enough neighbors. Skipping...
Chunk 12 does not have enough neighbors. Skipping...
Chunk 13 does not have enough neighbors. Skipping...
Chunk 14 does not have enough neighbors. Skipping...
Chunk 15 does not have enough neighbors. Skipping...
Chunk 16 does not have enough neighbors. Skipping...
Chunk 17 does not

  ilisi = (ilisi - 1) / (nbatches - 1)


Chunk 176 does not have enough neighbors. Skipping...
Chunk 433 does not have enough neighbors. Skipping...
Chunk 502 does not have enough neighbors. Skipping...
Chunk 506 does not have enough neighbors. Skipping...
Chunk 622 does not have enough neighbors. Skipping...
Chunk 904 does not have enough neighbors. Skipping...
Chunk 965 does not have enough neighbors. Skipping...
Chunk 975 does not have enough neighbors. Skipping...
Chunk 1001 does not have enough neighbors. Skipping...
Chunk 1064 does not have enough neighbors. Skipping...
Chunk 1091 does not have enough neighbors. Skipping...
Proerythroblast 0.27024398834871727
Chunk 1 does not have enough neighbors. Skipping...
Chunk 2 does not have enough neighbors. Skipping...
Chunk 3 does not have enough neighbors. Skipping...
Chunk 4 does not have enough neighbors. Skipping...
Chunk 5 does not have enough neighbors. Skipping...
Chunk 6 does not have enough neighbors. Skipping...
Chunk 7 does not have enough neighbors. Skipping...
Chu

  ilisi = (ilisi - 1) / (nbatches - 1)


Proerythroblast 0.17001792364167337
Chunk 1 does not have enough neighbors. Skipping...
Chunk 2 does not have enough neighbors. Skipping...
Chunk 3 does not have enough neighbors. Skipping...
Chunk 4 does not have enough neighbors. Skipping...
Chunk 5 does not have enough neighbors. Skipping...
Chunk 6 does not have enough neighbors. Skipping...
Chunk 7 does not have enough neighbors. Skipping...
Chunk 8 does not have enough neighbors. Skipping...
Chunk 9 does not have enough neighbors. Skipping...
Chunk 10 does not have enough neighbors. Skipping...
Chunk 11 does not have enough neighbors. Skipping...
Chunk 12 does not have enough neighbors. Skipping...
Chunk 13 does not have enough neighbors. Skipping...
Chunk 14 does not have enough neighbors. Skipping...
Chunk 15 does not have enough neighbors. Skipping...
Chunk 16 does not have enough neighbors. Skipping...
Chunk 17 does not have enough neighbors. Skipping...
Chunk 18 does not have enough neighbors. Skipping...
Chunk 19 does not h

  ilisi = (ilisi - 1) / (nbatches - 1)


Proerythroblast 0.07465742750453737
Chunk 1 does not have enough neighbors. Skipping...
Chunk 2 does not have enough neighbors. Skipping...
Chunk 3 does not have enough neighbors. Skipping...
Chunk 4 does not have enough neighbors. Skipping...
Chunk 5 does not have enough neighbors. Skipping...
Chunk 6 does not have enough neighbors. Skipping...
Chunk 7 does not have enough neighbors. Skipping...
Chunk 8 does not have enough neighbors. Skipping...
Chunk 9 does not have enough neighbors. Skipping...
Chunk 10 does not have enough neighbors. Skipping...
Chunk 11 does not have enough neighbors. Skipping...
Chunk 12 does not have enough neighbors. Skipping...
Chunk 13 does not have enough neighbors. Skipping...
Chunk 14 does not have enough neighbors. Skipping...
Chunk 15 does not have enough neighbors. Skipping...
Chunk 16 does not have enough neighbors. Skipping...
Chunk 17 does not have enough neighbors. Skipping...
Chunk 18 does not have enough neighbors. Skipping...
Chunk 19 does not h

In [6]:
# Turn the nested {scenario: {score name: value}} dict into a table with one
# column per scenario and one row per score.
scores = pd.DataFrame.from_dict(score_dict, orient='columns')

In [7]:
# Show the score table (rows: scores, columns: scenarios) as the cell output.
scores

Unnamed: 0,Liam_x1_minimal_example,Liam_x5_minimal_example,naiveIntegration_minimal_example,noIntegration_minimal_example
asw_batch,0.888666,0.90424,0.914673,0.857729
asw_batch_cosine,0.790701,0.819081,0.840553,0.763072
asw_batch_mean_other,0.8891,0.908567,0.865616,0.757414
asw_batch_mean_other_cosine,0.808191,0.837589,0.752585,0.618712
asw_batch_furthest,0.83065,0.852432,0.807425,0.661392
asw_batch_furthest_cosine,0.727832,0.757798,0.662525,0.511714
iLISI_batch,0.42294,0.490425,0.214593,0.098271
cLISI_full,0.994188,0.992298,0.987665,0.988227
CiLISI_batch,0.382898,0.421334,0.178911,0.088382
asw_label,0.595108,0.580359,0.541867,0.543457


In [8]:
# Write the score table (rows: scores, columns: scenarios) to CSV, keeping the
# score names as the index column.
scores_csv_path = Path("evaluation/batch_removal_scores_real_data_minimal_example.csv")
# to_csv does not create missing parent directories; create the output folder
# first so a fresh checkout does not fail after the long scoring cell.
scores_csv_path.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(score_dict).to_csv(scores_csv_path, index=True)