In [1]:
import pandas as pd
import numpy as np
import scanpy as sc
import os
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics.cluster import adjusted_mutual_info_score
from sklearn.metrics.cluster import homogeneity_score
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri

In [2]:
# Load the per-cell annotations; the first column of the TSV becomes the index.
metadata = pd.read_csv('../input/metadata.tsv', sep='\t', index_col=0)
# Number of distinct values found in the 'label' column.
num_clusters = np.unique(metadata['label']).size
print(num_clusters)

13


In [3]:
# Empty score table with one column per metric; rows are added later.
df_metrics = pd.DataFrame(
    columns=['ARI', 'AMI', 'Homogeneity'],
)

In [4]:
# Gather every clustering solution found below the current directory into one
# table that starts from metadata's index and gains the columns of each file.
df_clusters = pd.DataFrame(index=metadata.index)
for dirpath, dirnames, filenames in os.walk("./"):
    # os.walk yields directory entries in arbitrary, filesystem-dependent
    # order. Sorting `dirnames` in place (allowed for top-down walks) and the
    # file names makes the traversal -- and therefore the column order of
    # df_clusters -- reproducible across machines.
    dirnames.sort(key=str.lower)
    for filename in sorted(filenames, key=str.lower):
        # Only files named clustering*.tsv are clustering solutions.
        if not (filename.startswith("clustering") and filename.endswith(".tsv")):
            continue
        print(os.path.join(dirpath, filename))
        df = pd.read_csv(os.path.join(dirpath, filename), sep='\t', index_col=0)
        # Inner join on the index: a row survives only if it is present in
        # every solution merged so far.
        df_clusters = pd.merge(df_clusters, df, how='inner',
                               left_index=True, right_index=True)

# The inner join changes the row count without any error when a solution does
# not cover exactly the rows of metadata; make that visible instead of silent.
if len(df_clusters) != len(metadata):
    print('WARNING: df_clusters has %d rows but metadata has %d'
          % (len(df_clusters), len(metadata)))

./Cicero/clusteringSolution.tsv
./cisTopic/clusteringSolution.tsv
./Cusanovich2018/clusteringSolution.tsv
./scABC/clusteringSolution.tsv
./Scasat/clusteringSolution.tsv
./SnapATAC/clusteringSolution.tsv


In [5]:
# Preview the first five rows of the merged cluster assignments.
df_clusters.head(n=5)

Unnamed: 0,Cicero,cisTopic,cusanovich2018,scABC,Scasat,SnapATAC
TCCGCGAACTAACTAGGTTGCTACGGTCATAGAGGC,5,1,4,9,1,1
TCCGCGAAAGGTCAGCTTTGCGGATAGTGTACTGAC,12,2,9,1,1,1
ATTACTCGTTGCCGTAGGCTTAATCTTGTATAGCCT,5,1,12,9,3,1
TCCGCGAAACCAGGCGCAAAGCTAGGTTGTACTGAC,8,2,5,8,1,9
ATTCAGAATCGTAGCATCGCGCAATGACCCTATCCT,9,2,3,1,3,11


In [6]:
# Score each method's cluster assignment against the reference labels.
#
# The scikit-learn metrics receive two label sequences and pair them up by
# position, not by index. df_clusters is the result of inner merges, so its
# rows are not guaranteed to match metadata row-for-row; select the reference
# labels in df_clusters' own row order to keep each cell paired with itself.
# NOTE(review): assumes the index values of metadata are unique -- confirm.
labels_true = metadata.loc[df_clusters.index, 'label']

for method in df_clusters.columns:
    print(method)
    labels_pred = df_clusters[method]

    # adjusted Rand index
    ari = adjusted_rand_score(labels_true, labels_pred)

    # adjusted mutual information
    ami = adjusted_mutual_info_score(labels_true, labels_pred, average_method='arithmetic')

    # homogeneity
    homo = homogeneity_score(labels_true, labels_pred)

    # Adds (or overwrites) the row for this method in the score table.
    df_metrics.loc[method,'ARI'] = ari
    df_metrics.loc[method,'AMI'] = ami
    df_metrics.loc[method,'Homogeneity'] = homo

Cicero
cisTopic
cusanovich2018
scABC
Scasat
SnapATAC


In [7]:
# Show the score table: one row per method, columns ARI / AMI / Homogeneity.
df_metrics

Unnamed: 0,ARI,AMI,Homogeneity
Cicero,0.294221,0.491658,0.477514
cisTopic,0.334379,0.584576,0.572242
cusanovich2018,0.364865,0.566409,0.562232
scABC,0.324509,0.464398,0.46399
Scasat,0.119738,0.351462,0.332869
SnapATAC,0.272518,0.562663,0.529729


In [8]:
# Write the score table to a CSV file in the current directory.
df_metrics.to_csv(path_or_buf='./clustering_scores.csv')