In [1]:
import numpy as np
import pandas as pd
import sys,os
import random
import copy
from time import time

import matplotlib.pyplot as plt
import seaborn as sns


from utils.method import read_bic_table

from utils.eval import make_ref_groups
from utils.eval import calculate_perfromance, compare_gene_clusters

# 1. Reading expressions and annotations

In [2]:
# Directory holding all preprocessed (v6) input tables.
DATA_DIR = "data/preprocessed_v6/"


def _read_tsv(path):
    """Read a tab-separated table, using its first column as the row index."""
    return pd.read_csv(path, sep="\t", index_col=0)


# --- TCGA-BRCA: expressions, subtypes/signatures, annotation ---
exprs_file_t = DATA_DIR + "TCGA-BRCA_1079_17Kgenes.Xena_TCGA_PanCan.log2_exprs_z_v6.tsv"
exprs_t = _read_tsv(exprs_file_t)
t_subtypes = _read_tsv(DATA_DIR + "TCGA-BRCA_1079_17Kgenes.Xena_TCGA_PanCan.subtypes_and_signatures_v6.tsv")
t_annotation = _read_tsv(DATA_DIR + "TCGA-BRCA_1079.Xena_TCGA_PanCan.annotation_v6.tsv")

# --- METABRIC: expressions, subtypes/signatures, annotation ---
exprs_file_m = DATA_DIR + "METABRIC_1904_17Kgenes.log2_exprs_z_v6.tsv"
exprs_m = _read_tsv(exprs_file_m)
m_subtypes = _read_tsv(DATA_DIR + "METABRIC_1904_17Kgenes.subtypes_and_signatures_v6.tsv")
m_annotation = _read_tsv(DATA_DIR + "METABRIC_1904.annotation_v6.tsv")

## 1.1 Preparing ground truth samples sets for performance evaluation

### Example of known_groups dictionary for TCGA-BRCA

*make_ref_groups(subtypes, annotation, exprs)*

**input:**
  - subtypes - subtypes dataframe
  - annotation - annotation dataframe
  - exprs - expression dataframe
  
**returns:**
  -  known_groups = {"classification1":{"subt1":{s1,s2,...} , "subt2":{...}, "subt3":{...}, ...}, "classification2":{"subtA":{...}}, ... }
*known_groups* is a dictionary with known sample classifications. Each classification (e.g. PAM50 or IHC or Luminal) is a dict that can contain one or several sample sets 
  -  all_samples = {} set of all samples in expression and annotation files; necessary for computing overlap p-values

In [3]:
# Build the reference sample sets (known_groups_*) and the set of all samples
# (all_samples_*) for each cohort: *_t is TCGA-BRCA, *_m is METABRIC.
known_groups_t, all_samples_t = make_ref_groups(t_subtypes, t_annotation,exprs_t)
known_groups_m, all_samples_m = make_ref_groups(m_subtypes, m_annotation,exprs_m)

# Example 1: 
## The structure of known_groups dict for TCGA-BRCA:

We calculate performance for **classifications**:
    * PAM50 = [Luminal, Basal, Her2, Normal]
    * Intrinsic = [Luminal, Basal, Her2, Normal, Claudin-low]
    * PAM50_AB =  [LumA, LumB, Basal, Her2, Normal]
    * SCMOD2 = [ER-/HER2-, ER+/HER2- High Prolif, ER+/HER2- Low Prolif,  HER2+]
    * IHC = [IHC_HER2, IHC_ER, IHC_PR, IHC_TNBC]
And for **isolated sample sets** corresponding to Luminal, Basal, LumA, NEC subtypes etc. 

In [4]:
# List every reference classification of TCGA-BRCA. Classifications that hold
# a single sample set are reported as individual subtypes; the others are
# printed together with the names of their subtypes.
for cl, subtype_sets in known_groups_t.items():
    if len(subtype_sets) <= 1:
        print(" classification", cl, "(individual subtype)")
        continue
    print("classification", cl)
    print("\tsbtypes:", " ".join(subtype_sets))

classification PAM50
	sbtypes: Basal Normal Her2 Luminal
classification Intrinsic
	sbtypes: Basal Normal Her2 Luminal Claudin-low
classification PAM50_AB
	sbtypes: Basal Normal Her2 LumB LumA
classification SCMOD2
	sbtypes: ER+/HER2- Low Prolif ER+/HER2- High Prolif HER2+ ER-/HER2-
classification IHC
	sbtypes: IHC_HER2 IHC_ER IHC_PR IHC_TNBC
 classification Luminal (individual subtype)
 classification Basal (individual subtype)
 classification Her2 (individual subtype)
 classification LumA (individual subtype)
 classification LumB (individual subtype)
 classification Normal (individual subtype)
 classification Claudin-low (individual subtype)
 classification IHC_HER2 (individual subtype)
 classification IHC_ER (individual subtype)
 classification IHC_PR (individual subtype)
 classification IHC_TNBC (individual subtype)
 classification NET_kmeans (individual subtype)
 classification NET_ward (individual subtype)


# Example 2: 
## evaluation of the resulting sample set (on the example of UnPaSt file) 
reading the results 

In [5]:
# UnPaSt bicluster table to evaluate
file = "results_on_real_data_tuned_v2/TCGA_optimized/TCGA-BRCA.consensus_seed=42.bin=kmeans,pval=0.01,clust=WGCNA,direction=DOWN-UP,ds=3,dch=0.995,max_power=10,precluster=True.biclusters.tsv"
result = read_bic_table(file)  # parse the UnPaSt output into a dataframe
print("sample clusters: ", result.shape[0])

# keep only clusters that contain at least 5 samples
cluster_sizes = result["samples"].apply(len)
result = result.loc[cluster_sizes >= 5, :]

# keep only clusters detected more than once (when that column is present)
if "detected_n_times" in result.columns:
    detected_repeatedly = result["detected_n_times"] > 1
    result = result.loc[detected_repeatedly, :]

print("sample clusters: ", result.shape[0])
result.head(2)

sample clusters:  297
sample clusters:  209


Unnamed: 0_level_0,SNR,n_genes,n_samples,genes,samples,genes_up,genes_down,gene_indexes,sample_indexes,direction,detected_n_times
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0,2.931461,2,38,"{CTAG1B, CTAG1A}","{TCGA-A2-A0YJ-01, TCGA-BH-A0RX-01, TCGA-A2-A4S...","{CTAG1B, CTAG1A}",{},"{3385, 3386}","{1038, 529, 917, 1047, 279, 797, 289, 291, 48,...",UP,3
1,2.857572,331,195,"{CSN3, MIR137HG, ESX1, ABCA13, LINC01625, RPRM...","{TCGA-A2-A04Q-01, TCGA-AR-A2LR-01, TCGA-BH-A0B...","{GLRA2, NR5A1, CSN3, MUC16, TPD52L3, MIR137HG,...","{DNAAF3, TMEM63C, HOXB2, C9orf152, LIMA1, HCAR...","{12288, 16397, 14358, 16411, 30, 51, 4148, 824...","{1024, 514, 1030, 519, 8, 1035, 524, 1039, 16,...",BOTH,10


* ensure that results file is a dataframe with "samples" column
* each row in samples column must contain a non-empty set of samples
## performance evaluation
* requires *known_groups* dict and *all_samples* set  
     - using *make_ref_groups()* is recommended for this breast cancer analysis
     - alternatively, *known_groups* dict and *all_samples* can be created manually
* if samples in (bi)clusters do not match *all_samples* set, trho

*calculate_perfromance(bi_clusters_df, known_groups, all_samples)*

**input:**
  - bi_clusters_df - a dataframe with sample clusters (sets in "samples" column)
  - *known_groups* is a dictionary with known sample classifications. Each classification (e.g. PAM50 or IHC or Luminal) is a dict that can contain one or several sample sets 
  - *all_samples* = {} set of all samples in expression and annotation files; necessary for computing overlap p-values
  
**returns:**
  - performances - *pandas.Series* with overall performance for each classification from *known_groups* 
  - best_matches - a dataframe with information about the best matching (bi)cluster for each sample set from *known_groups* (helpful for debugging and validation)

In [6]:
# Score the filtered UnPaSt clusters against the TCGA-BRCA reference sets
# using the default performance measure (no performance_measure is passed).
performances, best_matches = calculate_perfromance(result, known_groups_t,all_samples_t)
# overall score for the PAM50 classification
print(performances["PAM50"])
performances.head(3)

0.8428274283130457


PAM50        0.842827
Intrinsic    0.813312
PAM50_AB     0.630003
dtype: float64

In [7]:
# First rows of the best-match table returned by calculate_perfromance.
best_matches.head(5)

Unnamed: 0,bm_id,Jaccard,weight,adj_pval,is_enriched,samples,n_samples,classification
Basal,1,0.930693,0.180723,0.0,True,"{TCGA-A2-A04Q-01, TCGA-AR-A2LR-01, TCGA-BH-A0B...",195,PAM50
Normal,134,0.1,0.029657,0.0,True,"{TCGA-D8-A145-01, TCGA-BH-A0DK-01, TCGA-D8-A3Z...",309,PAM50
Her2,29,0.486842,0.100093,0.0,True,"{TCGA-S3-AA14-01, TCGA-EW-A2FR-01, TCGA-AC-A3Q...",118,PAM50
Luminal,11,0.903423,0.689527,0.0,False,"{TCGA-A8-A08H-01, TCGA-A2-A04Q-01, TCGA-AR-A2L...",266,PAM50
Basal,1,0.930693,0.173488,0.0,True,"{TCGA-A2-A04Q-01, TCGA-AR-A2LR-01, TCGA-BH-A0B...",195,Intrinsic


In [8]:
# Repeat the evaluation with performance_measure="ARI" instead of the default.
# NOTE: this overwrites `performances` and `best_matches` from the previous cell.
performances, best_matches = calculate_perfromance(result, known_groups_t,all_samples_t,
                                                   performance_measure="ARI")
# overall score for the PAM50 classification
print(performances["PAM50"])
best_matches.head(5)

0.7234354711836593


Unnamed: 0,bm_id,ARI,weight,adj_pval,is_enriched,samples,n_samples,classification
Basal,1,0.938534,0.180723,0.000337,True,"{TCGA-A2-A04Q-01, TCGA-AR-A2LR-01, TCGA-BH-A0B...",195,PAM50
Normal,26,0.091334,0.029657,0.000137,True,"{TCGA-AR-A2LR-01, TCGA-W8-A86G-01, TCGA-E2-A1I...",191,PAM50
Her2,29,0.559805,0.100093,0.000149,True,"{TCGA-S3-AA14-01, TCGA-EW-A2FR-01, TCGA-AC-A3Q...",118,PAM50
Luminal,11,0.717999,0.689527,0.000602,False,"{TCGA-A8-A08H-01, TCGA-A2-A04Q-01, TCGA-AR-A2L...",266,PAM50
Basal,1,0.938534,0.173488,0.000337,True,"{TCGA-A2-A04Q-01, TCGA-AR-A2LR-01, TCGA-BH-A0B...",195,Intrinsic


In [14]:
def labels_to_clsuters(labels, all_samples):
    """Convert a flat vector of cluster labels into a table of sample clusters.

    Parameters
    ----------
    labels : array-like of shape (n_samples,)
        Cluster label of every sample, e.g. the output of ``fit_predict``.
        Lists are accepted as well as numpy arrays.
    all_samples : array-like of shape (n_samples,)
        Sample names in the same order as ``labels`` (e.g. ``exprs.columns``).
        A pandas Index, a numpy array or a plain list all work.

    Returns
    -------
    pandas.DataFrame
        One row per distinct label (row index = label, in sorted order) with
        the columns ``"samples"`` (set of sample names carrying that label)
        and ``"n_samples"`` (size of that set). Empty input yields an empty
        frame that still has both columns, so downstream filters on
        ``"n_samples"`` keep working.

    Raises
    ------
    ValueError
        If ``labels`` and ``all_samples`` differ in length.
    """
    labels = np.asarray(labels)
    # dtype=object keeps the sample names as the original Python objects
    sample_names = np.asarray(all_samples, dtype=object)
    if labels.shape[0] != sample_names.shape[0]:
        raise ValueError(
            "labels and all_samples must have the same length, got "
            + str(labels.shape[0]) + " and " + str(sample_names.shape[0])
        )

    clust_results = {}
    # np.unique returns the labels sorted, which makes the row order deterministic
    for clust in np.unique(labels):
        members = set(sample_names[labels == clust])
        clust_results[clust] = {"samples": members, "n_samples": len(members)}

    # passing the columns explicitly guarantees the schema even without clusters
    return pd.DataFrame.from_dict(clust_results, orient="index",
                                  columns=["samples", "n_samples"])

# Draw a fixed set of seeds that is reused by every stochastic method;
# seeding the generator first makes the drawn seeds reproducible.
n_runs = 5
random.seed(42)
seeds = [random.randint(0, 1000000) for _ in range(n_runs)]
print("generate ",n_runs," seeds",seeds)

generate  5  seeds [670487, 116739, 26225, 777572, 288389]


# Inputs 

In [15]:
# Select TCGA-BRCA as the dataset used by the clustering cells below.
# The METABRIC inputs cell assigns the same variables, so whichever of the
# two cells was executed last determines the active dataset.
dataset = "TCGA-BRCA"
basedir = "clusterings_evaluation/"
exprs, known_groups, all_samples = exprs_t, known_groups_t, all_samples_t

In [24]:
# Select METABRIC as the dataset used by the clustering cells below.
# The TCGA-BRCA inputs cell assigns the same variables, so whichever of the
# two cells was executed last determines the active dataset.
dataset = "METABRIC"
basedir = "clusterings_evaluation/"
exprs, known_groups, all_samples = exprs_m, known_groups_m, all_samples_m


'TCGA-BRCA'

# Affinity Propagation

In [12]:
from sklearn.cluster import AffinityPropagation

In [13]:
# Damping grid: 0.50, 0.55, ..., 0.95 (np.arange excludes the stop value 1.0)
# plus 0.97 and 0.99. Rounding to two decimals strips the floating-point
# noise that np.arange accumulates.
damping_factors = [
    round(value, 2)
    for value in [*np.arange(0.5, 1.0, 0.05), 0.97, 0.99]
]
n_iters = [200, 1000]
# number of (damping, max_iter) combinations
len(damping_factors) * len(n_iters)

24

In [40]:
# Grid search for AffinityPropagation: every (damping, max_iter) pair is run
# once per seed, clusters with fewer than 5 samples are dropped, and the rest
# is scored against the reference sample sets with performance_measure="ARI".
stats = {}
i=0
method = "AffinityPropagation"

# makedirs also creates `basedir` when it is missing; exist_ok makes the
# cell safe to re-run
out_dir = os.path.join(basedir, method)
os.makedirs(out_dir, exist_ok=True)

for damping in damping_factors:
    for n_iter in n_iters:
        params = "damping="+str(damping)+";max_iter="+str(n_iter)
        # iterate over the generated seeds directly instead of a hard-coded range(5)
        for seed in seeds:
            # exprs is transposed so that samples (its columns) are the clustered rows
            labels = AffinityPropagation(damping=damping, max_iter=n_iter,
                                         random_state=seed).fit_predict(exprs.T)

            clusters = labels_to_clsuters(labels, exprs.columns)
            # copy the filtered slice so the column assignment below acts on
            # an independent frame (avoids SettingWithCopyWarning)
            clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
            performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                               performance_measure="ARI")
            stats[i] = performances.to_dict()
            print(params,seed,stats[i]["PAM50"])
            stats[i]["parameters"]=params
            stats[i]["seed"]= seed

            # saving clusters: samples are written as a sorted, space-separated string
            clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
            clusters = clusters.loc[:,["n_samples","samples"]]
            clusters_file = dataset+"."+method+"."+params+".seed="+str(seed)+".clusters.tsv"
            clusters.to_csv(os.path.join(out_dir, clusters_file),sep="\t")

            i+=1

# one row per run; orient="index" keeps numeric dtypes for the score columns
stats = pd.DataFrame.from_dict(stats, orient="index")
# saving performances
stats.to_csv(os.path.join(basedir, method+"_"+dataset+".tsv"),sep="\t")
stats.sort_values(by="PAM50",ascending=False)

damping=0.5;max_iter=200 670487 0.15948719230192882
damping=0.5;max_iter=200 116739 0.15948719230192882
damping=0.5;max_iter=200 26225 0.15948719230192882
damping=0.5;max_iter=200 777572 0.15948719230192882
damping=0.5;max_iter=200 288389 0.15948719230192882
damping=0.5;max_iter=1000 670487 0.15948719230192882
damping=0.5;max_iter=1000 116739 0.15948719230192882
damping=0.5;max_iter=1000 26225 0.15948719230192882
damping=0.5;max_iter=1000 777572 0.15948719230192882
damping=0.5;max_iter=1000 288389 0.15948719230192882
damping=0.55;max_iter=200 670487 0.15948719230192882
damping=0.55;max_iter=200 116739 0.15948719230192882
damping=0.55;max_iter=200 26225 0.15948719230192882
damping=0.55;max_iter=200 777572 0.15948719230192882
damping=0.55;max_iter=200 288389 0.15948719230192882
damping=0.55;max_iter=1000 670487 0.15948719230192882
damping=0.55;max_iter=1000 116739 0.15948719230192882
damping=0.55;max_iter=1000 26225 0.15948719230192882
damping=0.55;max_iter=1000 777572 0.1594871923019288

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,parameters,seed
93,0.166511,0.169423,0.113845,0.097402,0.120645,0.11609,0.300697,0.29264,0.062559,0.02674,0.262005,0.23389,0.22551,0.15507,0.013887,0.230157,0.3026,0.388272,damping=0.95;max_iter=200,777572
100,0.166511,0.169423,0.114046,0.098626,0.120645,0.11609,0.300697,0.29264,0.062413,0.027352,0.262005,0.23389,0.22551,0.15507,0.013887,0.230157,0.3026,0.388272,damping=0.97;max_iter=200,670487
90,0.166511,0.169423,0.113845,0.097402,0.120645,0.11609,0.300697,0.29264,0.062559,0.02674,0.262005,0.23389,0.22551,0.15507,0.013887,0.230157,0.3026,0.388272,damping=0.95;max_iter=200,670487
91,0.166511,0.169423,0.113845,0.097402,0.120645,0.11609,0.300697,0.29264,0.062559,0.02674,0.262005,0.23389,0.22551,0.15507,0.013887,0.230157,0.3026,0.388272,damping=0.95;max_iter=200,116739
92,0.166511,0.169423,0.113845,0.097402,0.120645,0.11609,0.300697,0.29264,0.062559,0.02674,0.262005,0.23389,0.22551,0.15507,0.013887,0.230157,0.3026,0.388272,damping=0.95;max_iter=200,26225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,0.149671,0.156926,0.121443,0.111155,0.098198,0.131508,0.2631,0.104573,0.132895,0.060075,0.299022,0.317557,0.081449,0.136945,0.018846,0.190133,0.11377,0.094568,damping=0.99;max_iter=200,288389
115,0.149671,0.156926,0.121443,0.111155,0.098198,0.131508,0.2631,0.104573,0.132895,0.060075,0.299022,0.317557,0.081449,0.136945,0.018846,0.190133,0.11377,0.094568,damping=0.99;max_iter=1000,670487
117,0.149671,0.156926,0.121443,0.111155,0.098198,0.131508,0.2631,0.104573,0.132895,0.060075,0.299022,0.317557,0.081449,0.136945,0.018846,0.190133,0.11377,0.094568,damping=0.99;max_iter=1000,26225
118,0.149671,0.156926,0.121443,0.111155,0.098198,0.131508,0.2631,0.104573,0.132895,0.060075,0.299022,0.317557,0.081449,0.136945,0.018846,0.190133,0.11377,0.094568,damping=0.99;max_iter=1000,777572


### Hierarchical clustering
Agglomerative clustering

In [44]:
from sklearn.cluster import AgglomerativeClustering 

# Search grid for agglomerative clustering: linkage criteria x number of
# clusters k = 2..20.
ks = [*range(2, 21)]
linkages = ["ward", "complete", "average", "single"]
# number of (k, linkage) combinations
len(linkages) * len(ks)

76

In [45]:
# Grid search for agglomerative clustering: every (k, linkage) pair is fitted
# once, clusters with fewer than 5 samples are dropped, and the rest is scored
# against the reference sample sets with performance_measure="ARI".
# No seed is involved: AgglomerativeClustering is called without any random
# state, so the output file names do not carry a ".seed=" tag. (Previously the
# name reused a `seed` variable left over from another cell's loop.)
stats = {}
i=0

method = "HierarchicalClustering"
# makedirs also creates `basedir` when it is missing; exist_ok makes the
# cell safe to re-run
out_dir = os.path.join(basedir, method)
os.makedirs(out_dir, exist_ok=True)

for k in ks:
    for linkage in linkages:
        params = "k="+str(k)+";linkage="+str(linkage)
        # exprs is transposed so that samples (its columns) are the clustered rows
        labels = AgglomerativeClustering(n_clusters=k,
                                         compute_full_tree='auto',
                                         linkage=linkage).fit_predict(exprs.T)
        clusters = labels_to_clsuters(labels, exprs.columns)
        # copy the filtered slice so the column assignment below acts on an
        # independent frame (avoids SettingWithCopyWarning)
        clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
        performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                           performance_measure="ARI")
        stats[i] = performances.to_dict()
        print(params,stats[i]["PAM50"])
        stats[i]["parameters"]=params

        # saving clusters: samples are written as a sorted, space-separated string
        clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
        clusters = clusters.loc[:,["n_samples","samples"]]
        clusters_file = dataset+"."+method+"."+params+".clusters.tsv"
        clusters.to_csv(os.path.join(out_dir, clusters_file),sep="\t")
        i+=1

# one row per run; orient="index" keeps numeric dtypes for the score columns
stats = pd.DataFrame.from_dict(stats, orient="index")
# saving performances
stats.to_csv(os.path.join(basedir, method+"_"+dataset+".tsv"),sep="\t")
stats.sort_values(by = "PAM50",ascending=False)

k=2;linkage=ward 0.43335484448806205
k=2;linkage=complete 0.0
k=2;linkage=average 0.00235646018430768
k=2;linkage=single 0.0
k=3;linkage=ward 0.4385604896386175
k=3;linkage=complete 0.0
k=3;linkage=average 0.00235646018430768
k=3;linkage=single 0.00235646018430768
k=4;linkage=ward 0.43546495595039086
k=4;linkage=complete 0.0323988505857944
k=4;linkage=average 0.0
k=4;linkage=single 0.008214422837779997
k=5;linkage=ward 0.4355970306029428
k=5;linkage=complete 0.0323988505857944
k=5;linkage=average 0.011644716818906636
k=5;linkage=single 0.010948864204035524
k=6;linkage=ward 0.4776220978043249
k=6;linkage=complete 0.12430909585028418
k=6;linkage=average 0.02246494502748311
k=6;linkage=single 0.013681541748270847
k=7;linkage=ward 0.4776220978043249
k=7;linkage=complete 0.12430909585028418
k=7;linkage=average 0.02312881699058949
k=7;linkage=single 0.016412506117806168
k=8;linkage=ward 0.2639698399920336
k=8;linkage=complete 0.31882607694572085
k=8;linkage=average 0.025746866523425206
k=8;l

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,parameters
20,0.477622,0.461565,0.334065,0.345684,0.322449,0.42049,0.851772,0.319872,0.214842,0.208882,0.058364,0.076547,0.121945,0.4032,0.245442,0.498923,0.149488,0.114361,k=7;linkage=ward
16,0.477622,0.461565,0.334065,0.345684,0.322449,0.42049,0.851772,0.319872,0.214842,0.208882,0.058364,0.076547,0.121945,0.4032,0.245442,0.498923,0.118502,0.082501,k=6;linkage=ward
4,0.43856,0.424067,0.150439,0.163245,0.307481,0.42049,0.851772,-0.046752,0.02839,-0.031864,-0.02139,0.076547,-0.043375,0.4032,0.245442,0.498923,0.118502,0.082501,k=3;linkage=ward
12,0.435597,0.421222,0.244479,0.272155,0.314934,0.42049,0.851772,-0.092976,0.094375,0.208882,0.034693,0.076547,0.038942,0.4032,0.245442,0.498923,0.118502,0.082501,k=5;linkage=ward
8,0.435465,0.421095,0.245457,0.269576,0.314934,0.42049,0.851772,-0.092976,0.097187,0.208882,0.030239,0.076547,0.038942,0.4032,0.245442,0.498923,0.118502,0.082501,k=4;linkage=ward
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,0.002356,0.002262,0.002356,0.002202,0.0,0.0,0.013039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=2;linkage=average
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=3;linkage=complete
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=2;linkage=complete
3,0.0,0.0016,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03996,0.0,0.0,0.0,0.0,0.0,0.0,k=2;linkage=single


# k-means

In [25]:
from sklearn.cluster import KMeans

# Search grid for k-means: initialisation scheme x iteration cap x tolerance
# x number of clusters k = 2..20. The fitting cell additionally repeats every
# combination for each seed.
ks = [*range(2, 21)]
tols = [1e-3, 1e-4, 1e-5]
max_iters = [300, 1000]
inits = ['k-means++', 'random']
# number of parameter combinations (per seed)
len(inits) * len(max_iters) * len(tols) * len(ks)

228

In [26]:
# Grid search for k-means: every (init, max_iter, tol, k) combination is run
# once per seed, clusters with fewer than 5 samples are dropped, and the rest
# is scored against the reference sample sets with performance_measure="ARI".
stats = {}
i=0

method = "kmeans"
# makedirs also creates `basedir` when it is missing; exist_ok makes the
# cell safe to re-run
out_dir = os.path.join(basedir, method)
os.makedirs(out_dir, exist_ok=True)

for init in inits:
    # the loop variable is named `max_iter`, not `n_iters`, so it no longer
    # overwrites the `n_iters` list used by the AffinityPropagation cells
    for max_iter in max_iters:
        for seed in seeds:
            for tol in tols:
                for k in ks:
                    params = "k="+str(k)+";max_iter="+str(max_iter)+";init="+str(init)+";tol="+str(tol)
                    # exprs is transposed so that samples (its columns) are the clustered rows
                    labels = KMeans(n_clusters=k, init=init,
                                    n_init=10, max_iter=max_iter,
                                    tol=tol,
                                    random_state=seed,
                                    algorithm='lloyd').fit_predict(exprs.T)
                    clusters = labels_to_clsuters(labels, exprs.columns)
                    # copy the filtered slice so the column assignment below acts
                    # on an independent frame (avoids SettingWithCopyWarning)
                    clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
                    performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                                       performance_measure="ARI")
                    stats[i] = performances.to_dict()
                    stats[i]["seed"]=seed
                    stats[i]["parameters"]=params
                    print(params,seed,stats[i]["PAM50"])

                    # saving clusters: samples are written as a sorted, space-separated string
                    clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
                    clusters = clusters.loc[:,["n_samples","samples"]]
                    clusters_file = dataset+"."+method+"."+params+".seed="+str(seed)+".clusters.tsv"
                    clusters.to_csv(os.path.join(out_dir, clusters_file),sep="\t")
                    i+=1

# one row per run; orient="index" keeps numeric dtypes for the score columns
stats = pd.DataFrame.from_dict(stats, orient="index")
# saving performances
stats.to_csv(os.path.join(basedir, method+"_"+dataset+".tsv"),sep="\t")
stats.sort_values(by = "PAM50",ascending=False)

k=2;max_iter=300;init=k-means++;tol=0.001 670487 0.41123148150788613
k=3;max_iter=300;init=k-means++;tol=0.001 670487 0.5256893548969211
k=4;max_iter=300;init=k-means++;tol=0.001 670487 0.5028401200817264
k=5;max_iter=300;init=k-means++;tol=0.001 670487 0.46767091275851724
k=6;max_iter=300;init=k-means++;tol=0.001 670487 0.48780923551486866
k=7;max_iter=300;init=k-means++;tol=0.001 670487 0.48920302981942565
k=8;max_iter=300;init=k-means++;tol=0.001 670487 0.361304271040795
k=9;max_iter=300;init=k-means++;tol=0.001 670487 0.3272839242750194
k=10;max_iter=300;init=k-means++;tol=0.001 670487 0.3887857116449982
k=11;max_iter=300;init=k-means++;tol=0.001 670487 0.3731945665059493
k=12;max_iter=300;init=k-means++;tol=0.001 670487 0.30700351922916835
k=13;max_iter=300;init=k-means++;tol=0.001 670487 0.25087223014976895
k=14;max_iter=300;init=k-means++;tol=0.001 670487 0.23914210849343026
k=15;max_iter=300;init=k-means++;tol=0.001 670487 0.20953987237737032
k=16;max_iter=300;init=k-means++;to

k=7;max_iter=300;init=k-means++;tol=0.001 26225 0.4990205998158106
k=8;max_iter=300;init=k-means++;tol=0.001 26225 0.36363255119092136
k=9;max_iter=300;init=k-means++;tol=0.001 26225 0.3131158605380072
k=10;max_iter=300;init=k-means++;tol=0.001 26225 0.3816232918046123
k=11;max_iter=300;init=k-means++;tol=0.001 26225 0.2954130809836477
k=12;max_iter=300;init=k-means++;tol=0.001 26225 0.2795676359693252
k=13;max_iter=300;init=k-means++;tol=0.001 26225 0.25569122629571395
k=14;max_iter=300;init=k-means++;tol=0.001 26225 0.22700954070153517
k=15;max_iter=300;init=k-means++;tol=0.001 26225 0.23521311970028225
k=16;max_iter=300;init=k-means++;tol=0.001 26225 0.16322574395899903
k=17;max_iter=300;init=k-means++;tol=0.001 26225 0.21887737364155574
k=18;max_iter=300;init=k-means++;tol=0.001 26225 0.21242983709321847
k=19;max_iter=300;init=k-means++;tol=0.001 26225 0.2566226635762301
k=20;max_iter=300;init=k-means++;tol=0.001 26225 0.17479639968210547
k=2;max_iter=300;init=k-means++;tol=0.0001 

k=12;max_iter=300;init=k-means++;tol=0.001 288389 0.23843939084863064
k=13;max_iter=300;init=k-means++;tol=0.001 288389 0.277859854487182
k=14;max_iter=300;init=k-means++;tol=0.001 288389 0.22718988990047273
k=15;max_iter=300;init=k-means++;tol=0.001 288389 0.23067423397302672
k=16;max_iter=300;init=k-means++;tol=0.001 288389 0.20991816218098186
k=17;max_iter=300;init=k-means++;tol=0.001 288389 0.3215455254717931
k=18;max_iter=300;init=k-means++;tol=0.001 288389 0.20618155416118863
k=19;max_iter=300;init=k-means++;tol=0.001 288389 0.2571931666119668
k=20;max_iter=300;init=k-means++;tol=0.001 288389 0.20414901093470023
k=2;max_iter=300;init=k-means++;tol=0.0001 288389 0.4231466434101364
k=3;max_iter=300;init=k-means++;tol=0.0001 288389 0.5254176670027062
k=4;max_iter=300;init=k-means++;tol=0.0001 288389 0.47694135325648757
k=5;max_iter=300;init=k-means++;tol=0.0001 288389 0.46767091275851724
k=6;max_iter=300;init=k-means++;tol=0.0001 288389 0.4770923868119825
k=7;max_iter=300;init=k-mea

k=16;max_iter=1000;init=k-means++;tol=0.001 116739 0.24851497961690155
k=17;max_iter=1000;init=k-means++;tol=0.001 116739 0.24761159324183374
k=18;max_iter=1000;init=k-means++;tol=0.001 116739 0.21105299256861232
k=19;max_iter=1000;init=k-means++;tol=0.001 116739 0.19908613203315784
k=20;max_iter=1000;init=k-means++;tol=0.001 116739 0.18092559565063826
k=2;max_iter=1000;init=k-means++;tol=0.0001 116739 0.5778080519260934
k=3;max_iter=1000;init=k-means++;tol=0.0001 116739 0.5292529899833144
k=4;max_iter=1000;init=k-means++;tol=0.0001 116739 0.48000795507850125
k=5;max_iter=1000;init=k-means++;tol=0.0001 116739 0.4700988432417838
k=6;max_iter=1000;init=k-means++;tol=0.0001 116739 0.4936838449166714
k=7;max_iter=1000;init=k-means++;tol=0.0001 116739 0.3761000286421021
k=8;max_iter=1000;init=k-means++;tol=0.0001 116739 0.4847721648556925
k=9;max_iter=1000;init=k-means++;tol=0.0001 116739 0.32560684007403334
k=10;max_iter=1000;init=k-means++;tol=0.0001 116739 0.22876905751140936
k=11;max_it

k=20;max_iter=1000;init=k-means++;tol=0.001 777572 0.18545139268816396
k=2;max_iter=1000;init=k-means++;tol=0.0001 777572 0.41631732537741156
k=3;max_iter=1000;init=k-means++;tol=0.0001 777572 0.5116550905596369
k=4;max_iter=1000;init=k-means++;tol=0.0001 777572 0.47726334822138305
k=5;max_iter=1000;init=k-means++;tol=0.0001 777572 0.48631849203017186
k=6;max_iter=1000;init=k-means++;tol=0.0001 777572 0.4824733828829334
k=7;max_iter=1000;init=k-means++;tol=0.0001 777572 0.4942736166798346
k=8;max_iter=1000;init=k-means++;tol=0.0001 777572 0.36190871220290166
k=9;max_iter=1000;init=k-means++;tol=0.0001 777572 0.2995750749685797
k=10;max_iter=1000;init=k-means++;tol=0.0001 777572 0.3870011672774454
k=11;max_iter=1000;init=k-means++;tol=0.0001 777572 0.3846072954662195
k=12;max_iter=1000;init=k-means++;tol=0.0001 777572 0.3104585226609029
k=13;max_iter=1000;init=k-means++;tol=0.0001 777572 0.24416438244324992
k=14;max_iter=1000;init=k-means++;tol=0.0001 777572 0.1876997787481284
k=15;max_

k=5;max_iter=300;init=random;tol=0.0001 670487 0.45608225748159326
k=6;max_iter=300;init=random;tol=0.0001 670487 0.48767140491493705
k=7;max_iter=300;init=random;tol=0.0001 670487 0.48253363337095656
k=8;max_iter=300;init=random;tol=0.0001 670487 0.3520953238439611
k=9;max_iter=300;init=random;tol=0.0001 670487 0.4532161258580707
k=10;max_iter=300;init=random;tol=0.0001 670487 0.23332340013421562
k=11;max_iter=300;init=random;tol=0.0001 670487 0.34861089301186876
k=12;max_iter=300;init=random;tol=0.0001 670487 0.23984541141784016
k=13;max_iter=300;init=random;tol=0.0001 670487 0.23984541141784016
k=14;max_iter=300;init=random;tol=0.0001 670487 0.2874923685227689
k=15;max_iter=300;init=random;tol=0.0001 670487 0.22531372573065733
k=16;max_iter=300;init=random;tol=0.0001 670487 0.3496821737662815
k=17;max_iter=300;init=random;tol=0.0001 670487 0.21485631497178803
k=18;max_iter=300;init=random;tol=0.0001 670487 0.21845303087358003
k=19;max_iter=300;init=random;tol=0.0001 670487 0.2087358

k=15;max_iter=300;init=random;tol=0.0001 26225 0.22777448404511902
k=16;max_iter=300;init=random;tol=0.0001 26225 0.23241940382853465
k=17;max_iter=300;init=random;tol=0.0001 26225 0.23789628553414022
k=18;max_iter=300;init=random;tol=0.0001 26225 0.23026577401303386
k=19;max_iter=300;init=random;tol=0.0001 26225 0.22129061266851174
k=20;max_iter=300;init=random;tol=0.0001 26225 0.21597014306179912
k=2;max_iter=300;init=random;tol=1e-05 26225 0.5614308607697318
k=3;max_iter=300;init=random;tol=1e-05 26225 0.5256893548969211
k=4;max_iter=300;init=random;tol=1e-05 26225 0.502699898726736
k=5;max_iter=300;init=random;tol=1e-05 26225 0.49038387230216085
k=6;max_iter=300;init=random;tol=1e-05 26225 0.48948837018272273
k=7;max_iter=300;init=random;tol=1e-05 26225 0.48435267930148995
k=8;max_iter=300;init=random;tol=1e-05 26225 0.3433309240566177
k=9;max_iter=300;init=random;tol=1e-05 26225 0.2874865161910125
k=10;max_iter=300;init=random;tol=1e-05 26225 0.3501767857202956
k=11;max_iter=300;i

k=6;max_iter=300;init=random;tol=1e-05 288389 0.46183788541099113
k=7;max_iter=300;init=random;tol=1e-05 288389 0.4891524194144218
k=8;max_iter=300;init=random;tol=1e-05 288389 0.4838737820325937
k=9;max_iter=300;init=random;tol=1e-05 288389 0.4832980944680245
k=10;max_iter=300;init=random;tol=1e-05 288389 0.4840734791077145
k=11;max_iter=300;init=random;tol=1e-05 288389 0.31768884667194763
k=12;max_iter=300;init=random;tol=1e-05 288389 0.30702553194042226
k=13;max_iter=300;init=random;tol=1e-05 288389 0.2713378925583187
k=14;max_iter=300;init=random;tol=1e-05 288389 0.2659767674219479
k=15;max_iter=300;init=random;tol=1e-05 288389 0.2635905265831965
k=16;max_iter=300;init=random;tol=1e-05 288389 0.27037388333767154
k=17;max_iter=300;init=random;tol=1e-05 288389 0.24759562921790942
k=18;max_iter=300;init=random;tol=1e-05 288389 0.2395293305094488
k=19;max_iter=300;init=random;tol=1e-05 288389 0.24133054098269355
k=20;max_iter=300;init=random;tol=1e-05 288389 0.24380265786387106
k=2;max

k=14;max_iter=1000;init=random;tol=1e-05 116739 0.23727607242706922
k=15;max_iter=1000;init=random;tol=1e-05 116739 0.23972675139090405
k=16;max_iter=1000;init=random;tol=1e-05 116739 0.2410562938409902
k=17;max_iter=1000;init=random;tol=1e-05 116739 0.24376133642474912
k=18;max_iter=1000;init=random;tol=1e-05 116739 0.18598586956571347
k=19;max_iter=1000;init=random;tol=1e-05 116739 0.23793355237172814
k=20;max_iter=1000;init=random;tol=1e-05 116739 0.22807294781986204
k=2;max_iter=1000;init=random;tol=0.001 26225 0.5614308607697318
k=3;max_iter=1000;init=random;tol=0.001 26225 0.5256893548969211
k=4;max_iter=1000;init=random;tol=0.001 26225 0.502699898726736
k=5;max_iter=1000;init=random;tol=0.001 26225 0.49038387230216085
k=6;max_iter=1000;init=random;tol=0.001 26225 0.48948837018272273
k=7;max_iter=1000;init=random;tol=0.001 26225 0.48435267930148995
k=8;max_iter=1000;init=random;tol=0.001 26225 0.3433309240566177
k=9;max_iter=1000;init=random;tol=0.001 26225 0.2874865161910125
k=1

k=4;max_iter=1000;init=random;tol=0.001 288389 0.48290041617512425
k=5;max_iter=1000;init=random;tol=0.001 288389 0.49884956156987714
k=6;max_iter=1000;init=random;tol=0.001 288389 0.46183788541099113
k=7;max_iter=1000;init=random;tol=0.001 288389 0.4891524194144218
k=8;max_iter=1000;init=random;tol=0.001 288389 0.4838737820325937
k=9;max_iter=1000;init=random;tol=0.001 288389 0.4832980944680245
k=10;max_iter=1000;init=random;tol=0.001 288389 0.4840734791077145
k=11;max_iter=1000;init=random;tol=0.001 288389 0.31768884667194763
k=12;max_iter=1000;init=random;tol=0.001 288389 0.30702553194042226
k=13;max_iter=1000;init=random;tol=0.001 288389 0.2713378925583187
k=14;max_iter=1000;init=random;tol=0.001 288389 0.2659767674219479
k=15;max_iter=1000;init=random;tol=0.001 288389 0.2635905265831965
k=16;max_iter=1000;init=random;tol=0.001 288389 0.27037388333767154
k=17;max_iter=1000;init=random;tol=0.001 288389 0.24759562921790942
k=18;max_iter=1000;init=random;tol=0.001 288389 0.23952933050

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,seed,parameters
95,0.577808,0.557845,0.149836,0.154611,0.421438,0.62703,0.765629,0.070804,0.049438,-0.05136,0.0,0.079185,0.0,0.547174,0.375323,0.410084,-0.043854,-0.032562,116739,k=2;max_iter=300;init=k-means++;tol=1e-05
380,0.577808,0.557845,0.149836,0.154611,0.421438,0.62703,0.765629,0.070804,0.049438,-0.05136,0.0,0.079185,0.0,0.547174,0.375323,0.410084,-0.043854,-0.032562,116739,k=2;max_iter=1000;init=k-means++;tol=1e-05
342,0.577808,0.557845,0.149836,0.154611,0.421438,0.62703,0.765629,0.070804,0.049438,-0.05136,0.0,0.079185,0.0,0.547174,0.375323,0.410084,-0.043854,-0.032562,116739,k=2;max_iter=1000;init=k-means++;tol=0.001
76,0.577808,0.557845,0.149836,0.154611,0.421438,0.62703,0.765629,0.070804,0.049438,-0.05136,0.0,0.079185,0.0,0.547174,0.375323,0.410084,-0.043854,-0.032562,116739,k=2;max_iter=300;init=k-means++;tol=0.0001
57,0.577808,0.557845,0.149836,0.154611,0.421438,0.62703,0.765629,0.070804,0.049438,-0.05136,0.0,0.079185,0.0,0.547174,0.375323,0.410084,-0.043854,-0.032562,116739,k=2;max_iter=300;init=k-means++;tol=0.001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,0.163226,0.161852,0.191018,0.176292,0.115067,0.109247,0.280274,0.326962,0.125868,0.181282,0.152357,0.128921,0.204949,0.126072,0.05887,0.248534,0.150399,0.135788,26225,k=16;max_iter=300;init=k-means++;tol=0.0001
413,0.163226,0.161852,0.191018,0.176292,0.115067,0.109247,0.280274,0.326962,0.125868,0.181282,0.152357,0.128921,0.204949,0.126072,0.05887,0.248534,0.150399,0.135788,26225,k=16;max_iter=1000;init=k-means++;tol=0.001
432,0.163226,0.161852,0.191018,0.176292,0.115067,0.109247,0.280274,0.326962,0.125868,0.181282,0.152357,0.128921,0.204949,0.126072,0.05887,0.248534,0.150399,0.135788,26225,k=16;max_iter=1000;init=k-means++;tol=0.0001
128,0.163226,0.161852,0.191018,0.176292,0.115067,0.109247,0.280274,0.326962,0.125868,0.181282,0.152357,0.128921,0.204949,0.126072,0.05887,0.248534,0.150399,0.135788,26225,k=16;max_iter=300;init=k-means++;tol=0.001


# GMM

In [1]:
from sklearn.mixture import GaussianMixture

# Hyper-parameter grid for the Gaussian mixture sweep.
# init_params is currently not part of the grid; candidate values were:
#   ['kmeans', 'k-means++', 'random', 'random_from_data']
ks = [*range(2, 21)]                       # numbers of mixture components
max_iters = [100, 1000]                    # iteration caps
tols = [1e-3, 1e-4, 1e-5]                  # convergence thresholds
n_inits = [1, 10]                          # initialisations per fit
covariance_types = ["full", "spherical"]

# Number of parameter combinations in the grid (seeds are not counted here).
len(covariance_types) * len(n_inits) * len(tols) * len(max_iters) * len(ks)

456

In [None]:
# GMM sweep: fit a Gaussian mixture for every grid point and every seed, score
# the resulting sample clusters against the known groups (ARI) and store
#   * one clusters table per run in  <basedir>/GMM/
#   * one performance table for all runs in  <basedir>/GMM_<dataset>.tsv
# Relies on names defined in earlier cells: basedir, dataset, exprs, seeds,
# known_groups, all_samples, labels_to_clsuters and the GMM grid lists.
import itertools  # local import, used only to flatten the nested grid loops

method = "GMM"
# exist_ok=True replaces the exists()/mkdir() pair; missing parent directories
# are created as well instead of raising.
os.makedirs(basedir+"/"+method+"/", exist_ok=True)

run_stats = {}  # run index -> performances plus the seed/parameters of the run
i = 0
# product() visits the combinations in the same order as the nested loops it
# replaces: k varies slowest, seed fastest.
for k, max_iter, tol, n_init, cov_type, seed in itertools.product(
        ks, max_iters, tols, n_inits, covariance_types, seeds):
    params = "k="+str(k)+";max_iter="+str(max_iter)+";tol="+str(tol)
    params += ";n_inits="+str(n_init)+";cov_type="+str(cov_type)
    labels = GaussianMixture(n_components=k,
                             covariance_type=cov_type,
                             tol=tol,
                             max_iter=max_iter,
                             n_init=n_init,
                             #init_params='kmeans',
                             random_state=seed).fit_predict(exprs.T)
    clusters = labels_to_clsuters(labels, exprs.columns)
    # Drop clusters with fewer than 5 samples. The explicit copy makes the
    # "samples" assignment below act on an independent frame instead of a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
    performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                       performance_measure = "ARI")
    record = performances.to_dict()
    record["seed"] = seed
    record["parameters"] = params
    run_stats[i] = record
    print(params, seed, record["PAM50"])
    # saving clusters: sample ids are stored as one space-separated, sorted string
    clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
    clusters = clusters.loc[:,["n_samples","samples"]]
    clusters.to_csv(basedir+"/"+method+"/"+dataset+"."+method+"."+params+".seed="+str(seed)+".clusters.tsv",sep="\t")
    i += 1

# one row per run, one column per reference classification (+ seed, parameters)
stats = pd.DataFrame.from_dict(run_stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)

# DBSCAN

In [21]:
from sklearn.cluster import DBSCAN

In [22]:
# DBSCAN grid: a few small radii followed by 10, 20, ..., 200, crossed with the
# candidate min_samples values.
min_samples = [3, 5, 10, 20]
epsilons = [0.1, 0.5, 1, 2, 5, *np.arange(10, 210, 10)]

# Number of (eps, min_samples) combinations.
len(min_samples) * len(epsilons)

100

In [23]:
# DBSCAN sweep: cluster the samples for every (eps, min_samples) pair, score
# the clusters against the known groups (ARI) and store
#   * one clusters table per run in  <basedir>/DBSCAN/
#   * one performance table for all runs in  <basedir>/DBSCAN_<dataset>.tsv
# Relies on names defined in earlier cells: basedir, dataset, exprs,
# known_groups, all_samples, labels_to_clsuters, epsilons and min_samples.
method = "DBSCAN"
# exist_ok=True replaces the exists()/mkdir() pair; missing parent directories
# are created as well instead of raising.
os.makedirs(basedir+"/"+method+"/", exist_ok=True)

run_stats = {}  # run index -> performances plus the parameters of the run
i = 0
for eps in epsilons:
    for min_s in min_samples:
        params = "eps="+str(eps)+";min_samples="+str(min_s)
        labels = DBSCAN(eps=eps, min_samples=min_s).fit_predict(exprs.T)
        # NOTE(review): DBSCAN marks noise points with the label -1; confirm
        # that labels_to_clsuters does not report them as a regular cluster.
        clusters = labels_to_clsuters(labels, exprs.columns)
        # Drop clusters with fewer than 5 samples. The explicit copy makes the
        # "samples" assignment below act on an independent frame instead of a
        # slice of the original (avoids pandas' SettingWithCopyWarning).
        clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
        performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                           performance_measure = "ARI")
        record = performances.to_dict()
        record["parameters"] = params
        run_stats[i] = record
        print(params, record["PAM50"])
        # saving clusters: sample ids are stored as one space-separated, sorted string
        clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
        clusters = clusters.loc[:,["n_samples","samples"]]
        clusters.to_csv(basedir+"/"+method+"/"+dataset+"."+method+"."+params+".clusters.tsv",sep="\t")
        i += 1

# one row per run, one column per reference classification (+ parameters)
stats = pd.DataFrame.from_dict(run_stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)

eps=0.1;min_samples=3 0.0
eps=0.1;min_samples=5 0.0
eps=0.1;min_samples=10 0.0
eps=0.1;min_samples=20 0.0
eps=0.5;min_samples=3 0.0
eps=0.5;min_samples=5 0.0
eps=0.5;min_samples=10 0.0
eps=0.5;min_samples=20 0.0
eps=1;min_samples=3 0.0
eps=1;min_samples=5 0.0
eps=1;min_samples=10 0.0
eps=1;min_samples=20 0.0
eps=2;min_samples=3 0.0
eps=2;min_samples=5 0.0
eps=2;min_samples=10 0.0
eps=2;min_samples=20 0.0
eps=5;min_samples=3 0.0
eps=5;min_samples=5 0.0
eps=5;min_samples=10 0.0
eps=5;min_samples=20 0.0
eps=10;min_samples=3 0.0
eps=10;min_samples=5 0.0
eps=10;min_samples=10 0.0
eps=10;min_samples=20 0.0
eps=20;min_samples=3 0.0
eps=20;min_samples=5 0.0
eps=20;min_samples=10 0.0
eps=20;min_samples=20 0.0
eps=30;min_samples=3 0.0
eps=30;min_samples=5 0.0
eps=30;min_samples=10 0.0
eps=30;min_samples=20 0.0
eps=40;min_samples=3 0.0
eps=40;min_samples=5 0.0
eps=40;min_samples=10 0.0
eps=40;min_samples=20 0.0
eps=50;min_samples=3 0.0
eps=50;min_samples=5 0.0
eps=50;min_samples=10 0.0
eps=50;min

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,parameters
75,0.217263,0.208565,0.088068,0.065293,0.191332,0.237341,0.277705,0.03419,0.087278,0.0,0.0,0.0,0.0,0.22075,0.210752,0.137233,0.03596,0.026728,eps=140;min_samples=20
76,0.208056,0.199726,0.063817,0.037913,0.162408,0.212049,0.342193,0.0,0.005003,0.0,0.0,0.0,0.0,0.199343,0.151218,0.198001,0.055248,0.042462,eps=150;min_samples=3
77,0.200124,0.192112,0.054538,0.035653,0.155291,0.203481,0.330998,0.0,-0.013375,0.0,0.0,0.0,0.0,0.191018,0.143991,0.190061,0.051975,0.039981,eps=150;min_samples=5
78,0.197699,0.189784,0.054708,0.035919,0.153987,0.200696,0.328201,0.0,-0.011666,0.0,0.0,0.0,0.0,0.188593,0.144505,0.183909,0.04947,0.0,eps=150;min_samples=10
79,0.196837,0.188956,0.055133,0.04163,0.154507,0.199604,0.3276,0.0,-0.010314,0.0,0.0,0.0,0.0,0.187807,0.146445,0.18568,0.047656,0.0,eps=150;min_samples=20
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,-0.053317,-0.051182,-0.003389,0.020198,-0.052856,-0.046396,-0.089214,-0.081512,0.092455,-0.062997,0.099691,0.0,-0.058883,-0.066855,-0.031436,-0.075279,-0.055336,0.0,eps=100;min_samples=10
62,-0.054285,-0.052112,0.06502,0.084589,-0.040375,-0.043206,-0.097009,-0.084153,0.264416,-0.050493,0.049298,0.0,-0.057974,-0.058092,-0.008451,-0.083541,-0.048148,-0.032516,eps=110;min_samples=10
57,-0.055639,-0.053412,0.004273,0.028652,-0.043864,-0.048419,-0.093199,-0.084288,0.116604,-0.066203,0.102078,0.0,-0.058267,-0.068266,-0.006357,-0.078622,-0.056288,0.0,eps=100;min_samples=5
63,-0.058242,-0.05591,0.0639,0.078591,-0.044113,-0.047235,-0.099847,-0.089081,0.267583,-0.054533,0.043456,0.0,-0.065071,-0.062175,-0.011403,-0.08496,-0.048986,-0.033025,eps=110;min_samples=20


# HDBSCAN

In [43]:
from sklearn.cluster import HDBSCAN

# HDBSCAN grid. The wider candidate lists are kept in the trailing comments.
min_samples = [5]  # wider grid: [3,5,10,20]
cluster_selection_epsilon = [0.0,150]
alphas = [1.0]  # wider grid: [1.0,0.5,1.5]

# Number of parameter combinations. cluster_selection_epsilon is one of the
# grid axes, so it has to be part of the product as well.
n_combinations = len(min_samples)*len(cluster_selection_epsilon)*len(alphas)
n_combinations

ImportError: cannot import name 'HDBSCAN' from 'sklearn.cluster' (/home/olya/anaconda3/lib/python3.8/site-packages/sklearn/cluster/__init__.py)

In [None]:
# HDBSCAN sweep: cluster the samples for every combination of
# cluster_selection_epsilon, min_samples and alpha, score the clusters against
# the known groups (ARI) and store
#   * one clusters table per run in  <basedir>/HDBSCAN/
#   * one performance table for all runs in  <basedir>/HDBSCAN_<dataset>.tsv
# Relies on names defined in earlier cells: basedir, dataset, exprs,
# known_groups, all_samples, labels_to_clsuters and the HDBSCAN grid lists.
# NOTE: `min_samples` is also assigned by the DBSCAN and OPTICS parameter
# cells; run the HDBSCAN parameter cell right before this one.
method = "HDBSCAN"
# exist_ok=True replaces the exists()/mkdir() pair; missing parent directories
# are created as well instead of raising.
os.makedirs(basedir+"/"+method+"/", exist_ok=True)

run_stats = {}  # run index -> performances plus the parameters of the run
i = 0
# Iterate the HDBSCAN grid itself. The previous version looped over DBSCAN's
# `epsilons`, bound `min_s` to the cluster_selection_epsilon values and then
# ignored all loop variables in the estimator call, so every run was the same
# fit with a misleading `params` label.
for eps in cluster_selection_epsilon:
    for min_s in min_samples:
        for alpha in alphas:
            labels = HDBSCAN(min_samples=min_s,
                             cluster_selection_epsilon=eps,
                             max_cluster_size=None,
                             alpha=alpha,
                             algorithm='auto').fit_predict(exprs.T)

            params = "cluster_selection_epsilon="+str(eps)+";min_samples="+str(min_s)+";alpha="+str(alpha)
            clusters = labels_to_clsuters(labels, exprs.columns)
            # Drop clusters with fewer than 5 samples. The explicit copy makes
            # the "samples" assignment below act on an independent frame
            # instead of a slice (avoids pandas' SettingWithCopyWarning).
            clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
            performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                               performance_measure = "ARI")
            record = performances.to_dict()
            record["parameters"] = params
            run_stats[i] = record
            print(params, record["PAM50"])
            # saving clusters: sample ids are stored as one space-separated, sorted string
            clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
            clusters = clusters.loc[:,["n_samples","samples"]]
            clusters.to_csv(basedir+"/"+method+"/"+dataset+"."+method+"."+params+".clusters.tsv",sep="\t")
            i += 1

# one row per run, one column per reference classification (+ parameters)
stats = pd.DataFrame.from_dict(run_stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)

### OPTICS - not working?

In [148]:
from sklearn.cluster import OPTICS

In [149]:
# Candidate OPTICS settings.
xi_params = [0.01, 0.05, 0.1, 0.5]  # float between 0 and 1, default=0.05
clust_methods = ["dbscan", "xi"]
min_samples = [3, 5, 10, 20]

In [None]:
# Single exploratory OPTICS run (xi cluster extraction) scored against the
# known groups. The candidate lists from the OPTICS parameter cell
# (min_samples, clust_methods, xi_params) are not used here yet.
labels = OPTICS(min_samples=5,
                metric='euclidean',
                cluster_method='xi', xi=0.5,
                min_cluster_size=5).fit_predict(exprs.T)
clusters = labels_to_clsuters(labels, exprs.columns)
# Keep clusters with at least 5 samples: the same cut-off as in the other
# method sections and consistent with min_cluster_size=5 above (the former
# ">5" filter silently dropped clusters of exactly 5 samples).
clusters = clusters.loc[clusters["n_samples"]>=5,:]
# NOTE(review): no performance_measure is passed here, while the other
# sections request "ARI" explicitly — confirm the default is the intended one.
performances, best_matches = calculate_perfromance(clusters, known_groups,all_samples)
performances

# Spectral

In [13]:
from sklearn.cluster import SpectralClustering

# Hyper-parameter grid for the spectral clustering sweep.
ks = [*range(2, 21)]                 # numbers of clusters
n_neighbors = [3, 5, 10, 20]
n_inits = [10]
affinity = 'nearest_neighbors'       # alternative: "rbf"
assign_labels = ["kmeans", "discretize", "cluster_qr"]

# Number of parameter combinations in the grid (seeds are not counted here).
len(n_neighbors) * len(n_inits) * len(assign_labels) * len(ks)

228

In [14]:
# Spectral clustering sweep: cluster the samples for every grid point and
# every seed, score the clusters against the known groups (ARI) and store
#   * one clusters table per run in  <basedir>/SpectralClustering/
#   * one performance table for all runs in  <basedir>/SpectralClustering_<dataset>.tsv
# Relies on names defined in earlier cells: basedir, dataset, exprs, seeds,
# known_groups, all_samples, labels_to_clsuters and the spectral grid settings.
import itertools  # local import, used only to flatten the nested grid loops

method = "SpectralClustering"
# exist_ok=True replaces the exists()/mkdir() pair; missing parent directories
# are created as well instead of raising.
os.makedirs(basedir+"/"+method+"/", exist_ok=True)

run_stats = {}  # run index -> performances plus the seed/parameters of the run
i = 0
# product() visits the combinations in the same order as the nested loops it
# replaces: k varies slowest, seed fastest.
for k, al, n_init, n_neigh, seed in itertools.product(
        ks, assign_labels, n_inits, n_neighbors, seeds):
    params = "k="+str(k)+";n_neighbors="+str(n_neigh)
    params += ";assign_labels="+str(al)+";n_inits="+str(n_init)
    # `affinity` comes from the parameter cell; it used to be hard-coded here,
    # so editing the setting had no effect. It is not part of `params`, so the
    # output file names do not record it.
    labels = SpectralClustering(n_clusters=k, n_init=n_init,
                                affinity=affinity,
                                n_neighbors=n_neigh,
                                assign_labels=al,
                                random_state=seed).fit_predict(exprs.T)
    clusters = labels_to_clsuters(labels, exprs.columns)
    # Drop clusters with fewer than 5 samples. The explicit copy makes the
    # "samples" assignment below act on an independent frame instead of a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
    performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                       performance_measure = "ARI")
    record = performances.to_dict()
    record["seed"] = seed
    record["parameters"] = params
    run_stats[i] = record
    print(i, params, seed, record["PAM50"])
    # saving clusters: sample ids are stored as one space-separated, sorted string
    clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
    clusters = clusters.loc[:,["n_samples","samples"]]
    clusters.to_csv(basedir+"/"+method+"/"+dataset+"."+method+"."+params+".seed="+str(seed)+".clusters.tsv",sep="\t")
    i += 1

# one row per run, one column per reference classification (+ seed, parameters)
stats = pd.DataFrame.from_dict(run_stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)



0 k=2;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.0




1 k=2;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.0




2 k=2;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.0




3 k=2;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.0




4 k=2;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.0
5 k=2;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.0
6 k=2;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.0
7 k=2;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.0
8 k=2;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.0
9 k=2;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.0
10 k=2;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.0
11 k=2;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.0
12 k=2;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.0
13 k=2;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.0
14 k=2;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.0
15 k=2;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.39228157850935025
16 k=2;n_neighbors=20;assign_labels=kmeans;n_inits=10 116739 0.39228157850935025
17 k=2;n_neighbors=20;assign_labels=kmeans;n_inits=10 26225 0.39228157850935025
18 k=2;n_neighbors=20;assign_labels=kmeans;n_inits=10 777



20 k=2;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.0




21 k=2;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.0




22 k=2;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.0




23 k=2;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.0




24 k=2;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.0
25 k=2;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.0
26 k=2;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.0
27 k=2;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.0
28 k=2;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.0
29 k=2;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.0
30 k=2;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 -0.007164426699995744
31 k=2;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 -0.007164426699995744
32 k=2;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 -0.007164426699995744
33 k=2;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 -0.007164426699995744
34 k=2;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 -0.007164426699995744
35 k=2;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0.39849686833617176
36 k=2;n_neighbors=20;assign_labels=discretize;n_inits=10 116739 0.3968728



40 k=2;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.0




41 k=2;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.0




42 k=2;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.0




43 k=2;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.0




44 k=2;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.0
45 k=2;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.0
46 k=2;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.0
47 k=2;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.0
48 k=2;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.0
49 k=2;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.0
50 k=2;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 -0.007164426699995744
51 k=2;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 -0.007164426699995744
52 k=2;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 -0.007164426699995744
53 k=2;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 -0.007164426699995744
54 k=2;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 -0.007164426699995744
55 k=2;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0.27234386943168654
56 k=2;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 116739 0.2723438



60 k=3;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3844284224435476




61 k=3;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3844284224435476




62 k=3;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3844284224435476




63 k=3;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3844284224435476




64 k=3;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3844284224435476
65 k=3;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.3913297381696849
66 k=3;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3913297381696849
67 k=3;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3913297381696849
68 k=3;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3913297381696849
69 k=3;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3913297381696849
70 k=3;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3970910331992783
71 k=3;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3970910331992783
72 k=3;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3970910331992783
73 k=3;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3970910331992783
74 k=3;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.3970910331992783
75 k=3;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.4003566439927625
76 k=3;n_neighbors=20;assign_labels=kmeans;n_ini



80 k=3;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.39391541030006244




81 k=3;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39391541030006244




82 k=3;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.39391541030006244




83 k=3;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.39391541030006244




84 k=3;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.39391541030006244
85 k=3;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.4095265639623821
86 k=3;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.4095265639623821
87 k=3;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.4095265639623821
88 k=3;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.4095265639623821
89 k=3;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.4095265639623821
90 k=3;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.43583816543841924
91 k=3;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.43583816543841924
92 k=3;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.43583816543841924
93 k=3;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.43583816543841924
94 k=3;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.43583816543841924
95 k=3;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0.43970889009



100 k=3;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.39391541030006244




101 k=3;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.39391541030006244




102 k=3;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.39391541030006244




103 k=3;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.39391541030006244




104 k=3;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.39391541030006244
105 k=3;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.4278242528248099
106 k=3;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.4278242528248099
107 k=3;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.4278242528248099
108 k=3;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.4278242528248099
109 k=3;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.4278242528248099
110 k=3;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.4644643089630927
111 k=3;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.4644643089630927
112 k=3;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.4644643089630927
113 k=3;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.4644643089630927
114 k=3;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.4644643089630927
115 k=3;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0.4494



120 k=4;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3871656452433148




121 k=4;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3871656452433148




122 k=4;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3871656452433148




123 k=4;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3871656452433148




124 k=4;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3871656452433148
125 k=4;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.40026305826686703
126 k=4;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.39931569440444037
127 k=4;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.4005936470628758
128 k=4;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.39931569440444037
129 k=4;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.4005936470628758
130 k=4;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.40688168027026483
131 k=4;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.40688168027026483
132 k=4;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.40688168027026483
133 k=4;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.40688168027026483
134 k=4;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.40688168027026483
135 k=4;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.40221223970232217
136 k=4;n_neighbors=20;assi



140 k=4;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.39901945587638804




141 k=4;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39901945587638804




142 k=4;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.39901945587638804




143 k=4;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.39956435633767995




144 k=4;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.39901945587638804
145 k=4;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.43772313744085173
146 k=4;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.43772313744085173
147 k=4;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.43772313744085173
148 k=4;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.43772313744085173
149 k=4;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.43772313744085173
150 k=4;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.4470682089149194
151 k=4;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.4470682089149194
152 k=4;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.4470682089149194
153 k=4;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.4470682089149194
154 k=4;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.4470682089149194
155 k=4;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0



160 k=4;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3976925838475281




161 k=4;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3976925838475281




162 k=4;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3976925838475281




163 k=4;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3976925838475281




164 k=4;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3976925838475281
165 k=4;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.46374524486342494
166 k=4;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.46374524486342494
167 k=4;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.46374524486342494
168 k=4;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.46374524486342494
169 k=4;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.46374524486342494
170 k=4;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.4632304859580431
171 k=4;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.4632304859580431
172 k=4;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.4632304859580431
173 k=4;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.4632304859580431
174 k=4;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.4632304859580431
175 k=4;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0.



180 k=5;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3822448330605928




181 k=5;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3822448330605928




182 k=5;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3822448330605928




183 k=5;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3822448330605928




184 k=5;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3822448330605928
185 k=5;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.3945336902006512
186 k=5;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3945336902006512
187 k=5;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.39475474967490215
188 k=5;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3945336902006512
189 k=5;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3945336902006512
190 k=5;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3891908131725539
191 k=5;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3891908131725539
192 k=5;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3891908131725539
193 k=5;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3891908131725539
194 k=5;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.3891908131725539
195 k=5;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3556153811446793
196 k=5;n_neighbors=20;assign_label



200 k=5;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3906056417588117




201 k=5;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.3906056417588117




202 k=5;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3906056417588117




203 k=5;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3906056417588117




204 k=5;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.3906056417588117
205 k=5;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.4368791145183558
206 k=5;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.4368791145183558
207 k=5;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.4368791145183558
208 k=5;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.4368791145183558
209 k=5;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.4368791145183558
210 k=5;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.43709047532860656
211 k=5;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.4289409921582689
212 k=5;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.42792160396740214
213 k=5;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.4279134256191306
214 k=5;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.42986184258152693
215 k=5;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0.42



220 k=5;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3927786260019673




221 k=5;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3927786260019673




222 k=5;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3927786260019673




223 k=5;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3927786260019673




224 k=5;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3927786260019673
225 k=5;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.4450855345090282
226 k=5;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.4450855345090282
227 k=5;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.4450855345090282
228 k=5;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.4450855345090282
229 k=5;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.4450855345090282
230 k=5;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.42041263884139696
231 k=5;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.42041263884139696
232 k=5;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.42041263884139696
233 k=5;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.42041263884139696
234 k=5;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.42041263884139696
235 k=5;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0.



240 k=6;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3739745363269561




241 k=6;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3739745363269561




242 k=6;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3739745363269561




243 k=6;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3739745363269561




244 k=6;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3739745363269561
245 k=6;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.38923754913315833
246 k=6;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.39182771469854494
247 k=6;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.38923754913315833
248 k=6;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.38923754913315833
249 k=6;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.38923754913315833
250 k=6;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.38495147007211206
251 k=6;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.38495147007211206
252 k=6;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.38495147007211206
253 k=6;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.38495147007211206
254 k=6;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.38495147007211206
255 k=6;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.35911879101440847
256 k=6;n_neighbors=20;as



260 k=6;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.38667976950827354




261 k=6;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.38667976950827354




262 k=6;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.38667976950827354




263 k=6;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.38667976950827354




264 k=6;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.38667976950827354
265 k=6;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.4307773747469027
266 k=6;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.4307773747469027
267 k=6;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.4307773747469027
268 k=6;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.4307773747469027
269 k=6;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.4307773747469027
270 k=6;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.4244934370855481
271 k=6;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.42978343045820333
272 k=6;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.4244934370855481
273 k=6;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.42978343045820333
274 k=6;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.421519938376556
275 k=6;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0.393



280 k=6;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.38619443015719124




281 k=6;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.38619443015719124




282 k=6;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.38619443015719124




283 k=6;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.38619443015719124




284 k=6;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.38619443015719124
285 k=6;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.42156781243739744
286 k=6;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.42156781243739744
287 k=6;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.42156781243739744
288 k=6;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.42156781243739744
289 k=6;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.42156781243739744
290 k=6;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.39782648395503073
291 k=6;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.39782648395503073
292 k=6;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.39782648395503073
293 k=6;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.39782648395503073
294 k=6;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.39782648395503073
295 k=6;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670



300 k=7;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.37656400874035717




301 k=7;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.37656400874035717




302 k=7;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.37656400874035717




303 k=7;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.37656400874035717




304 k=7;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.37656400874035717
305 k=7;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.38675711714093036
306 k=7;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.38675711714093036
307 k=7;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.38675711714093036
308 k=7;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.38675711714093036
309 k=7;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.38675711714093036
310 k=7;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.38262303432930883
311 k=7;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.38262303432930883
312 k=7;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.38262303432930883
313 k=7;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.37779719508532794
314 k=7;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.38262303432930883
315 k=7;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.34648840315988844
316 k=7;n_neighbors=20;a



320 k=7;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3823873830203569




321 k=7;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.3823873830203569




322 k=7;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3823873830203569




323 k=7;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3823873830203569




324 k=7;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.38333273961472847
325 k=7;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.41253394043897046
326 k=7;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.41253394043897046
327 k=7;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.41253394043897046
328 k=7;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.41239894139087524
329 k=7;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.41253394043897046
330 k=7;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.41241659259880864
331 k=7;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.41241659259880864
332 k=7;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.4107300547296455
333 k=7;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.4107300547296455
334 k=7;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.41241659259880864
335 k=7;n_neighbors=20;assign_labels=discretize;n_inits=10 67048



340 k=7;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.38534125534728375




341 k=7;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.38534125534728375




342 k=7;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.38534125534728375




343 k=7;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.38534125534728375




344 k=7;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.38534125534728375
345 k=7;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.39903974998022973
346 k=7;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.39903974998022973
347 k=7;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.39903974998022973
348 k=7;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.39903974998022973
349 k=7;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.39903974998022973
350 k=7;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.3932972800255611
351 k=7;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.3932972800255611
352 k=7;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.3932972800255611
353 k=7;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.3932972800255611
354 k=7;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.3932972800255611
355 k=7;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0



360 k=8;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3795880124174044




361 k=8;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3795880124174044




362 k=8;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3795880124174044




363 k=8;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3795880124174044




364 k=8;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3795880124174044
365 k=8;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.38305448550948806
366 k=8;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.38305448550948806
367 k=8;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.38305448550948806
368 k=8;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.38305448550948806
369 k=8;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.38305448550948806
370 k=8;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.38126906914379227
371 k=8;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.38126906914379227
372 k=8;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.38126906914379227
373 k=8;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.38126906914379227
374 k=8;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.38126906914379227
375 k=8;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.33992068185659474
376 k=8;n_neighbors=20;as



380 k=8;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3909907656961251




381 k=8;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.3909907656961251




382 k=8;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3909907656961251




383 k=8;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3909907656961251




384 k=8;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.3909907656961251
385 k=8;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.41879745219068487
386 k=8;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.41974383466649834
387 k=8;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.41879745219068487
388 k=8;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.41800038680107027
389 k=8;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.41879745219068487
390 k=8;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3877138237762904
391 k=8;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.3877138237762904
392 k=8;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3877138237762904
393 k=8;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.3860127563087522
394 k=8;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3877138237762904
395 k=8;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0.



400 k=8;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.38833372496264223




401 k=8;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.38833372496264223




402 k=8;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.38833372496264223




403 k=8;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.38833372496264223




404 k=8;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.38833372496264223
405 k=8;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.40810641748310933
406 k=8;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.40810641748310933
407 k=8;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.40810641748310933
408 k=8;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.40810641748310933
409 k=8;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.40810641748310933
410 k=8;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.4180959001454777
411 k=8;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.4180959001454777
412 k=8;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.4180959001454777
413 k=8;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.4180959001454777
414 k=8;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.4180959001454777
415 k=8;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0



420 k=9;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.35707687280842376




421 k=9;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.35707687280842376




422 k=9;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.35707687280842376




423 k=9;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.35707687280842376




424 k=9;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.35707687280842376
425 k=9;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.3444466252809289
426 k=9;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.34481747312742583
427 k=9;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.34481747312742583
428 k=9;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3444466252809289
429 k=9;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3444466252809289
430 k=9;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3378658648307843
431 k=9;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.337930491115141
432 k=9;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.337930491115141
433 k=9;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3378658648307843
434 k=9;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.337930491115141
435 k=9;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.32789705944537306
436 k=9;n_neighbors=20;assign_label



440 k=9;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3675776737759635




441 k=9;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.36930738899256904




442 k=9;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.36930738899256904




443 k=9;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.36930738899256904




444 k=9;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.36930738899256904
445 k=9;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.3716465412133854
446 k=9;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.37153332120449
447 k=9;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3716465412133854
448 k=9;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3716084788698474
449 k=9;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.3716465412133854
450 k=9;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3758365987828688
451 k=9;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.37413438419958345
452 k=9;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3739381668226359
453 k=9;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.37607845683769886
454 k=9;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3758365987828688
455 k=9;n_neighbors=20;assign_labels=discretize;n_inits=10 670487 0.3513



460 k=9;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.36843110241186094




461 k=9;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.36843110241186094




462 k=9;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.36843110241186094




463 k=9;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.36843110241186094




464 k=9;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.36843110241186094
465 k=9;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.3837496988959347
466 k=9;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.3837496988959347
467 k=9;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.3837496988959347
468 k=9;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.3837496988959347
469 k=9;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.3837496988959347
470 k=9;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.37662160179392856
471 k=9;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.37662160179392856
472 k=9;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.37662160179392856
473 k=9;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.37662160179392856
474 k=9;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.37662160179392856
475 k=9;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 670487 0



480 k=10;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3713352827542305




481 k=10;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.37217479377954277




482 k=10;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3715993997071458




483 k=10;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3724905062066809




484 k=10;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3724905062066809
485 k=10;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.3432034634598643
486 k=10;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.347269578476868
487 k=10;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3432034634598643
488 k=10;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3432034634598643
489 k=10;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3432034634598643
490 k=10;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3358315875395189
491 k=10;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3358315875395189
492 k=10;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3358315875395189
493 k=10;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3358315875395189
494 k=10;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.3358315875395189
495 k=10;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3558154530668657
496 k=10;n_neighbors=20;a



500 k=10;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3726280635648733




501 k=10;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.3726280635648733




502 k=10;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3726280635648733




503 k=10;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3726280635648733




504 k=10;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.3726280635648733
505 k=10;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.3848821902929325
506 k=10;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.3848821902929325
507 k=10;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3851328970007198
508 k=10;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3845921808004783
509 k=10;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.3851328970007198
510 k=10;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.37320408614628037
511 k=10;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.37264219784096153
512 k=10;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.37320408614628037
513 k=10;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.37207844692306075
514 k=10;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.37207844692306075
515 k=10;n_neighbors=20;assign_labels=discretize;n_inits=



520 k=10;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.37113753663757587




521 k=10;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.37113753663757587




522 k=10;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.37113753663757587




523 k=10;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.37113753663757587




524 k=10;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.37113753663757587
525 k=10;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.39401296126343527
526 k=10;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.39401296126343527
527 k=10;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.39401296126343527
528 k=10;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.39401296126343527
529 k=10;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.39401296126343527
530 k=10;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.36510601967849987
531 k=10;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.36510601967849987
532 k=10;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.36510601967849987
533 k=10;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.36510601967849987
534 k=10;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.36510601967849987
535 k=10;n_neighbors=20;assign_labels=cluster_qr;n_



540 k=11;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.4107784748019416




541 k=11;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.4104206248627541




542 k=11;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.4103343500108192




543 k=11;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.4103343500108192




544 k=11;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.4098939979478651
545 k=11;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.34279204849839584
546 k=11;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.34279204849839584
547 k=11;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.34279204849839584
548 k=11;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.34279204849839584
549 k=11;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.34279204849839584
550 k=11;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.34313139967935313
551 k=11;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.34313139967935313
552 k=11;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.34313139967935313
553 k=11;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.34313139967935313
554 k=11;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.34313139967935313
555 k=11;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3418510499055848
556 k=11;n_nei



560 k=11;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.40025324646271954




561 k=11;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.40025324646271954




562 k=11;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.40025324646271954




563 k=11;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.40025324646271954




564 k=11;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.40025324646271954
565 k=11;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.3825191625671155
566 k=11;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.3825191625671155
567 k=11;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3825191625671155
568 k=11;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3825191625671155
569 k=11;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.3825191625671155
570 k=11;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3543035785638328
571 k=11;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.35404363056824417
572 k=11;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3541183739597185
573 k=11;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.3540020782310034
574 k=11;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3541183739597185
575 k=11;n_neighbors=20;assign_labels=discretize;n_inits=10 



580 k=11;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.4008442894398293




581 k=11;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.4008442894398293




582 k=11;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.4008442894398293




583 k=11;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.4008442894398293




584 k=11;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.4008442894398293
585 k=11;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.38306801621867853
586 k=11;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.38306801621867853
587 k=11;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.38306801621867853
588 k=11;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.38306801621867853
589 k=11;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.38306801621867853
590 k=11;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.347468730686897
591 k=11;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.347468730686897
592 k=11;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.347468730686897
593 k=11;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.347468730686897
594 k=11;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.347468730686897
595 k=11;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 67



600 k=12;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.4087981459986585




601 k=12;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.4087981459986585




602 k=12;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.4087503924890753




603 k=12;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.4087981459986585




604 k=12;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.4092533838069673
605 k=12;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.34704025080538614
606 k=12;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3454921671410264
607 k=12;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3456280312633661
608 k=12;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.34691083945456924
609 k=12;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.34704025080538614
610 k=12;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3625753438625204
611 k=12;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3623785347733244
612 k=12;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3623785347733244
613 k=12;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.36149256145304515
614 k=12;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.3616893705422412
615 k=12;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3512690912654089
616 k=12;n_neighbors



620 k=12;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.399602282299069




621 k=12;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39962896398212777




622 k=12;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.4018210256595861




623 k=12;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.39938718173583776




624 k=12;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.4003716215717697
625 k=12;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.36037509909023013
626 k=12;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.38033028775672373
627 k=12;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.34159109662854165
628 k=12;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3453946352564467
629 k=12;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.3453946352564467
630 k=12;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.35506882540016316
631 k=12;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.35506882540016316
632 k=12;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.35506882540016316
633 k=12;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.35506882540016316
634 k=12;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.35506882540016316
635 k=12;n_neighbors=20;assign_labels=discretize;n_ini



640 k=12;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3978802573181198




641 k=12;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3978802573181198




642 k=12;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3978802573181198




643 k=12;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3978802573181198




644 k=12;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3978802573181198
645 k=12;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.3558464352338001
646 k=12;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.3558464352338001
647 k=12;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.3558464352338001
648 k=12;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.3558464352338001
649 k=12;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.3558464352338001
650 k=12;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.35521511370133363
651 k=12;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.35521511370133363
652 k=12;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.35521511370133363
653 k=12;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.35521511370133363
654 k=12;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.35521511370133363
655 k=12;n_neighbors=20;assign_labels=cluster_qr;n_inits=



660 k=13;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.4078129012468532




661 k=13;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.4047006974285431




662 k=13;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.4096499026292587




663 k=13;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.4067906428686259




664 k=13;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.405148171285456
665 k=13;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.36981034748808417
666 k=13;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.36981034748808417
667 k=13;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3714685213683686
668 k=13;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.36981034748808417
669 k=13;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.36981034748808417
670 k=13;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.343079636082509
671 k=13;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.36011062049348147
672 k=13;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.36011062049348147
673 k=13;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.35843516127370667
674 k=13;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.36011062049348147
675 k=13;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3191335373183965
676 k=13;n_neighbo



680 k=13;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.39891307247814856




681 k=13;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39891307247814856




682 k=13;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.39891307247814856




683 k=13;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.39891307247814856




684 k=13;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.39891307247814856
685 k=13;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.35796876840175995
686 k=13;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.357026440055772
687 k=13;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3572173794827791
688 k=13;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.35850626669337327
689 k=13;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.35964222609956353
690 k=13;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.34949142468824684
691 k=13;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.34949142468824684
692 k=13;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3494459621652317
693 k=13;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.34949142468824684
694 k=13;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.349537369000631
695 k=13;n_neighbors=20;assign_labels=discretize;n_inits=



700 k=13;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.399903892284595




701 k=13;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.399903892284595




702 k=13;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.399903892284595




703 k=13;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.399903892284595




704 k=13;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.399903892284595
705 k=13;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.36680543385842224
706 k=13;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.36680543385842224
707 k=13;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.36680543385842224
708 k=13;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.36680543385842224
709 k=13;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.36680543385842224
710 k=13;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.33342000112771664
711 k=13;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.33342000112771664
712 k=13;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.33342000112771664
713 k=13;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.33342000112771664
714 k=13;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.33342000112771664
715 k=13;n_neighbors=20;assign_labels=cluster_qr;n_in



720 k=14;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.39945836548637714




721 k=14;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3999336965051916




722 k=14;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.39937622810327955




723 k=14;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3999983443249824




724 k=14;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.40002835083139054
725 k=14;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.36725331693666385
726 k=14;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3667414210408912
727 k=14;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3668210640872314
728 k=14;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3667414210408912
729 k=14;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3668210640872314
730 k=14;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3612731254489999
731 k=14;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.36092910055513006
732 k=14;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3592788706783316
733 k=14;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3592788706783316
734 k=14;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.36155979675301286
735 k=14;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3515466819790989
736 k=14;n_neighbors



740 k=14;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3978290161188319




741 k=14;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39757289702927906




742 k=14;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3982678235588484




743 k=14;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3940434907504289




744 k=14;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.39734920535063567
745 k=14;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.35991335617693
746 k=14;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.3586365296778165
747 k=14;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.358279097231621
748 k=14;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3586365296778165
749 k=14;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.3586755662775395
750 k=14;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3506912477217704
751 k=14;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.34801502999297856
752 k=14;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.34880342996160696
753 k=14;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.3501165575802363
754 k=14;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3506912477217704
755 k=14;n_neighbors=20;assign_labels=discretize;n_inits=10 67



760 k=14;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3919817518342096




761 k=14;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3919817518342096




762 k=14;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3919817518342096




763 k=14;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3919817518342096




764 k=14;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3919817518342096
765 k=14;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.3496657371596198
766 k=14;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.3496657371596198
767 k=14;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.3496657371596198
768 k=14;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.3496657371596198
769 k=14;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.3496657371596198
770 k=14;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.35212371616443006
771 k=14;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.35212371616443006
772 k=14;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.35212371616443006
773 k=14;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.35212371616443006
774 k=14;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.35212371616443006
775 k=14;n_neighbors=20;assign_labels=cluster_qr;n_inits=



780 k=15;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3963487171108495




781 k=15;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.40035106461559467




782 k=15;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3963872297653957




783 k=15;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.39617823333864344




784 k=15;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.39617823333864344
785 k=15;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.368736656972833
786 k=15;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3670606620246164
787 k=15;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.36911567958337466
788 k=15;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3673800626997122
789 k=15;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3688923545984344
790 k=15;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3529959968220442
791 k=15;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3584651273414252
792 k=15;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3519447027992549
793 k=15;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3550093140649391
794 k=15;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.3586344863173744
795 k=15;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.335116323670575
796 k=15;n_neighbors=20;



800 k=15;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3978577451505595




801 k=15;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39525720804082654




802 k=15;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3976018375726378




803 k=15;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3976018375726378




804 k=15;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.3974052617117054
805 k=15;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.35642643434608057
806 k=15;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.35900901359172394
807 k=15;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3594583897753353
808 k=15;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.36001904313467137
809 k=15;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.35471317683269366
810 k=15;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3430281938602542
811 k=15;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.34465304805636854
812 k=15;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.34597748324721317
813 k=15;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.34234942744272834
814 k=15;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.341595906631139
815 k=15;n_neighbors=20;assign_labels=discretize;n_inits



820 k=15;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.38666332811236415




821 k=15;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.38666332811236415




822 k=15;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.38666332811236415




823 k=15;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.38666332811236415




824 k=15;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.38666332811236415
825 k=15;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.36539926376556375
826 k=15;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.36539926376556375
827 k=15;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.36539926376556375
828 k=15;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.36539926376556375
829 k=15;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.36539926376556375
830 k=15;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.3507465090310078
831 k=15;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.3507465090310078
832 k=15;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.3507465090310078
833 k=15;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.3507465090310078
834 k=15;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.3507465090310078
835 k=15;n_neighbors=20;assign_labels=cluster_qr;n_inits



840 k=16;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.4041782288998466




841 k=16;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.40430360375792596




842 k=16;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.40123665452142865




843 k=16;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.40403430088362957




844 k=16;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.4046563211424271
845 k=16;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.361629081360185
846 k=16;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.36687294723281105
847 k=16;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3611689867442038
848 k=16;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.361629081360185
849 k=16;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.3620604356583489
850 k=16;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.3680881962065822
851 k=16;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3690850299427625
852 k=16;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.36929248278134186
853 k=16;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3694890460790259
854 k=16;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.36867285449677156
855 k=16;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.33815507084710983
856 k=16;n_neighbors=2



860 k=16;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.39126279973100503




861 k=16;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.39126279973100503




862 k=16;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.39006766984616204




863 k=16;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3615228455484704




864 k=16;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.39126279973100503
865 k=16;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.35350846757149035
866 k=16;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.35404493292348316
867 k=16;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.35388698726409323
868 k=16;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3525584223170172
869 k=16;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.35486570126024586
870 k=16;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3423130542160647
871 k=16;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.3460790233797645
872 k=16;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3397748821389259
873 k=16;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.34380377468892
874 k=16;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.33888127071794893
875 k=16;n_neighbors=20;assign_labels=discretize;n_inits=1



880 k=16;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3868773688338343




881 k=16;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3868773688338343




882 k=16;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3868773688338343




883 k=16;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3868773688338343




884 k=16;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3868773688338343
885 k=16;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.36569671362556705
886 k=16;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.36569671362556705
887 k=16;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.36569671362556705
888 k=16;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.36569671362556705
889 k=16;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.36569671362556705
890 k=16;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.35459711510331027
891 k=16;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.35459711510331027
892 k=16;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.35459711510331027
893 k=16;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.35459711510331027
894 k=16;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.35459711510331027
895 k=16;n_neighbors=20;assign_labels=cluster_qr;n_i



900 k=17;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.38053187966167323




901 k=17;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3802835474499872




902 k=17;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.3771912954956726




903 k=17;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.37552074175301575




904 k=17;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3752724095413297
905 k=17;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.3669670653243893
906 k=17;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.35889300224828424
907 k=17;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.36717846357816597
908 k=17;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.35946323587855955
909 k=17;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.35965788601068344
910 k=17;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.35804381644750727
911 k=17;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.36395078150397003
912 k=17;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.36395078150397003
913 k=17;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.36345719164228174
914 k=17;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.36395078150397003
915 k=17;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.34143624831015373
916 k=17;n_nei



920 k=17;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3641786671337951




921 k=17;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.35760770000202224




922 k=17;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3632982425999929




923 k=17;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3622047911501296




924 k=17;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.36274665669230544
925 k=17;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.35464243452807864
926 k=17;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.34860915195462583
927 k=17;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3528387204778064
928 k=17;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.354991408276001
929 k=17;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.3562297987024152
930 k=17;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3508938883166717
931 k=17;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.3516626680162682
932 k=17;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.34790481003330254
933 k=17;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.3436320585739327
934 k=17;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3438488329644967
935 k=17;n_neighbors=20;assign_labels=discretize;n_inits=10



940 k=17;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.35764697936368095




941 k=17;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.35764697936368095




942 k=17;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.35764697936368095




943 k=17;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.35764697936368095




944 k=17;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.35764697936368095
945 k=17;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.3394943648299149
946 k=17;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.3394943648299149
947 k=17;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.3394943648299149
948 k=17;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.3394943648299149
949 k=17;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.3394943648299149
950 k=17;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.3525440416968393
951 k=17;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.3525440416968393
952 k=17;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.3525440416968393
953 k=17;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.3525440416968393
954 k=17;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.3525440416968393
955 k=17;n_neighbors=20;assign_labels=cluster_qr;n_inits=10 6



960 k=18;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.3766571876762742




961 k=18;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.3766571876762742




962 k=18;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.37620771879700876




963 k=18;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.37698023708407535




964 k=18;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.37620771879700876
965 k=18;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.375382916413433
966 k=18;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3720808480394248
967 k=18;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.3684157210442395
968 k=18;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3673042997113628
969 k=18;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.37272618041400624
970 k=18;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.37351140740565447
971 k=18;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.3725419651991978
972 k=18;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.3761140594504825
973 k=18;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.3741091782178514
974 k=18;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.37496277615435863
975 k=18;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.3425974985887415
976 k=18;n_neighbors=



980 k=18;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.34896253295473534




981 k=18;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.34896253295473534




982 k=18;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.34896253295473534




983 k=18;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3488013391312717




984 k=18;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.34896253295473534
985 k=18;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.358136539929619
986 k=18;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.3502307566116811
987 k=18;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.3498036384402149
988 k=18;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.35089130388585343
989 k=18;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.35338548744366793
990 k=18;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3679299388024241
991 k=18;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.36250938607046723
992 k=18;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3629800536248418
993 k=18;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.36188955787663
994 k=18;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.36138796309094723
995 k=18;n_neighbors=20;assign_labels=discretize;n_inits=10 



1000 k=18;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.34585886488838513




1001 k=18;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.34585886488838513




1002 k=18;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.34585886488838513




1003 k=18;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.34585886488838513




1004 k=18;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.34585886488838513
1005 k=18;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.33491899286741417
1006 k=18;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.33491899286741417
1007 k=18;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.33491899286741417
1008 k=18;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.33491899286741417
1009 k=18;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.33491899286741417
1010 k=18;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.3466779022690756
1011 k=18;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.3466779022690756
1012 k=18;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.3466779022690756
1013 k=18;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.3466779022690756
1014 k=18;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.3466779022690756
1015 k=18;n_neighbors=20;assign_labels=cluste



1020 k=19;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.37641252146577137




1021 k=19;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.376700162139354




1022 k=19;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.37738847338222575




1023 k=19;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3762546632858823




1024 k=19;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.37776638246147054
1025 k=19;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.36720703012782596
1026 k=19;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3615866804727357
1027 k=19;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.36514973687393176
1028 k=19;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3616489411853084
1029 k=19;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.36564281895985534
1030 k=19;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.36832794291190185
1031 k=19;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.365035869229856
1032 k=19;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.370910302854972
1033 k=19;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.370149009750908
1034 k=19;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.369656741460752
1035 k=19;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.32926202810306615
1036 k=19;



1040 k=19;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.345543415399029




1041 k=19;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.3472176141616059




1042 k=19;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.34699383539355083




1043 k=19;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3472176141616059




1044 k=19;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.345543415399029
1045 k=19;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.3598547045488052
1046 k=19;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.36341890346733274
1047 k=19;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.35941720555336865
1048 k=19;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3536589845501067
1049 k=19;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.36277365044880294
1050 k=19;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.36167366250969174
1051 k=19;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.3631373424406096
1052 k=19;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.3570672780255387
1053 k=19;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.36329096903204916
1054 k=19;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3597036263522706
1055 k=19;n_neighbors=20;assign_labels=discreti



1060 k=19;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3466069580263652




1061 k=19;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3466069580263652




1062 k=19;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3466069580263652




1063 k=19;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3466069580263652




1064 k=19;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3466069580263652
1065 k=19;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.3208634326315313
1066 k=19;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.3208634326315313
1067 k=19;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.3208634326315313
1068 k=19;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.3208634326315313
1069 k=19;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.3208634326315313
1070 k=19;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.34959948150576947
1071 k=19;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.34959948150576947
1072 k=19;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.34959948150576947
1073 k=19;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.34959948150576947
1074 k=19;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.34959948150576947
1075 k=19;n_neighbors=20;assign_labels=cluster



1080 k=20;n_neighbors=3;assign_labels=kmeans;n_inits=10 670487 0.37850550473820666




1081 k=20;n_neighbors=3;assign_labels=kmeans;n_inits=10 116739 0.38182923821181347




1082 k=20;n_neighbors=3;assign_labels=kmeans;n_inits=10 26225 0.37941748061276226




1083 k=20;n_neighbors=3;assign_labels=kmeans;n_inits=10 777572 0.3790271851600793




1084 k=20;n_neighbors=3;assign_labels=kmeans;n_inits=10 288389 0.3789529986969468
1085 k=20;n_neighbors=5;assign_labels=kmeans;n_inits=10 670487 0.35389474886330374
1086 k=20;n_neighbors=5;assign_labels=kmeans;n_inits=10 116739 0.3545020981163718
1087 k=20;n_neighbors=5;assign_labels=kmeans;n_inits=10 26225 0.35396922715144674
1088 k=20;n_neighbors=5;assign_labels=kmeans;n_inits=10 777572 0.3620860045283105
1089 k=20;n_neighbors=5;assign_labels=kmeans;n_inits=10 288389 0.35446258167886335
1090 k=20;n_neighbors=10;assign_labels=kmeans;n_inits=10 670487 0.34458463397589323
1091 k=20;n_neighbors=10;assign_labels=kmeans;n_inits=10 116739 0.28519245405562477
1092 k=20;n_neighbors=10;assign_labels=kmeans;n_inits=10 26225 0.34830111286281307
1093 k=20;n_neighbors=10;assign_labels=kmeans;n_inits=10 777572 0.34479235682120357
1094 k=20;n_neighbors=10;assign_labels=kmeans;n_inits=10 288389 0.342899743608706
1095 k=20;n_neighbors=20;assign_labels=kmeans;n_inits=10 670487 0.32843872607985747
1096 



1100 k=20;n_neighbors=3;assign_labels=discretize;n_inits=10 670487 0.3443409329115577




1101 k=20;n_neighbors=3;assign_labels=discretize;n_inits=10 116739 0.3443409329115577




1102 k=20;n_neighbors=3;assign_labels=discretize;n_inits=10 26225 0.3464814423679996




1103 k=20;n_neighbors=3;assign_labels=discretize;n_inits=10 777572 0.3443409329115577




1104 k=20;n_neighbors=3;assign_labels=discretize;n_inits=10 288389 0.3464814423679996
1105 k=20;n_neighbors=5;assign_labels=discretize;n_inits=10 670487 0.34961543573133724
1106 k=20;n_neighbors=5;assign_labels=discretize;n_inits=10 116739 0.3499451563508674
1107 k=20;n_neighbors=5;assign_labels=discretize;n_inits=10 26225 0.34892579481427566
1108 k=20;n_neighbors=5;assign_labels=discretize;n_inits=10 777572 0.3238276575211048
1109 k=20;n_neighbors=5;assign_labels=discretize;n_inits=10 288389 0.34751903477469104
1110 k=20;n_neighbors=10;assign_labels=discretize;n_inits=10 670487 0.3425661716365991
1111 k=20;n_neighbors=10;assign_labels=discretize;n_inits=10 116739 0.34801630775128667
1112 k=20;n_neighbors=10;assign_labels=discretize;n_inits=10 26225 0.34105821861536023
1113 k=20;n_neighbors=10;assign_labels=discretize;n_inits=10 777572 0.30904857329794455
1114 k=20;n_neighbors=10;assign_labels=discretize;n_inits=10 288389 0.3470759747303539
1115 k=20;n_neighbors=20;assign_labels=discre



1120 k=20;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 670487 0.3480500215836192




1121 k=20;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 116739 0.3480500215836192




1122 k=20;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 26225 0.3480500215836192




1123 k=20;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 777572 0.3480500215836192




1124 k=20;n_neighbors=3;assign_labels=cluster_qr;n_inits=10 288389 0.3480500215836192
1125 k=20;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 670487 0.22984911401384978
1126 k=20;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 116739 0.22984911401384978
1127 k=20;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 26225 0.22984911401384978
1128 k=20;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 777572 0.22984911401384978
1129 k=20;n_neighbors=5;assign_labels=cluster_qr;n_inits=10 288389 0.22984911401384978
1130 k=20;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 670487 0.25073298821426937
1131 k=20;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 116739 0.25073298821426937
1132 k=20;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 26225 0.25073298821426937
1133 k=20;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 777572 0.25073298821426937
1134 k=20;n_neighbors=10;assign_labels=cluster_qr;n_inits=10 288389 0.25073298821426937
1135 k=20;n_neighbors=20;assign_labels=cl

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,seed,parameters
114,0.464464,0.450479,0.082747,0.118875,0.364162,0.546893,0.421379,0.13564,-0.023273,0.041879,0.0659,0.140856,0.066582,0.534716,0.164248,0.441435,-0.036821,-0.02962,288389,k=3;n_neighbors=10;assign_labels=cluster_qr;n_...
110,0.464464,0.450479,0.082747,0.118875,0.364162,0.546893,0.421379,0.13564,-0.023273,0.041879,0.0659,0.140856,0.066582,0.534716,0.164248,0.441435,-0.036821,-0.02962,670487,k=3;n_neighbors=10;assign_labels=cluster_qr;n_...
111,0.464464,0.450479,0.082747,0.118875,0.364162,0.546893,0.421379,0.13564,-0.023273,0.041879,0.0659,0.140856,0.066582,0.534716,0.164248,0.441435,-0.036821,-0.02962,116739,k=3;n_neighbors=10;assign_labels=cluster_qr;n_...
112,0.464464,0.450479,0.082747,0.118875,0.364162,0.546893,0.421379,0.13564,-0.023273,0.041879,0.0659,0.140856,0.066582,0.534716,0.164248,0.441435,-0.036821,-0.02962,26225,k=3;n_neighbors=10;assign_labels=cluster_qr;n_...
113,0.464464,0.450479,0.082747,0.118875,0.364162,0.546893,0.421379,0.13564,-0.023273,0.041879,0.0659,0.140856,0.066582,0.534716,0.164248,0.441435,-0.036821,-0.02962,777572,k=3;n_neighbors=10;assign_labels=cluster_qr;n_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,-0.007164,-0.006855,0.000665,0.0,0.0,-0.010015,0.0,0.0,-0.009193,0.008852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,670487,k=2;n_neighbors=10;assign_labels=cluster_qr;n_...
51,-0.007164,-0.006855,0.000665,0.0,0.0,-0.010015,0.0,0.0,-0.009193,0.008852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,116739,k=2;n_neighbors=10;assign_labels=cluster_qr;n_...
52,-0.007164,-0.006855,0.000665,0.0,0.0,-0.010015,0.0,0.0,-0.009193,0.008852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26225,k=2;n_neighbors=10;assign_labels=cluster_qr;n_...
53,-0.007164,-0.006855,0.000665,0.0,0.0,-0.010015,0.0,0.0,-0.009193,0.008852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,777572,k=2;n_neighbors=10;assign_labels=cluster_qr;n_...


# BIRCH


In [12]:
from sklearn.cluster import Birch
# BIRCH parameter grid: every combination of the three lists below is one run.
ks = [*range(2, 21)]
branching_factors = [10, 50, 100]
thresholds = [0.1, 0.5, 0.95]
# Number of parameter combinations in the grid.
len(thresholds) * len(branching_factors) * len(ks)

171

In [13]:
# Grid search over the BIRCH parameters (ks x branching_factors x thresholds).
# For every combination the sample clusters are either loaded from a cached
# .clusters.tsv file or computed with Birch, then scored against the known
# sample groups with ARI. One row of scores per combination ends up in `stats`.
stats = {}
i = 0
method = "BIRCH"

# Per-run cluster files are written to <basedir>/<method>/. Create the folder up
# front so the first to_csv() below cannot fail on a missing directory.
out_dir = basedir+"/"+method
os.makedirs(out_dir, exist_ok=True)

# NOTE(review): in the recorded log the PAM50 score for a given k does not change
# with branching_factor/threshold - worth confirming these two parameters have
# any effect on this dataset.
for k in ks:
    for bf in branching_factors:
        for t in thresholds:
            params = "k="+str(k)+";branching_factor="+str(bf)+";threshold="+str(t)
            clusters_file = out_dir+"/"+dataset+"."+method+"."+params+".clusters.tsv"
            cached = os.path.exists(clusters_file)
            if cached:
                # reuse a previous run; samples are stored as a space-separated string
                clusters = pd.read_csv(clusters_file, sep="\t", index_col=0)
                clusters["samples"] = clusters["samples"].apply(lambda row: set(row.split(" ")))
            else:
                # samples are the columns of exprs, hence the transpose
                labels = Birch(n_clusters=k, branching_factor=bf, threshold=t).fit_predict(exprs.T)
                clusters = labels_to_clsuters(labels, exprs.columns)
            performances, best_matches = calculate_perfromance(clusters, known_groups, all_samples,
                                                              performance_measure="ARI")
            stats[i] = performances.to_dict()
            stats[i]["parameters"] = params
            print(i, params, stats[i]["PAM50"])
            if not cached:
                # saving clusters (only freshly computed ones; a cached file is already on disk)
                clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
                clusters = clusters.loc[:, ["n_samples", "samples"]]
                clusters.to_csv(clusters_file, sep="\t")
            i += 1
# one row per parameter combination, one column per classification
stats = pd.DataFrame.from_dict(stats).T
# saving performances
stats.to_csv(basedir+"/"+method+"_"+dataset+".tsv", sep="\t")
stats.sort_values(by="PAM50", ascending=False)

0 k=2;branching_factor=10;threshold=0.1 0.008537957262166858
1 k=2;branching_factor=10;threshold=0.5 0.008537957262166858
2 k=2;branching_factor=10;threshold=0.95 0.008537957262166858
3 k=2;branching_factor=50;threshold=0.1 0.008537957262166858
4 k=2;branching_factor=50;threshold=0.5 0.008537957262166858
5 k=2;branching_factor=50;threshold=0.95 0.008537957262166858
6 k=2;branching_factor=100;threshold=0.1 0.008537957262166858
7 k=2;branching_factor=100;threshold=0.5 0.008537957262166858
8 k=2;branching_factor=100;threshold=0.95 0.008537957262166858
9 k=3;branching_factor=10;threshold=0.1 0.48119671974942824
10 k=3;branching_factor=10;threshold=0.5 0.48119671974942824
11 k=3;branching_factor=10;threshold=0.95 0.48119671974942824
12 k=3;branching_factor=50;threshold=0.1 0.48119671974942824
13 k=3;branching_factor=50;threshold=0.5 0.48119671974942824
14 k=3;branching_factor=50;threshold=0.95 0.48119671974942824
15 k=3;branching_factor=100;threshold=0.1 0.48119671974942824
16 k=3;branching

133 k=16;branching_factor=100;threshold=0.5 0.3123305764269823
134 k=16;branching_factor=100;threshold=0.95 0.3123305764269823
135 k=17;branching_factor=10;threshold=0.1 0.3123305764269823
136 k=17;branching_factor=10;threshold=0.5 0.3123305764269823
137 k=17;branching_factor=10;threshold=0.95 0.3123305764269823
138 k=17;branching_factor=50;threshold=0.1 0.3123305764269823
139 k=17;branching_factor=50;threshold=0.5 0.3123305764269823
140 k=17;branching_factor=50;threshold=0.95 0.3123305764269823
141 k=17;branching_factor=100;threshold=0.1 0.3123305764269823
142 k=17;branching_factor=100;threshold=0.5 0.3123305764269823
143 k=17;branching_factor=100;threshold=0.95 0.3123305764269823
144 k=18;branching_factor=10;threshold=0.1 0.3123305764269823
145 k=18;branching_factor=10;threshold=0.5 0.3123305764269823
146 k=18;branching_factor=10;threshold=0.95 0.3123305764269823
147 k=18;branching_factor=50;threshold=0.1 0.3123305764269823
148 k=18;branching_factor=50;threshold=0.5 0.312330576426982

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,parameters
38,0.482589,0.464339,0.181933,0.198069,0.403205,0.542279,0.538211,0.189716,0.106853,0.13382,0.049074,0.06031,0.0924,0.595563,0.15715,0.540227,0.13158,0.095657,k=6;branching_factor=10;threshold=0.95
36,0.482589,0.464339,0.181933,0.198069,0.403205,0.542279,0.538211,0.189716,0.106853,0.13382,0.049074,0.06031,0.0924,0.595563,0.15715,0.540227,0.13158,0.095657,k=6;branching_factor=10;threshold=0.1
29,0.482589,0.464339,0.176157,0.19207,0.403205,0.542279,0.538211,0.189716,0.106853,0.119424,0.049074,0.06031,0.0924,0.595563,0.15715,0.540227,0.015075,0.010863,k=5;branching_factor=10;threshold=0.95
31,0.482589,0.464339,0.176157,0.19207,0.403205,0.542279,0.538211,0.189716,0.106853,0.119424,0.049074,0.06031,0.0924,0.595563,0.15715,0.540227,0.015075,0.010863,k=5;branching_factor=50;threshold=0.5
32,0.482589,0.464339,0.176157,0.19207,0.403205,0.542279,0.538211,0.189716,0.106853,0.119424,0.049074,0.06031,0.0924,0.595563,0.15715,0.540227,0.015075,0.010863,k=5;branching_factor=50;threshold=0.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,0.008538,0.008169,0.008538,0.0,0.0,0.0,0.0,0.066624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=2;branching_factor=10;threshold=0.5
6,0.008538,0.008169,0.008538,0.0,0.0,0.0,0.0,0.066624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=2;branching_factor=100;threshold=0.1
7,0.008538,0.008169,0.008538,0.0,0.0,0.0,0.0,0.066624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=2;branching_factor=100;threshold=0.5
8,0.008538,0.008169,0.008538,0.0,0.0,0.0,0.0,0.066624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k=2;branching_factor=100;threshold=0.95


# MeanShift

In [25]:
from sklearn.cluster import MeanShift

# Hyper-parameter grid for the MeanShift sweep.
# `bandwidth` is declared here; the sweep cell does not forward it to MeanShift.
bandwidth = None
max_iters = [
    300,
    1000,
]
cluster_all = [
    True,
    False,
]
min_bin_freq = [
    1,
    2,
    5,
    10,
]
# Number of parameter combinations in the grid (displayed as the cell output).
len(min_bin_freq) * len(cluster_all) * len(max_iters)

16

In [26]:
# Grid search over MeanShift settings.
# For every (max_iter, cluster_all, min_bin_freq) combination the sample clusters are
# either re-loaded from a previously saved .clusters.tsv file or computed from `exprs`,
# scored against `known_groups` with ARI, and written back to disk.
# The per-run scores are collected in `stats` and saved as one table at the end.
stats = {}
i=0
method = "MeanShift"

# makedirs(exist_ok=True) replaces the exists()+mkdir() pair: no check-then-create race,
# and the parent `basedir` is created as well when it does not exist yet.
method_dir = basedir+"/"+method+"/"
os.makedirs(method_dir, exist_ok=True)

for max_iter in max_iters:
    for ca in cluster_all:
        for bf in min_bin_freq:
            # NOTE: the "max_iters=" key is part of the cache file names; keep it as is.
            params = "max_iters="+str(max_iter)+";cluster_all="+str(ca)+";min_bin_freq="+str(bf)
            # built once and reused for the cache check, the read and the write
            clusters_file = method_dir+dataset+"."+method+"."+params+".clusters.tsv"
            if os.path.exists(clusters_file):
                # cached result: samples are stored as a space-separated string
                clusters = pd.read_csv(clusters_file,sep="\t",index_col=0)
                clusters["samples"] = clusters["samples"].apply(lambda row: set(row.split(" ")))
            else:
                # NOTE(review): scikit-learn only uses min_bin_freq when bin_seeding=True.
                # With the default bin_seeding=False used here the printed scores are
                # identical across min_bin_freq values -- confirm whether bin_seeding
                # was intended (cached file names do not encode it).
                labels = MeanShift(min_bin_freq= bf, cluster_all=ca, max_iter=max_iter).fit_predict(exprs.T)
                clusters = labels_to_clsuters(labels,  exprs.columns)
            performances, best_matches = calculate_perfromance(clusters, known_groups,all_samples,
                                                              performance_measure = "ARI")
            stats[i] = performances.to_dict()
            stats[i]["parameters"]=params
            print(i,params,stats[i]["PAM50"])
            # saving clusters: sample sets are serialised as sorted, space-separated strings
            clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
            clusters = clusters.loc[:,["n_samples","samples"]]
            clusters.to_csv(clusters_file,sep="\t")
            i+=1
# one row per run, one column per reference classification (+ "parameters")
stats = pd.DataFrame.from_dict(stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)

0 max_iters=300;cluster_all=True;min_bin_freq=1 0.0518907757525554
1 max_iters=300;cluster_all=True;min_bin_freq=2 0.0518907757525554
2 max_iters=300;cluster_all=True;min_bin_freq=5 0.0518907757525554
3 max_iters=300;cluster_all=True;min_bin_freq=10 0.0518907757525554
4 max_iters=300;cluster_all=False;min_bin_freq=1 0.049857047673299876
5 max_iters=300;cluster_all=False;min_bin_freq=2 0.049857047673299876
6 max_iters=300;cluster_all=False;min_bin_freq=5 0.049857047673299876
7 max_iters=300;cluster_all=False;min_bin_freq=10 0.049857047673299876
8 max_iters=1000;cluster_all=True;min_bin_freq=1 0.0518907757525554
9 max_iters=1000;cluster_all=True;min_bin_freq=2 0.0518907757525554
10 max_iters=1000;cluster_all=True;min_bin_freq=5 0.0518907757525554
11 max_iters=1000;cluster_all=True;min_bin_freq=10 0.0518907757525554
12 max_iters=1000;cluster_all=False;min_bin_freq=1 0.049857047673299876
13 max_iters=1000;cluster_all=False;min_bin_freq=2 0.049857047673299876
14 max_iters=1000;cluster_all=F

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,parameters
0,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=300;cluster_all=True;min_bin_freq=1
1,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=300;cluster_all=True;min_bin_freq=2
2,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=300;cluster_all=True;min_bin_freq=5
3,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=300;cluster_all=True;min_bin_freq=10
8,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=1000;cluster_all=True;min_bin_freq=1
9,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=1000;cluster_all=True;min_bin_freq=2
10,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=1000;cluster_all=True;min_bin_freq=5
11,0.051891,0.049648,0.011445,0.008239,0.045074,0.049137,0.106493,0.024146,-0.024279,0.005803,0.0,0.0,0.0,0.067079,0.004907,0.108822,0.0,0.0,max_iters=1000;cluster_all=True;min_bin_freq=10
4,0.049857,0.047702,0.004544,0.008091,0.0472,0.051156,0.103499,0.0,-0.027761,0.0,0.0,0.0,0.0,0.06927,0.005591,0.11717,0.0,0.0,max_iters=300;cluster_all=False;min_bin_freq=1
5,0.049857,0.047702,0.004544,0.008091,0.0472,0.051156,0.103499,0.0,-0.027761,0.0,0.0,0.0,0.0,0.06927,0.005591,0.11717,0.0,0.0,max_iters=300;cluster_all=False;min_bin_freq=2


# Bisecting K-Means

In [21]:
from sklearn.cluster import BisectingKMeans

# Hyper-parameter grid for the BisectingKMeans sweep.
# Wider alternatives that were tried/considered are kept as trailing comments.
ks = [*range(2, 21)]                    # k = 2 .. 20
inits = ['k-means++']                   # ['k-means++','random']
max_iters = [300]                       # [300,1000]
tols = [0.0001]                         # [1e-3,1e-4,1e-5]
bisecting_strategy = [
    "biggest_inertia",
    "largest_cluster",
]
# Number of parameter combinations in the grid (displayed as the cell output).
len(bisecting_strategy) * len(tols) * len(max_iters) * len(inits) * len(ks)

38

In [22]:
# Grid search over BisectingKMeans settings.
# Every (k, max_iter, tol, bisecting_strategy) combination is run once per entry of
# `seeds`; clusters with fewer than 5 samples are dropped, the rest are scored against
# `known_groups` with ARI and saved to a per-run .clusters.tsv file.
# The per-run scores are collected in `stats` and saved as one table at the end.
stats = {}
i=0

method = "BisectingKMeans"
# makedirs(exist_ok=True) replaces the exists()+mkdir() pair: no check-then-create race,
# and the parent `basedir` is created as well when it does not exist yet.
method_dir = basedir+"/"+method+"/"
os.makedirs(method_dir, exist_ok=True)

for k in ks:
    for max_iter in max_iters:
        for tol in tols:
            for bs in bisecting_strategy:
                # the parameter string does not depend on the seed: build it once per combination
                params = "k="+str(k)+";max_iter="+str(max_iter)+";tol="+str(tol)+";bisecting_strategy="+str(bs)
                for seed in seeds:
                    # NOTE: init is fixed to 'k-means++' here; the `inits` list is not iterated.
                    labels = BisectingKMeans(n_clusters=k, init='k-means++', n_init=10,
                           random_state=seed, max_iter= max_iter, tol=tol,
                           bisecting_strategy=bs).fit_predict(exprs.T)
                    clusters = labels_to_clsuters(labels,  exprs.columns)
                    # keep clusters with at least 5 samples; .copy() makes the filtered frame
                    # independent so the column assignment below does not raise
                    # pandas' SettingWithCopyWarning
                    clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
                    performances, best_matches = calculate_perfromance(clusters, known_groups,all_samples,
                                                                      performance_measure = "ARI")
                    stats[i] = performances.to_dict()
                    stats[i]["seed"]=seed
                    stats[i]["parameters"]=params
                    print(params,seed,stats[i]["PAM50"])
                    # saving clusters: sample sets are serialised as sorted, space-separated strings
                    clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
                    clusters = clusters.loc[:,["n_samples","samples"]]
                    clusters.to_csv(method_dir+dataset+"."+method+"."+params+".seed="+str(seed)+".clusters.tsv",sep="\t")
                    i+=1
# one row per run, one column per reference classification (+ "seed", "parameters")
stats = pd.DataFrame.from_dict(stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)
                    

k=2;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 670487 0.41123148150788613
k=2;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 116739 0.5778080519260934
k=2;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 26225 0.4231466434101364
k=2;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 777572 0.41631732537741156
k=2;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 288389 0.4231466434101364
k=2;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 670487 0.41123148150788613
k=2;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 116739 0.5778080519260934
k=2;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 26225 0.4231466434101364
k=2;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 777572 0.41631732537741156
k=2;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 288389 0.4231466434101364
k=3;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 670487 0.4087092855276737
k=3;max_iter=300;to

k=11;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 26225 0.2543402180473662
k=11;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 777572 0.2806715051488485
k=11;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 288389 0.27102130152622783
k=11;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 670487 0.2979779607116787
k=11;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 116739 0.23626329371365873
k=11;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 26225 0.2758790927524392
k=11;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 777572 0.28567273007773586
k=11;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 288389 0.26012410032358046
k=12;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 670487 0.25598984410700876
k=12;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 116739 0.23862087879211852
k=12;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 26225 0.2543402180473662
k=12;ma

k=20;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 777572 0.1797643931148339
k=20;max_iter=300;tol=0.0001;bisecting_strategy=biggest_inertia 288389 0.16177876394564453
k=20;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 670487 0.19560347345111517
k=20;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 116739 0.23238819148123113
k=20;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 26225 0.21896466768749856
k=20;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 777572 0.21080236298309038
k=20;max_iter=300;tol=0.0001;bisecting_strategy=largest_cluster 288389 0.24387950175931145


Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,seed,parameters
66,0.594227,0.573987,0.245366,0.274381,0.440605,0.62703,0.765629,0.21253,0.108606,0.137808,0.075318,0.088663,0.211695,0.547174,0.375323,0.410084,0.118669,0.083416,116739,k=8;max_iter=300;tol=0.0001;bisecting_strategy...
56,0.593758,0.573157,0.272915,0.305478,0.440605,0.62703,0.765629,0.21253,0.179572,0.137808,0.059496,0.079185,0.211695,0.547174,0.375323,0.410084,0.118669,0.083416,116739,k=7;max_iter=300;tol=0.0001;bisecting_strategy...
46,0.593758,0.573157,0.278403,0.305917,0.440605,0.62703,0.765629,0.21253,0.193472,0.137808,0.059496,0.079185,0.211695,0.547174,0.375323,0.410084,0.118669,0.083416,116739,k=6;max_iter=300;tol=0.0001;bisecting_strategy...
36,0.59352,0.572928,0.278165,0.305917,0.440605,0.62703,0.765629,0.21253,0.193472,0.137808,0.051454,0.079185,0.211695,0.547174,0.375323,0.410084,0.096049,0.083416,116739,k=5;max_iter=300;tol=0.0001;bisecting_strategy...
26,0.579334,0.55931,0.288035,0.326877,0.428036,0.62703,0.765629,0.070804,0.193472,0.219433,0.051454,0.079185,0.072867,0.547174,0.375323,0.410084,0.049356,0.034194,116739,k=4;max_iter=300;tol=0.0001;bisecting_strategy...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
154,0.172281,0.171736,0.148284,0.146348,0.127904,0.138858,0.32152,0.14802,0.079143,0.13743,0.121819,0.158664,0.111772,0.152069,0.07592,0.291528,0.186798,0.166959,288389,k=17;max_iter=300;tol=0.0001;bisecting_strateg...
184,0.161779,0.161654,0.138694,0.144968,0.122552,0.137535,0.25231,0.17717,0.079143,0.13743,0.121819,0.158664,0.111772,0.152069,0.07438,0.219723,0.354863,0.372533,288389,k=20;max_iter=300;tol=0.0001;bisecting_strateg...
160,0.144055,0.145178,0.140567,0.136845,0.111963,0.109247,0.25301,0.207128,0.090803,0.122119,0.076534,0.172094,0.177042,0.126072,0.057392,0.248534,0.217684,0.196905,670487,k=18;max_iter=300;tol=0.0001;bisecting_strateg...
180,0.141785,0.142999,0.138297,0.139296,0.111963,0.109247,0.25301,0.207128,0.090803,0.122119,0.0,0.172094,0.177042,0.126072,0.057392,0.248534,0.217684,0.196905,670487,k=20;max_iter=300;tol=0.0001;bisecting_strateg...


# Mini-Batch K-Means

In [18]:
from sklearn.cluster import MiniBatchKMeans

# Hyper-parameter grid for the MiniBatchKMeans sweep.
inits = ['k-means++']                   # ['k-means++','random']
max_iters = [300]
max_no_improvements = [
    10,
    20,
]
ks = [*range(2, 21)]                    # k = 2 .. 20
batch_sizes = [
    100,
    500,
    1024,
]
reassignment_ratios = [
    0.005,
    0.01,
    0.1,
]
# Number of parameter combinations in the grid (displayed as the cell output).
(
    len(reassignment_ratios)
    * len(batch_sizes)
    * len(max_no_improvements)
    * len(max_iters)
    * len(inits)
    * len(ks)
)

342

In [19]:
# Grid search over MiniBatchKMeans settings.
# Every (k, max_iter, batch_size, max_no_improvement, reassignment_ratio) combination is
# run once per entry of `seeds`; clusters with fewer than 5 samples are dropped, the rest
# are scored against `known_groups` with ARI and saved to a per-run .clusters.tsv file.
# The per-run scores are collected in `stats` and saved as one table at the end.
stats = {}
i=0

method = "MiniBatchKMeans"
# makedirs(exist_ok=True) replaces the exists()+mkdir() pair: no check-then-create race,
# and the parent `basedir` is created as well when it does not exist yet.
method_dir = basedir+"/"+method+"/"
os.makedirs(method_dir, exist_ok=True)

for k in ks:
    for max_iter in max_iters:
        for bs in batch_sizes:
            for mni in max_no_improvements:
                for rr in reassignment_ratios:
                    # the parameter string does not depend on the seed: build it once per combination
                    params = "k="+str(k)+";max_iter="+str(max_iter)+";batch_size="+str(bs)
                    params += ";max_no_improvement="+str(mni)+";reassignment_ratio="+str(rr)
                    for seed in seeds:
                        # NOTE: init is fixed to 'k-means++' here; the `inits` list is not iterated.
                        labels = MiniBatchKMeans(n_clusters=k, init='k-means++', n_init=10, tol=0.0,
                               random_state=seed, max_iter=max_iter, batch_size=bs,
                               max_no_improvement=mni,reassignment_ratio=rr).fit_predict(exprs.T)
                        clusters = labels_to_clsuters(labels,  exprs.columns)
                        # keep clusters with at least 5 samples; .copy() makes the filtered frame
                        # independent so the column assignment below does not raise
                        # pandas' SettingWithCopyWarning
                        clusters = clusters.loc[clusters["n_samples"]>=5,:].copy()
                        performances, best_matches = calculate_perfromance(clusters, known_groups,all_samples,
                                                                          performance_measure = "ARI")
                        stats[i] = performances.to_dict()
                        stats[i]["seed"]=seed
                        stats[i]["parameters"]=params
                        print(params,seed,stats[i]["PAM50"])
                        # saving clusters: sample sets are serialised as sorted, space-separated strings
                        clusters["samples"] = clusters["samples"].apply(lambda row: " ".join(sorted(row)))
                        clusters = clusters.loc[:,["n_samples","samples"]]
                        clusters.to_csv(method_dir+dataset+"."+method+"."+params+".seed="+str(seed)+".clusters.tsv",sep="\t")
                        i+=1
# one row per run, one column per reference classification (+ "seed", "parameters")
stats = pd.DataFrame.from_dict(stats).T
# saving performances
stats.to_csv(basedir +"/"+method+"_"+dataset+".tsv",sep="\t")
stats.sort_values(by = "PAM50",ascending=False)
                    

k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 670487 0.549350632689851
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 116739 0.026004171230874652
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 26225 0.020533201156199797
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 777572 0.16624736106666435
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 288389 0.5507951793920857
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 670487 0.549350632689851
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 116739 0.026004171230874652
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 26225 0.020533201156199797
k=2;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 777572 0.16624736106666435
k=2;max_iter=300;batch_size=100;max_no_improvement=10

k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 288389 0.13142982830488814
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 670487 0.44754307156637996
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 116739 0.31908547633121753
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 26225 0.10829889436305287
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 777572 0.05292723716257573
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 288389 0.13142982830488814
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 670487 0.44754307156637996
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 116739 0.31908547633121753
k=2;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 26225 0.10829889436305287
k=2;max_iter=300;batch_size=1024;max_no_improvemen

k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 777572 0.5340035506876831
k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 288389 0.3877426180622155
k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 670487 0.09156579535867632
k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 116739 0.5368289827488446
k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 26225 0.2859683820385769
k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 777572 0.5340035506876831
k=3;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 288389 0.3999785545150426
k=3;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 670487 0.08977389246734058
k=3;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 116739 0.5329266119363103
k=3;max_iter=300;batch_size=1024;max_no_improvement=20;re

k=4;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 26225 0.11302294161727142
k=4;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 777572 0.46639332189283783
k=4;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 288389 0.5365804106096039
k=4;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 670487 0.49548925104328856
k=4;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 116739 0.5221254925546379
k=4;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 26225 0.45152509410640074
k=4;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 777572 0.4888122699200871
k=4;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 288389 0.45692701038426886
k=4;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 670487 0.49548925104328856
k=4;max_iter=300;batch_size=1024;max_no_improvement=

k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 116739 0.46813652733148636
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 26225 0.47355913040419567
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 777572 0.4567217288482447
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 288389 0.5111097551631902
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 670487 0.4808605356120349
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 116739 0.46813652733148636
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 26225 0.47355913040419567
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 777572 0.4567217288482447
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 288389 0.5111097551631902
k=5;max_iter=300;batch_size=500;max_no_improvement=20;reas

k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 670487 0.45319792441523865
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 116739 0.4679783771946502
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 26225 0.5034060389176824
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 777572 0.28026100209334376
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 288389 0.5029259394189989
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 670487 0.47129486950253763
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 116739 0.4679783771946502
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 26225 0.5041732938107658
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 777572 0.28026100209334376
k=6;max_iter=300;batch_size=500;max_no_improvement=10;reassignment

k=7;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 288389 0.4293976694246753
k=7;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 670487 0.5036714077196153
k=7;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 116739 0.4728070964686891
k=7;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 26225 0.4775683137616707
k=7;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 777572 0.5224059331278581
k=7;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 288389 0.4293976694246753
k=7;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 670487 0.4151526760068969
k=7;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 116739 0.45737283046516464
k=7;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 26225 0.3684573651527719
k=7;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_

k=8;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 777572 0.4669425943126659
k=8;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 288389 0.47323846311331474
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 670487 0.29364734663738756
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 116739 0.42072713404283435
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 26225 0.29766659444759247
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 777572 0.4765129490726927
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 288389 0.4836688149955724
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 670487 0.2912937460057228
k=8;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 116739 0.41890694758303004
k=8;max_iter=300;batch_size=100;max_no_improvement=20;rea

k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 26225 0.444148641597161
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 777572 0.4607962039761178
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 288389 0.22325519313617487
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 670487 0.4357199575506222
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 116739 0.46195324391809484
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 26225 0.4477754433711179
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 777572 0.4607962039761178
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 288389 0.2391466455915452
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 670487 0.49756093194779827
k=9;max_iter=300;batch_size=100;max_no_improvement=10;reassign

k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 116739 0.4401917777494891
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 26225 0.25342856128510594
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 777572 0.2916277072359059
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 288389 0.45751199033060264
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 670487 0.30789367394231537
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 116739 0.464631487846063
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 26225 0.43796733397809773
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 777572 0.2916277072359059
k=9;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 288389 0.45714274547965694
k=10;max_iter=300;batch_size=100;max_no_improvement=10;rea

k=10;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 288389 0.3783756236826868
k=10;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 670487 0.25574640701105616
k=10;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 116739 0.4653841781882714
k=10;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 26225 0.23930912150348022
k=10;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 777572 0.3995343255953977
k=10;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 288389 0.3697158747223633
k=10;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 670487 0.20092931324589217
k=10;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 116739 0.2619069511192316
k=10;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 26225 0.24990227248556904
k=10;max_iter=300;batch_size=1024;max_no_impro

k=11;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 26225 0.2753642073047494
k=11;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 777572 0.29909434587089767
k=11;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 288389 0.32196792635179045
k=11;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 670487 0.2586379899693325
k=11;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 116739 0.3742670092238722
k=11;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 26225 0.276826899707869
k=11;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 777572 0.24840332811989177
k=11;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 288389 0.3316774306638377
k=11;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 670487 0.2586379899693325
k=11;max_iter=300;batch_size=1024;max_no_improv

k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 670487 0.3963136126959875
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 116739 0.2691054960375211
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 26225 0.40231198175568184
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 777572 0.28469375921056156
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 288389 0.34025361522842246
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 670487 0.3963136126959875
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 116739 0.2506814681756985
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 26225 0.40231198175568184
k=12;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 777572 0.258298196961917
k=12;max_iter=300;batch_size=500;max_no_improveme

k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 777572 0.31934511826679857
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 288389 0.1923184479349595
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 670487 0.40511853112035306
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 116739 0.2193633010068831
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 26225 0.2773523574322787
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 777572 0.31934511826679857
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 288389 0.1923184479349595
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 670487 0.391808667576501
k=13;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 116739 0.3155118683175393
k=13;max_iter=300;batch_size=500;max_no_improvement=10

k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 116739 0.23967091433194726
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 26225 0.32504979974276005
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 777572 0.24813546885198473
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.01 288389 0.30202243123219186
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 670487 0.2755294348033832
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 116739 0.35606378314359
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 26225 0.40348601914368154
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 777572 0.23227824372438516
k=14;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 288389 0.3125985609414038
k=14;max_iter=300;batch_size=500;max_no_improvement=10;rea

k=15;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 288389 0.43060121539030516
k=15;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 670487 0.2758513563183217
k=15;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 116739 0.29990662364893794
k=15;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 26225 0.36223581229526747
k=15;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 777572 0.3031725284436827
k=15;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.1 288389 0.41791663405193546
k=15;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 670487 0.3129129528977449
k=15;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 116739 0.2922577556028228
k=15;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.005 26225 0.34058859419208554
k=15;max_iter=300;batch_size=100;max_no_improvement=20

k=15;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 26225 0.26628570390016715
k=15;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 777572 0.3329210492162335
k=15;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.1 288389 0.450702664948529
k=16;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 670487 0.30582992754894067
k=16;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 116739 0.36205158674485716
k=16;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 26225 0.39588209470935737
k=16;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 777572 0.22937728638414037
k=16;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.005 288389 0.3863965962397182
k=16;max_iter=300;batch_size=100;max_no_improvement=10;reassignment_ratio=0.01 670487 0.3070615029847211
k=16;max_iter=300;batch_size=100;max_no_improvem

k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 670487 0.31543331554205023
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 116739 0.20032636540155607
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 26225 0.2584375219147391
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 777572 0.33523322204623995
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.005 288389 0.23773805250991192
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 670487 0.31543331554205023
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 116739 0.19978735883193524
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 26225 0.2584375219147391
k=16;max_iter=300;batch_size=1024;max_no_improvement=20;reassignment_ratio=0.01 777572 0.33523322204623995
k=16;max_iter=300;batch_size=1024;ma

k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 777572 0.3725263181211924
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.005 288389 0.3053093284564765
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 670487 0.2855807106753217
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 116739 0.1549819226997366
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 26225 0.2215849161960228
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 777572 0.3838876324977929
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.01 288389 0.3053093284564765
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 670487 0.3059013652070612
k=17;max_iter=300;batch_size=1024;max_no_improvement=10;reassignment_ratio=0.1 116739 0.20884254406834393
k=17;max_iter=300;batch_size=1024;max_no_impro

k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 116739 0.20996718325702823
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 26225 0.21108170789823208
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 777572 0.15998595099276464
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.01 288389 0.3505502725397076
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 670487 0.2791111383783375
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 116739 0.4732749997476337
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 26225 0.33157116218017957
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 777572 0.16604376389907727
k=18;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.1 288389 0.3639475636603959
k=18;max_iter=300;batch_size=1024;max_no_improvement=10;r

k=19;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 288389 0.20047527862528647
k=19;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 670487 0.20008309014079118
k=19;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 116739 0.319720605050707
k=19;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 26225 0.24396132690432557
k=19;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 777572 0.3800814971788187
k=19;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.1 288389 0.2496110798294674
k=19;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 670487 0.19148953644535086
k=19;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 116739 0.3023107823642488
k=19;max_iter=300;batch_size=500;max_no_improvement=20;reassignment_ratio=0.005 26225 0.2029213326004769
k=19;max_iter=300;batch_size=500;max_no_improvement=20;r

k=20;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 26225 0.41622449353973767
k=20;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 777572 0.2803561071676437
k=20;max_iter=300;batch_size=100;max_no_improvement=20;reassignment_ratio=0.1 288389 0.4452302273204576
k=20;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 670487 0.1886161909805364
k=20;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 116739 0.22125739431090843
k=20;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 26225 0.22764198035198535
k=20;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 777572 0.2570806031423963
k=20;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.005 288389 0.18173272527230838
k=20;max_iter=300;batch_size=500;max_no_improvement=10;reassignment_ratio=0.01 670487 0.18621679375734668
k=20;max_iter=300;batch_size=500;max_no_improvemen

Unnamed: 0,PAM50,Intrinsic,PAM50_AB,SCMOD2,IHC,Luminal,Basal,Her2,LumA,LumB,Normal,Claudin-low,IHC_HER2,IHC_ER,IHC_PR,IHC_TNBC,NET_kmeans,NET_ward,seed,parameters
226,0.555318,0.537046,0.393293,0.382225,0.390939,0.5019,0.946969,0.368613,0.320269,0.195449,0.040816,0.098914,0.239056,0.483551,0.296046,0.525326,0.126122,0.103684,116739,k=4;max_iter=300;batch_size=500;max_no_improve...
231,0.555318,0.537046,0.393293,0.382225,0.390939,0.5019,0.946969,0.368613,0.320269,0.195449,0.040816,0.098914,0.239056,0.483551,0.296046,0.525326,0.126122,0.103684,116739,k=4;max_iter=300;batch_size=500;max_no_improve...
211,0.555132,0.536797,0.39355,0.38195,0.392646,0.502261,0.938391,0.380032,0.318401,0.200304,0.039856,0.097158,0.252961,0.484083,0.296254,0.527222,0.128942,0.105969,116739,k=4;max_iter=300;batch_size=500;max_no_improve...
216,0.555132,0.536797,0.39355,0.38195,0.392646,0.502261,0.938391,0.380032,0.318401,0.200304,0.039856,0.097158,0.252961,0.484083,0.296254,0.527222,0.128942,0.105969,116739,k=4;max_iter=300;batch_size=500;max_no_improve...
9,0.550795,0.53287,0.148856,0.158295,0.39757,0.568028,0.885841,0.0,0.021581,-0.063752,-0.032629,0.103069,-0.055744,0.528664,0.335905,0.484983,-0.044779,-0.033683,288389,k=2;max_iter=300;batch_size=100;max_no_improve...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,288389,k=2;max_iter=300;batch_size=100;max_no_improve...
11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,116739,k=2;max_iter=300;batch_size=100;max_no_improve...
26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,116739,k=2;max_iter=300;batch_size=100;max_no_improve...
13,-0.03564,-0.034213,-0.001267,0.025149,-0.024644,-0.031418,-0.045117,-0.081153,0.069397,-0.049839,0.077546,0.0,0.0,-0.03179,-0.017798,-0.049888,0.0,0.0,777572,k=2;max_iter=300;batch_size=100;max_no_improve...


# 3. Selecting best and optimal parameter combinations


In [63]:
# Settings for the parameter-selection step below.
# `method` picks which clustering method's evaluation tables are analysed;
# other values previously set here: 'HierarchicalClustering', 'SpectralClustering'.
method = "BIRCH"

# The two datasets whose evaluation tables are compared.
ds1, ds2 = "TCGA-BRCA", "METABRIC"

# Folder holding the "<method>_<dataset>.tsv" evaluation tables
# (the trailing slash matters: paths are built by string concatenation).
base_dir = "clusterings_evaluation/"

# Column of the evaluation tables used to rank parameter combinations.
performance_col = "PAM50"

In [64]:
# Load the evaluation tables of `method` for both datasets.
df1 = pd.read_csv(base_dir + method + "_" + ds1 + ".tsv", sep="\t", index_col=0)
df2 = pd.read_csv(base_dir + method + "_" + ds2 + ".tsv", sep="\t", index_col=0)

# Collapse all rows that share a parameter string into their column-wise mean,
# so each parameter combination is represented by exactly one row.
df1 = df1.groupby("parameters").agg("mean")
df2 = df2.groupby("parameters").agg("mean")

df1 = df1.sort_values(by=performance_col, ascending=False)
df2 = df2.sort_values(by=performance_col, ascending=False)

# Rank 1 = highest `performance_col`; tied values share the average rank
# (pandas' default ranking method).
df1["rank"] = df1[performance_col].rank(ascending=False)
df2["rank"] = df2[performance_col].rank(ascending=False)

# Mean rank over both datasets. The sum aligns on the parameter string, so a
# combination present in only one table becomes NaN and is sorted to the end.
mean_ranks = (df1["rank"] + df2["rank"]) * 0.5
mean_ranks = mean_ranks.sort_values()

# The index holds parameter strings, so the first element has to be taken
# positionally with .iloc; `series[0]` on a non-integer index relies on a
# deprecated positional fallback.
best_mean_rank = mean_ranks.iloc[0]
# Every parameter combination tied for the best mean rank.
optimized_params = mean_ranks[mean_ranks == best_mean_rank].index.values
# Second number: best mean rank relative to the number of parameter combinations.
print(method+"\tbest mean rank:",best_mean_rank, round(best_mean_rank/mean_ranks.shape[0],2))

print("\topt. parameters:\n\t\t"+"\n\t\t".join(optimized_params) )

# Performance with optimized parameters: the highest score among the tied
# optimal combinations, per dataset.
opt_perf1 = df1.loc[optimized_params, performance_col].max()
opt_perf2 = df2.loc[optimized_params, performance_col].max()
print("\tperformance w. optimized:\t%s:%.2f\t%s:%.2f"%(ds1,opt_perf1,ds2,opt_perf2))

# Best performance on the first dataset alone, and every combination reaching it.
best_perf1 = df1[performance_col].max()
best_param1 = df1.loc[df1[performance_col] == best_perf1, :].index.values
print("\tbest parameters %s:\t%.2f"%(ds1,best_perf1))
print("\t\t"+"\n\t\t".join(best_param1))

# Same for the second dataset.
best_perf2 = df2[performance_col].max()
best_param2 = df2.loc[df2[performance_col] == best_perf2, :].index.values
print("\tbest parameters %s:\t%.2f"%(ds2,best_perf2))
print("\t\t"+"\n\t\t".join(best_param2))


BIRCH	best mean rank: 9.5 0.06
	opt. parameters:
		k=6;branching_factor=100;threshold=0.1
		k=6;branching_factor=100;threshold=0.5
		k=6;branching_factor=100;threshold=0.95
		k=6;branching_factor=10;threshold=0.1
		k=6;branching_factor=10;threshold=0.5
		k=6;branching_factor=10;threshold=0.95
		k=6;branching_factor=50;threshold=0.1
		k=6;branching_factor=50;threshold=0.5
		k=6;branching_factor=50;threshold=0.95
	performance w. optimized:	TCGA-BRCA:0.48	METABRIC:0.48
	best parameters TCGA-BRCA:	0.48
		k=6;branching_factor=10;threshold=0.5
		k=7;branching_factor=10;threshold=0.95
		k=6;branching_factor=100;threshold=0.5
		k=6;branching_factor=10;threshold=0.95
		k=6;branching_factor=50;threshold=0.1
		k=6;branching_factor=50;threshold=0.5
		k=6;branching_factor=50;threshold=0.95
		k=7;branching_factor=100;threshold=0.1
		k=7;branching_factor=100;threshold=0.5
		k=7;branching_factor=100;threshold=0.95
		k=7;branching_factor=10;threshold=0.1
		k=7;branching_factor=10;threshold=0.5
		k=7;br

In [65]:
# Show the first dataset's aggregated scores (and rank) for one specific
# parameter combination.
df1.loc["k=5;branching_factor=50;threshold=0.5"]

PAM50           0.435597
Intrinsic       0.421222
PAM50_AB        0.244479
SCMOD2          0.272155
IHC             0.314934
Luminal         0.420490
Basal           0.851772
Her2           -0.092976
LumA            0.094375
LumB            0.208882
Normal          0.034693
Claudin-low     0.076547
IHC_HER2        0.038942
IHC_ER          0.403200
IHC_PR          0.245442
IHC_TNBC        0.498923
NET_kmeans      0.118502
NET_ward        0.082501
rank           32.000000
Name: k=5;branching_factor=50;threshold=0.5, dtype: float64