In [None]:
import pandas as pd
import numpy as np
import umap.umap_ as umap
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import sklearn.cluster as cluster
from kneed import KneeLocator
from sklearn.cluster import KMeans
from collections import Counter
from sklearn.metrics import silhouette_score

In [None]:
# Raw 'doi' column of Saeki's paper list, exactly as read (missing values included).
saeki_papers = pd.read_csv('../data/Saeki_papers_doi.csv', encoding = "ISO-8859-1")['doi'].to_list()
# Upper-cased lookup set used by compare_with_saeki(); missing DOIs are skipped.
saeki_DOI = {doi.upper() for doi in saeki_papers if not pd.isna(doi)}

def compare_with_saeki(doi):
    """Return the group code for a search result based on its DOI.

    Returns 3 when `doi` is present and its upper-cased form is a member of
    the module-level `saeki_DOI` set; returns 0 otherwise, including when
    `doi` is missing according to `pd.isna`.
    """
    if pd.isna(doi):
        return 0
    return 3 if doi.upper() in saeki_DOI else 0
    
def read_df_with_embedding(file, n_dims=768):
    """Read a CSV whose embedding is stored one component per column.

    The embedding columns are expected to be labelled "0" .. str(n_dims - 1)
    in the file. They are renamed to the integers 0 .. n_dims - 1 and cast
    to float; every other column is returned as read.

    Parameters
    ----------
    file : str or path-like
        CSV file to read (decoded as ISO-8859-1).
    n_dims : int, default 768
        Number of embedding columns to rename and cast.

    Returns
    -------
    pandas.DataFrame
        The file contents with integer-labelled float embedding columns.

    Raises
    ------
    KeyError
        If one of the expected embedding columns is missing from the file.
    """
    df = pd.read_csv(file, encoding = "ISO-8859-1")
    df = df.rename(columns={str(i): i for i in range(n_dims)})
    # Cast every embedding column in a single pass instead of one
    # column assignment per dimension.
    return df.astype({i: float for i in range(n_dims)})

## Get the plots for embedding with all seeds, search, battery and OPV stability

In [None]:
# Assemble the dataframe to be embedded: seed papers, the polymer-solar-cell
# search results, and two contrast searches.
query = '(photovoltaic polymer) | (polymer solar cell)'

seeds = read_df_with_embedding("../data/seeds_Saeki_fullerene_OPV_with_abstract_and_embedding.csv")
search = read_df_with_embedding(f"../data/search_results_{query.replace(' ', '_').replace('|', 'or')}_with_embedding.csv")
battery = read_df_with_embedding("../data/search_results_lithium_batteries_with_embedding_bulk.csv")
# NOTE: despite its name, `oled` holds the organic-photovoltaic-stability search results.
oled = read_df_with_embedding("../data/search_results_organic_photovoltaic_stability_with_embedding_bulk.csv")

# Group codes: 5 = seed, 2 = lithium-battery search, 1 = OPV-stability search,
# 3 = search result whose DOI is in Saeki's list, 0 = any other search result.
seeds['group'] = 5
battery['group'] = 2
oled['group'] = 1
# Label every search row in one vectorised pass instead of a per-row .loc
# assignment; compare_with_saeki returns 0 for a missing DOI.
search['group'] = search['DOI'].map(compare_with_saeki).astype('int64')

saeki_fullerene_opv = pd.concat([seeds,search, battery, oled])
saeki_fullerene_opv = saeki_fullerene_opv.reset_index(drop=True)
# Drop the first seven columns (presumably non-embedding metadata - confirm
# against the CSV layout), then split the group codes off as the targets.
saeki_fullerene_opv = saeki_fullerene_opv.drop(saeki_fullerene_opv.columns[:7], axis=1)
saeki_fullerene_opv_targets = saeki_fullerene_opv.pop('group')

In [None]:
# Number of rows per group code (sanity check on the assembled labels).
saeki_fullerene_opv_targets.value_counts()

In [None]:
# Reduce the assembled frame to two dimensions with UMAP (seeded with 42).
standard_embedding = (
    umap.UMAP(random_state=42)
    .fit_transform(saeki_fullerene_opv)
)
# Same coordinates as a dataframe so rows can be selected by group mask.
standard_embedding_df = pd.DataFrame(
    standard_embedding,
    columns=['Arbitrary Dimension x', 'Arbitrary Dimension y'],
)

In [None]:
plt.figure(figsize=(16,9))

# One colour per group code 0..3; codes 0 and 3 both come from the
# polymer-solar-cell search, so they share C0.
color_map = ['C0', 'C1', 'C2', 'C0']

for group_id, group_color in enumerate(color_map):
    points = standard_embedding_df.loc[saeki_fullerene_opv_targets == group_id].to_numpy()
    plt.scatter(points[:, 0], points[:, 1], c=group_color, label=group_id, s=5, alpha=1)

# Legend entries are built by hand so that each search query appears once.
legend_entries = [
    ('(photovoltaic polymer) | (polymer solar cell)', 'C0'),
    ('OPV Stability and Degradation', 'C1'),
    ('Lithium Batteries', 'C2'),
]
legend_handles = [
    Line2D([0], [0], marker='o', color='w', label=entry_label, markersize=10, markerfacecolor=entry_color)
    for entry_label, entry_color in legend_entries
]

plt.legend(handles=legend_handles, title='Search Key Words', title_fontsize=17, fontsize=17)
plt.xlabel('Arbitrary Dimension x', fontsize=20)
plt.ylabel('Arbitrary Dimension y', fontsize=20)
plt.tick_params(axis='both', labelsize=20)
# Fixed viewport for this figure.
plt.xlim([0, 16])
plt.ylim([0.5, 9.5])

plt.savefig('../plots/Umap_all_plots_randomstate_42.png', bbox_inches='tight')

# Assembling dataset

In [None]:
# Assemble the dataframe to be clustered: seed papers plus the
# polymer-solar-cell search results.
query = '(photovoltaic polymer) | (polymer solar cell)'

seeds = read_df_with_embedding("../data/seeds_Saeki_fullerene_OPV_with_abstract_and_embedding.csv")
search = read_df_with_embedding(f"../data/search_results_{query.replace(' ', '_').replace('|', 'or')}_with_embedding.csv")
# Group codes: 5 = seed, 3 = search result whose DOI is in Saeki's list,
# 0 = any other search result.
seeds['group'] = 5
# Label every search row in one vectorised pass instead of a per-row .loc
# assignment; compare_with_saeki returns 0 for a missing DOI.
search['group'] = search['DOI'].map(compare_with_saeki).astype('int64')

saeki_fullerene_opv = pd.concat([seeds,search])
saeki_fullerene_opv = saeki_fullerene_opv.reset_index(drop=True)
# Drop the first seven columns (presumably non-embedding metadata - confirm
# against the CSV layout), then split the group codes off as the targets.
saeki_fullerene_opv = saeki_fullerene_opv.drop(saeki_fullerene_opv.columns[:7], axis=1)
saeki_fullerene_opv_targets = saeki_fullerene_opv.pop('group')

In [None]:
# Show the DOI of every seed paper.
seeds.loc[:, 'DOI']

In [None]:
# Number of rows per group code (5 = seed, 3 = Saeki DOI in search, 0 = other search result).
saeki_fullerene_opv_targets.value_counts()

# UMAP dimensional reduction

In [None]:
# Reduce the seeds + search frame to two dimensions with UMAP (seeded with 42).
standard_embedding = (
    umap.UMAP(random_state=42)
    .fit_transform(saeki_fullerene_opv)
)
# Same coordinates as a dataframe so rows can be selected by group mask.
standard_embedding_df = pd.DataFrame(
    standard_embedding,
    columns=['Arbitrary Dimension x', 'Arbitrary Dimension y'],
)

In [None]:
plt.figure(figsize=(16,9))

# Search results only: group 0 (DOI not in Saeki's list) and group 3 (DOI in Saeki's list).
for group_id in (0, 3):
    points = standard_embedding_df.loc[saeki_fullerene_opv_targets == group_id].to_numpy()
    plt.scatter(points[:, 0], points[:, 1], c=f"C{group_id}", label=group_id, s=5, alpha=0.7)

# The seeds were concatenated first, so rows 0-4 of the embedding are starred
# as seeds (assumes exactly five seed papers - confirm against the seeds file).
for seed_row in range(5):
    seed_x, seed_y = standard_embedding[seed_row, 0], standard_embedding[seed_row, 1]
    plt.scatter(seed_x, seed_y, c='k', s=100, marker = '*')

# Hand-built legend: one star entry for the seeds, one dot per plotted group.
legend_handles = [
    Line2D([0], [0], marker='*', color='w', label='Seeds', markersize=15, markerfacecolor='k'),
]
for entry_label, entry_color in (
    ('Saeki papers in search results', 'C3'),
    ('non-Saeki papers in search results', 'C0'),
):
    legend_handles.append(
        Line2D([0], [0], marker='o', color='w', label=entry_label, markersize=10, markerfacecolor=entry_color)
    )

plt.legend(handles=legend_handles, loc='best', fontsize=13.5)
plt.xlabel('Arbitrary Dimension x', fontsize=15)
plt.ylabel('Arbitrary Dimension y', fontsize=15)
plt.tick_params(axis='both', labelsize=13.5)

plt.savefig('../plots/Umap_plots_randomstate_42.png', bbox_inches='tight')

2D UMAP visualization of the embeddings of papers from Saeki's dataset compared with papers from different search results

# Clustering

In [None]:
# Shared keyword arguments for every KMeans instance created below.
kmeans_kwargs = dict(
    init="random",
    n_init=10,
    max_iter=300,
    random_state=42,
)

In [None]:
# Candidate cluster counts for the elbow search.
k_values = range(1, 41)

# Fit k-means once per candidate k and record the within-cluster sum of squares.
sse = []
for k in k_values:
    print(k)
    clusterer = KMeans(n_clusters=k, **kmeans_kwargs).fit(saeki_fullerene_opv)
    sse.append(clusterer.inertia_)

# Locate the elbow of the decreasing, convex SSE curve.
kl = KneeLocator(
    k_values,
    sse,
    curve="convex",
    direction="decreasing",
)

print("Optimal number of clusters based on elbow method: ", kl.elbow)

In [None]:
# Plot SSE against the actual number of clusters k = 1..len(sse). A bare
# plt.plot(sse) puts the list index (k - 1) on the x-axis, which shifts the
# visible elbow by one relative to the value reported by KneeLocator.
plt.plot(range(1, len(sse) + 1), sse)
plt.xlabel('Number of clusters k')
plt.ylabel('SSE (k-means inertia)');

In [None]:
# Cluster the full embedding with the elbow-selected number of clusters.
kmeans_labels = KMeans(n_clusters=kl.elbow, **kmeans_kwargs).fit_predict(saeki_fullerene_opv)

plt.figure(figsize=(16,9))
cmap_name = 'tab20'
cmap = plt.get_cmap(cmap_name)
# Legend colours: sample the colormap at i / (k - 1), the same positions the
# scatter below assigns when it normalises the integer labels 0..k-1.
colors = []
for cluster_idx in range(kl.elbow):
    colors.append(cmap(cluster_idx/(kl.elbow - 1)))

plt.scatter(standard_embedding[:, 0], standard_embedding[:, 1], c=kmeans_labels, s=5, cmap=cmap_name, alpha=0.75);

# Rows 0-4 of the embedding are starred as the seed papers.
for seed_row in range(5):
    plt.scatter(standard_embedding[seed_row, 0], standard_embedding[seed_row, 1], c='k', s=100, marker = '*')

# One legend dot per cluster (numbered from 1) followed by the seed star.
legend_handles = [
    Line2D([0], [0], marker='o', color='w', label=f'Cluster {cluster_number}', markersize=10, markerfacecolor=cluster_color)
    for cluster_number, cluster_color in enumerate(colors, start=1)
]
legend_handles.append(Line2D([0], [0], marker='*', color='w', label='Seeds', markersize=15, markerfacecolor='k'))

plt.legend(handles=legend_handles, loc='best', fontsize=13.5)
plt.xlabel('Arbitrary Dimension x', fontsize=15)
plt.ylabel('Arbitrary Dimension y', fontsize=15)
plt.tick_params(axis='both', labelsize=13.5)
plt.savefig('../plots/clustering_plots_randomstate_42.png', bbox_inches='tight')

2D UMAP visualization of the k-means clustering performed on our dataset

# Analyzing results

In [None]:
# Report the 1-based k-means cluster of rows 0-4, i.e. the seed papers.
for seed_idx, seed_label in enumerate(kmeans_labels[:5]):
    print(f"seed {seed_idx} is in cluster {seed_label+1}")

Based on this we have determined our best cluster. Note: when referring to the clusters in the following graphics, the first cluster is called cluster 1. To switch to zero-based numbering, change `index_offset` in the next block of code.

In [None]:
# Change to 0 for zero based indexing
index_offset = 1

# The chosen cluster as displayed in the figures, converted to the 0-based
# label used to index the k-means results.
best_cluster = 12
best_cluster = best_cluster - index_offset


Finding the breakdown of clusters for each group of papers

In [None]:
targets = saeki_fullerene_opv_targets.to_list()

# One counter per cluster for each paper group. The lithium and OPV-stability
# counters are allocated but never filled in this cell.
saeki_clusters = [0] * kl.elbow
search_clusters = [0] * kl.elbow
lithium_clusters = [0] * kl.elbow
opv_stability_clusters = [0] * kl.elbow

# Group code -> counter to increment (3 = Saeki DOI in search, 0 = other search result).
counters_by_group = {3: saeki_clusters, 0: search_clusters}

# Rows 0-4 are the seeds and are skipped; every other row is tallied under
# its k-means label if its group code has a counter.
for target, label in zip(targets[5:], kmeans_labels[5:]):
    counter = counters_by_group.get(target)
    if counter is not None:
        counter[label] += 1

In [None]:
# Per-cluster counts of Saeki papers, plus their total in the clusters with
# 0-based labels 7 and 11 (shown as clusters 8 and 12 when index_offset is 1).
saeki_clusters, saeki_clusters[7] + saeki_clusters[11]

In [None]:
# Size of every k-means cluster (all rows, seeds included), plus the combined
# size of the clusters with 0-based labels 7 and 11.
kmeans_labels_dict = Counter(kmeans_labels.tolist())
kmeans_labels_dict, kmeans_labels_dict[7] + kmeans_labels_dict[11]

In [None]:
# One tab20 colour per cluster, sampled at i / (k - 1) for cluster i.
cmap = plt.get_cmap('tab20')
colors2 = []
for cluster_idx in range(kl.elbow):
    colors2.append(cmap(cluster_idx/(kl.elbow - 1)))

In [None]:
# Legend labels: cluster numbers shifted by index_offset.
labels = list(range(index_offset,kl.elbow+index_offset))
# Pull the wedge of the chosen cluster slightly out of the pie.
explode = [0] * kl.elbow
explode[best_cluster] = .05

# Pie chart of how Saeki's papers are distributed over the clusters.
plt.figure(figsize=(10, 8))
wedges, texts, autotexts = plt.pie(saeki_clusters, explode=explode, autopct='%1.1f', startangle=140, colors = colors2, pctdistance=1.1)
plt.legend(labels, title="Clusters", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

# autopct renders bare numbers: blank the label of any slice below 1 % and
# append a percent sign to the others.
for pct_text in autotexts:
    pct_value = pct_text.get_text()
    if float(pct_value) < 1:
        pct_text.set_text('')
    else:
        pct_text.set_text(f'{pct_value}%')

# Equal aspect ratio ensures that pie chart is a circle
plt.axis('equal')

plt.title('The Cluster Distribution of Saeki\'s Papers')
plt.savefig('../plots/clustering_plots_randomstate_42_cluster_of_saeki.png', bbox_inches='tight')
plt.show()

In [None]:
# Silhouette analysis of the k-means clustering computed above.
from sklearn.metrics import silhouette_samples, silhouette_score
import matplotlib.cm as cm

X = saeki_fullerene_opv
n_clusters = kl.elbow
cluster_labels = kmeans_labels

# Per-sample silhouette coefficients. The pairwise-distance pass behind them
# is the expensive step, so it runs once; silhouette_score is the mean of
# these values, so the average is taken directly rather than recomputed.
sample_silhouette_values = silhouette_samples(saeki_fullerene_opv, cluster_labels)
silhouette_avg = float(np.mean(sample_silhouette_values))
# Same value under the second name this cell has always defined.
score = silhouette_avg
print("For n_clusters =", n_clusters,
    "The average silhouette_score is :", silhouette_avg)

# Single-panel figure holding the silhouette plot.
fig, ax1 = plt.subplots(1, 1)
fig.set_size_inches(12, 7)

# Silhouette coefficients can range over [-1, 1]; the axis is limited to the
# window [-0.1, 0.5] for this figure.
ax1.set_xlim(-0.1, .5)
# The (n_clusters+1)*10 is for inserting blank space between silhouette
# plots of individual clusters, to demarcate them clearly.
ax1.set_ylim([0, len(saeki_fullerene_opv) + (n_clusters + 1) * 10])

y_lower = 10

for i in range(n_clusters):
    # Silhouette values of the samples in cluster i, sorted ascending so the
    # band is drawn as a smooth profile.
    ith_cluster_silhouette_values = \
        sample_silhouette_values[cluster_labels == i]

    ith_cluster_silhouette_values.sort()

    size_cluster_i = ith_cluster_silhouette_values.shape[0]
    y_upper = y_lower + size_cluster_i
    # `cmap` is the colormap object created in an earlier cell; sampling it at
    # i / (n_clusters - 1) repeats the per-cluster colours used there.
    color = cmap(float(i) / (n_clusters - 1))
    ax1.fill_betweenx(np.arange(y_lower, y_upper),
                      0, ith_cluster_silhouette_values,
                      facecolor=color, edgecolor=color, alpha=1)

    # Label the silhouette plots with their cluster numbers at the middle
    ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i+index_offset))

    # Leave a 10-row gap before the next cluster's band.
    y_lower = y_upper + 10

ax1.set_xlabel("Silhouette coefficient value")
ax1.set_ylabel("Cluster label")

# The vertical line for average silhouette score of all the values
ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

ax1.set_yticks([])  # Clear the yaxis labels / ticks
ax1.set_xticks([-0.1,0, 0.1, 0.2, 0.3, 0.4, 0.5])
fig.savefig('../plots/clustering_plots_randomstate_42_silhouette_score.png', bbox_inches='tight')

# Export Data

In [None]:
# Write the clustered frame (seeds followed by search results, group column
# already removed) to disk as a NumPy array.
np.save("../data/seeds_and_search_embedding.npy", saeki_fullerene_opv.to_numpy())

In [None]:
# Rebuild the seeds + search table with its original columns.
saeki_fullerene_opv_save_df = pd.concat([seeds, search]).reset_index(drop=True)

# Keep only columns 3-6 and the last column ('group'): drop the first three
# columns and everything from position 7 up to, but excluding, the last one.
all_columns = saeki_fullerene_opv_save_df.columns
columns_to_drop = list(all_columns[:3]) + list(all_columns[7:-1])

# Append the 2-D UMAP coordinates and the k-means label of every row.
saeki_fullerene_opv_save_df = pd.concat(
    [
        saeki_fullerene_opv_save_df.drop(columns_to_drop, axis=1),
        standard_embedding_df,
        pd.DataFrame(kmeans_labels, columns=['kmean_label']),
    ],
    axis=1,
)
saeki_fullerene_opv_save_df.to_csv("../data/cluster_results_randomstate_42.csv")

# Random State Experiments

In [None]:
# Per-run results of the random-state experiment, one entry per run:
# elbow-selected cluster count, and the numbers of Saeki / other search
# papers that fall in the clusters containing the seeds.
opt_number_clusters, chosen_saeki, chosen_search = [], [], []

In [None]:
# Repeat the elbow search and clustering for 100 random states and record how
# many Saeki / other search papers land in the clusters that contain the seeds.
#
# No UMAP projection is fitted here: nothing in the loop reads it, and
# refitting it on every run would overwrite the `standard_embedding` that the
# figures above were drawn from.
# NOTE: the loop still rebinds the notebook-level names kmeans_kwargs, sse, kl,
# kmeans_labels, saeki_clusters and search_clusters, and it appends to the
# result lists, so re-run the cell that resets those lists before re-running
# this one.

# The group codes do not change between runs, so the list is built once.
targets = saeki_fullerene_opv_targets.to_list()

for randS in range(100):
    # Printed text is left exactly as-is (spelling included) so that new
    # output matches the previously recorded output.
    print("Expriment with random state = ", randS)
    kmeans_kwargs = {
        "init": "random",
        "n_init": 10,
        "max_iter": 300,
        "random_state": randS,
    }

    # Within-cluster sum of squares for k = 1..40.
    sse = []
    for k in range(1, 41):
        clusterer = KMeans(n_clusters=k, **kmeans_kwargs)
        clusterer.fit(saeki_fullerene_opv)
        sse.append(clusterer.inertia_)

    ## Find the elbow point
    kl = KneeLocator(
        range(1, 41), sse, curve="convex", direction="decreasing"
    )
    print("  Optimal number of clusters based on elbow method: ", kl.elbow)
    opt_number_clusters.append(kl.elbow)

    kmeans_labels = KMeans(n_clusters=kl.elbow, **kmeans_kwargs).fit_predict(saeki_fullerene_opv)

    # Clusters that contain at least one of rows 0-4, i.e. the seed papers.
    chosen_clusters = set()
    for i in range(0,5):
        chosen_clusters.add(kmeans_labels[i])
        print(f"  seed {i} is in cluster {kmeans_labels[i]+1}")

    saeki_clusters = [0] * kl.elbow
    search_clusters = [0] * kl.elbow

    # Tally the non-seed rows per cluster: group 3 = Saeki DOI in search,
    # group 0 = other search result.
    for i in range(5, len(targets)):
        group = kmeans_labels[i]
        if targets[i] == 3:
            saeki_clusters[group] += 1
        elif targets[i] == 0:
            search_clusters[group] += 1

    chosen_saeki.append(sum([saeki_clusters[i] for i in chosen_clusters]))
    chosen_search.append(sum([search_clusters[i] for i in chosen_clusters]))
    print(f"  chosed {chosen_saeki[-1]} saeki, {chosen_search[-1]} non saeki in search")

In [None]:
raw = """
Expriment with random state =  0
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 14
  seed 1 is in cluster 14
  seed 2 is in cluster 11
  seed 3 is in cluster 11
  seed 4 is in cluster 14
  chosed 233 saeki, 2702 non saeki in search
Expriment with random state =  1
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 5
  chosed 290 saeki, 6686 non saeki in search
Expriment with random state =  2
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 9
  seed 1 is in cluster 5
  seed 2 is in cluster 10
  seed 3 is in cluster 9
  seed 4 is in cluster 1
  chosed 296 saeki, 6646 non saeki in search
Expriment with random state =  3
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 3
  chosed 226 saeki, 2719 non saeki in search
Expriment with random state =  4
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 9
  seed 1 is in cluster 9
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 9
  chosed 204 saeki, 3930 non saeki in search
Expriment with random state =  5
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 7
  seed 1 is in cluster 7
  seed 2 is in cluster 3
  seed 3 is in cluster 3
  seed 4 is in cluster 2
  chosed 287 saeki, 6319 non saeki in search
Expriment with random state =  6
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 13
  seed 1 is in cluster 13
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 13
  chosed 237 saeki, 2802 non saeki in search
Expriment with random state =  7
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 9
  chosed 290 saeki, 4939 non saeki in search
Expriment with random state =  8
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 1
  chosed 233 saeki, 2799 non saeki in search
Expriment with random state =  9
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 13
  seed 1 is in cluster 13
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 13
  chosed 225 saeki, 2725 non saeki in search
Expriment with random state =  10
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 10
  seed 1 is in cluster 10
  seed 2 is in cluster 13
  seed 3 is in cluster 13
  seed 4 is in cluster 10
  chosed 223 saeki, 2871 non saeki in search
Expriment with random state =  11
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 9
  chosed 291 saeki, 4937 non saeki in search
Expriment with random state =  12
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 2
  seed 1 is in cluster 2
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 2
  chosed 230 saeki, 2706 non saeki in search
Expriment with random state =  13
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 10
  seed 1 is in cluster 5
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 10
  chosed 284 saeki, 5111 non saeki in search
Expriment with random state =  14
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 9
  seed 3 is in cluster 9
  seed 4 is in cluster 3
  chosed 280 saeki, 6792 non saeki in search
Expriment with random state =  15
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 4
  chosed 234 saeki, 2816 non saeki in search
Expriment with random state =  16
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 3
  chosed 207 saeki, 3659 non saeki in search
Expriment with random state =  17
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 8
  seed 1 is in cluster 8
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 3
  chosed 285 saeki, 6326 non saeki in search
Expriment with random state =  18
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 6
  chosed 205 saeki, 4008 non saeki in search
Expriment with random state =  19
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 2
  chosed 328 saeki, 6327 non saeki in search
Expriment with random state =  20
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 12
  seed 1 is in cluster 12
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 12
  chosed 209 saeki, 2250 non saeki in search
Expriment with random state =  21
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 8
  chosed 285 saeki, 7503 non saeki in search
Expriment with random state =  22
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 11
  seed 3 is in cluster 11
  seed 4 is in cluster 1
  chosed 237 saeki, 2783 non saeki in search
Expriment with random state =  23
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 12
  seed 3 is in cluster 12
  seed 4 is in cluster 11
  chosed 296 saeki, 4988 non saeki in search
Expriment with random state =  24
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 2
  seed 1 is in cluster 2
  seed 2 is in cluster 1
  seed 3 is in cluster 1
  seed 4 is in cluster 6
  chosed 280 saeki, 6789 non saeki in search
Expriment with random state =  25
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 5
  chosed 233 saeki, 2681 non saeki in search
Expriment with random state =  26
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 7
  seed 1 is in cluster 7
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 9
  chosed 297 saeki, 5193 non saeki in search
Expriment with random state =  27
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 8
  seed 1 is in cluster 10
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 8
  chosed 312 saeki, 3945 non saeki in search
Expriment with random state =  28
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 3
  chosed 230 saeki, 2710 non saeki in search
Expriment with random state =  29
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 7
  seed 1 is in cluster 7
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 7
  chosed 230 saeki, 2701 non saeki in search
Expriment with random state =  30
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 9
  seed 3 is in cluster 9
  seed 4 is in cluster 8
  chosed 280 saeki, 6809 non saeki in search
Expriment with random state =  31
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 11
  seed 1 is in cluster 11
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 9
  chosed 293 saeki, 4949 non saeki in search
Expriment with random state =  32
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  15
  seed 0 is in cluster 13
  seed 1 is in cluster 13
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 13
  chosed 210 saeki, 2370 non saeki in search
Expriment with random state =  33
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 13
  seed 1 is in cluster 13
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 13
  chosed 229 saeki, 2510 non saeki in search
Expriment with random state =  34
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 11
  seed 1 is in cluster 11
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 3
  chosed 293 saeki, 5038 non saeki in search
Expriment with random state =  35
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 8
  seed 3 is in cluster 5
  seed 4 is in cluster 7
  chosed 355 saeki, 9530 non saeki in search
Expriment with random state =  36
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 6
  chosed 282 saeki, 6837 non saeki in search
Expriment with random state =  37
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 9
  seed 1 is in cluster 9
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 9
  chosed 236 saeki, 3030 non saeki in search
Expriment with random state =  38
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  15
  seed 0 is in cluster 10
  seed 1 is in cluster 10
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 10
  chosed 212 saeki, 2388 non saeki in search
Expriment with random state =  39
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 8
  chosed 290 saeki, 6771 non saeki in search
Expriment with random state =  40
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 8
  seed 1 is in cluster 8
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 2
  chosed 291 saeki, 4921 non saeki in search
Expriment with random state =  41
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 2
  seed 1 is in cluster 2
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 6
  chosed 279 saeki, 6790 non saeki in search
Expriment with random state =  42
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 12
  seed 1 is in cluster 12
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 12
  chosed 229 saeki, 2725 non saeki in search
Expriment with random state =  43
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 1
  chosed 226 saeki, 2705 non saeki in search
Expriment with random state =  44
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 13
  seed 3 is in cluster 13
  seed 4 is in cluster 6
  chosed 227 saeki, 2689 non saeki in search
Expriment with random state =  45
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 4
  chosed 237 saeki, 2797 non saeki in search
Expriment with random state =  46
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 5
  chosed 279 saeki, 6791 non saeki in search
Expriment with random state =  47
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 4
  seed 3 is in cluster 5
  seed 4 is in cluster 6
  chosed 355 saeki, 9529 non saeki in search
Expriment with random state =  48
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  15
  seed 0 is in cluster 9
  seed 1 is in cluster 9
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 9
  chosed 203 saeki, 2194 non saeki in search
Expriment with random state =  49
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 9
  chosed 285 saeki, 6338 non saeki in search
Expriment with random state =  50
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 9
  seed 3 is in cluster 9
  seed 4 is in cluster 4
  chosed 228 saeki, 2713 non saeki in search
Expriment with random state =  51
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 9
  seed 1 is in cluster 9
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 9
  chosed 233 saeki, 2907 non saeki in search
Expriment with random state =  52
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 8
  seed 1 is in cluster 8
  seed 2 is in cluster 5
  seed 3 is in cluster 4
  seed 4 is in cluster 3
  chosed 355 saeki, 9556 non saeki in search
Expriment with random state =  53
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 3
  seed 1 is in cluster 6
  seed 2 is in cluster 2
  seed 3 is in cluster 3
  seed 4 is in cluster 3
  chosed 329 saeki, 4774 non saeki in search
Expriment with random state =  54
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 4
  chosed 205 saeki, 3933 non saeki in search
Expriment with random state =  55
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  15
  seed 0 is in cluster 11
  seed 1 is in cluster 11
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 11
  chosed 206 saeki, 2207 non saeki in search
Expriment with random state =  56
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 14
  seed 3 is in cluster 14
  seed 4 is in cluster 1
  chosed 232 saeki, 2697 non saeki in search
Expriment with random state =  57
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 9
  chosed 288 saeki, 6369 non saeki in search
Expriment with random state =  58
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 7
  seed 1 is in cluster 7
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 7
  chosed 228 saeki, 2842 non saeki in search
Expriment with random state =  59
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 7
  seed 1 is in cluster 7
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 7
  chosed 204 saeki, 4212 non saeki in search
Expriment with random state =  60
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 1
  seed 3 is in cluster 1
  seed 4 is in cluster 10
  chosed 285 saeki, 6513 non saeki in search
Expriment with random state =  61
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 6
  seed 3 is in cluster 3
  seed 4 is in cluster 7
  chosed 355 saeki, 9524 non saeki in search
Expriment with random state =  62
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 6
  seed 3 is in cluster 5
  seed 4 is in cluster 7
  chosed 355 saeki, 9550 non saeki in search
Expriment with random state =  63
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 1
  chosed 227 saeki, 2723 non saeki in search
Expriment with random state =  64
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 8
  seed 1 is in cluster 4
  seed 2 is in cluster 12
  seed 3 is in cluster 12
  seed 4 is in cluster 8
  chosed 299 saeki, 3618 non saeki in search
Expriment with random state =  65
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 8
  seed 1 is in cluster 8
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 6
  chosed 286 saeki, 6338 non saeki in search
Expriment with random state =  66
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 14
  seed 1 is in cluster 14
  seed 2 is in cluster 12
  seed 3 is in cluster 12
  seed 4 is in cluster 14
  chosed 233 saeki, 2700 non saeki in search
Expriment with random state =  67
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 11
  seed 1 is in cluster 11
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 11
  chosed 229 saeki, 2863 non saeki in search
Expriment with random state =  68
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 14
  seed 3 is in cluster 14
  seed 4 is in cluster 4
  chosed 229 saeki, 2689 non saeki in search
Expriment with random state =  69
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  15
  seed 0 is in cluster 2
  seed 1 is in cluster 2
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 2
  chosed 230 saeki, 2477 non saeki in search
Expriment with random state =  70
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 7
  seed 1 is in cluster 7
  seed 2 is in cluster 14
  seed 3 is in cluster 14
  seed 4 is in cluster 7
  chosed 226 saeki, 2708 non saeki in search
Expriment with random state =  71
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  10
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 1
  chosed 289 saeki, 6185 non saeki in search
Expriment with random state =  72
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 13
  seed 3 is in cluster 13
  seed 4 is in cluster 4
  chosed 228 saeki, 2681 non saeki in search
Expriment with random state =  73
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 3
  chosed 329 saeki, 6207 non saeki in search
Expriment with random state =  74
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 2
  seed 1 is in cluster 2
  seed 2 is in cluster 6
  seed 3 is in cluster 6
  seed 4 is in cluster 4
  chosed 345 saeki, 7228 non saeki in search
Expriment with random state =  75
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 7
  chosed 327 saeki, 5998 non saeki in search
Expriment with random state =  76
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 1
  seed 3 is in cluster 4
  seed 4 is in cluster 3
  chosed 355 saeki, 9538 non saeki in search
Expriment with random state =  77
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 10
  seed 1 is in cluster 10
  seed 2 is in cluster 14
  seed 3 is in cluster 14
  seed 4 is in cluster 10
  chosed 230 saeki, 2694 non saeki in search
Expriment with random state =  78
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 14
  seed 1 is in cluster 14
  seed 2 is in cluster 3
  seed 3 is in cluster 3
  seed 4 is in cluster 14
  chosed 229 saeki, 2259 non saeki in search
Expriment with random state =  79
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 5
  chosed 227 saeki, 2691 non saeki in search
Expriment with random state =  80
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 1
  chosed 204 saeki, 3967 non saeki in search
Expriment with random state =  81
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  15
  seed 0 is in cluster 10
  seed 1 is in cluster 10
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 10
  chosed 206 saeki, 2228 non saeki in search
Expriment with random state =  82
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 9
  seed 1 is in cluster 9
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 9
  chosed 237 saeki, 2794 non saeki in search
Expriment with random state =  83
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  8
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 6
  chosed 284 saeki, 7658 non saeki in search
Expriment with random state =  84
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 13
  seed 1 is in cluster 13
  seed 2 is in cluster 3
  seed 3 is in cluster 3
  seed 4 is in cluster 13
  chosed 230 saeki, 2700 non saeki in search
Expriment with random state =  85
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 1
  chosed 283 saeki, 6816 non saeki in search
Expriment with random state =  86
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  9
  seed 0 is in cluster 2
  seed 1 is in cluster 2
  seed 2 is in cluster 1
  seed 3 is in cluster 1
  seed 4 is in cluster 5
  chosed 288 saeki, 6231 non saeki in search
Expriment with random state =  87
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 10
  seed 1 is in cluster 10
  seed 2 is in cluster 12
  seed 3 is in cluster 12
  seed 4 is in cluster 10
  chosed 234 saeki, 2813 non saeki in search
Expriment with random state =  88
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 12
  seed 1 is in cluster 12
  seed 2 is in cluster 13
  seed 3 is in cluster 13
  seed 4 is in cluster 12
  chosed 227 saeki, 2724 non saeki in search
Expriment with random state =  89
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 2
  seed 3 is in cluster 2
  seed 4 is in cluster 6
  chosed 204 saeki, 3934 non saeki in search
Expriment with random state =  90
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 7
  seed 3 is in cluster 7
  seed 4 is in cluster 6
  chosed 225 saeki, 2906 non saeki in search
Expriment with random state =  91
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 13
  seed 1 is in cluster 13
  seed 2 is in cluster 11
  seed 3 is in cluster 11
  seed 4 is in cluster 13
  chosed 224 saeki, 2865 non saeki in search
Expriment with random state =  92
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 11
  seed 3 is in cluster 11
  seed 4 is in cluster 1
  chosed 227 saeki, 2681 non saeki in search
Expriment with random state =  93
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 4
  seed 3 is in cluster 4
  seed 4 is in cluster 3
  chosed 293 saeki, 4939 non saeki in search
Expriment with random state =  94
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  14
  seed 0 is in cluster 6
  seed 1 is in cluster 6
  seed 2 is in cluster 1
  seed 3 is in cluster 1
  seed 4 is in cluster 6
  chosed 229 saeki, 2710 non saeki in search
Expriment with random state =  95
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  12
  seed 0 is in cluster 3
  seed 1 is in cluster 3
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 3
  chosed 238 saeki, 2838 non saeki in search
Expriment with random state =  96
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 4
  seed 1 is in cluster 4
  seed 2 is in cluster 5
  seed 3 is in cluster 5
  seed 4 is in cluster 4
  chosed 205 saeki, 3997 non saeki in search
Expriment with random state =  97
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 5
  seed 1 is in cluster 5
  seed 2 is in cluster 8
  seed 3 is in cluster 8
  seed 4 is in cluster 5
  chosed 229 saeki, 2804 non saeki in search
Expriment with random state =  98
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  11
  seed 0 is in cluster 1
  seed 1 is in cluster 1
  seed 2 is in cluster 3
  seed 3 is in cluster 3
  seed 4 is in cluster 1
  chosed 209 saeki, 4131 non saeki in search
Expriment with random state =  99
/Users/hanghu/Applications/miniconda3/envs/mse-pipe/lib/python3.12/site-packages/umap/umap_.py:1945: UserWarning: n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  Optimal number of clusters based on elbow method:  13
  seed 0 is in cluster 8
  seed 1 is in cluster 8
  seed 2 is in cluster 10
  seed 3 is in cluster 10
  seed 4 is in cluster 8
  chosed 235 saeki, 2817 non saeki in search
"""

In [None]:
def _parse_experiment_log(log_text):
    """Turn the captured experiment log into one summary dict per experiment.

    Records are located by the text of their lines instead of a fixed
    10-line stride, so warning lines interleaved in the log (or missing from
    it) cannot shift the fields of the following experiments.

    Args:
        log_text: Captured output of the experiment loop; every experiment
            starts with an "Expriment with random state" line and ends with a
            "chosed <n> saeki, <m> non saeki in search" line.

    Returns:
        A list with one dict per complete experiment, holding the keys
        "optimal number of clusters", "seed number of clusters" (number of
        distinct clusters the seeds landed in), "chosed saeki papers" and
        "chosed non saeki papers".

    Raises:
        ValueError: If an experiment reaches its "chosed" line without an
            "Optimal number of clusters" line, or a number cannot be parsed.
    """
    records = []
    in_record = False
    optimal_k = None
    seed_clusters = set()

    for raw_line in log_text.split('\n'):
        line = raw_line.strip()
        if line.startswith("Expriment with random state"):
            # Header line: start a fresh record (drops any unfinished one).
            in_record, optimal_k, seed_clusters = True, None, set()
        elif not in_record:
            # Anything outside a record (leading blank line, stray output).
            continue
        elif line.startswith("Optimal number of clusters"):
            optimal_k = int(line.split(' ')[-1])
        elif line.startswith("seed ") and " is in cluster " in line:
            # Only the number of distinct clusters matters, so keep the raw token.
            seed_clusters.add(line.split(' ')[-1])
        elif line.startswith("chosed "):
            if optimal_k is None:
                raise ValueError(
                    f"experiment summary without an elbow line: {line!r}"
                )
            # "chosed <saeki> saeki, <other> non saeki in search"
            tokens = line.split()
            records.append({
                "optimal number of clusters": optimal_k,
                "seed number of clusters": len(seed_clusters),
                "chosed saeki papers": int(tokens[1]),
                "chosed non saeki papers": int(tokens[3]),
            })
            in_record = False

    return records


parsed_results = _parse_experiment_log(raw)

In [None]:
# Flatten the parsed records into parallel per-experiment lists.
optimal = [record['optimal number of clusters'] for record in parsed_results]
chosed_saeki = [record['chosed saeki papers'] for record in parsed_results]
# Every selected search paper, whether or not it is on the Saeki list.
chosed_all = [
    record['chosed saeki papers'] + record['chosed non saeki papers']
    for record in parsed_results
]

In [None]:
# Mean and population standard deviation (np.std's default, ddof=0) of the
# per-experiment "chosed saeki papers" counts.
np.average(chosed_saeki), np.std(chosed_saeki)

In [None]:
# Mean and population standard deviation (np.std's default, ddof=0) of the
# per-experiment totals (Saeki + non-Saeki papers selected from the search).
np.average(chosed_all), np.std(chosed_all)

In [None]:
# Row layout / group codes come from the cell that assembles the frame: the
# seed papers are concatenated first, search hits whose DOI is in the Saeki
# list carry group 3 and all other search hits carry group 0.
N_SEEDS = 5
SAEKI_GROUP = 3
SEARCH_GROUP = 0
K_VALUES = range(1, 41)  # candidate cluster counts for the elbow sweep

# Loop-invariant: group code of every row, aligned with saeki_fullerene_opv.
targets = saeki_fullerene_opv_targets.to_list()
target_codes = np.asarray(targets)
# The seed rows themselves are never counted towards the totals.
counted = np.arange(len(target_codes)) >= N_SEEDS
is_saeki = counted & (target_codes == SAEKI_GROUP)
is_search = counted & (target_codes == SEARCH_GROUP)

for randS in range(56, 100):
    print("Expriment with random state = ", randS)
    # NOTE(review): this embedding is not used by the clustering below (KMeans
    # is fitted on the raw feature frame). It is kept so `standard_embedding`
    # stays available to other cells; confirm whether it can be dropped.
    standard_embedding = umap.UMAP(random_state=randS).fit_transform(saeki_fullerene_opv)
    kmeans_kwargs = {
        "init": "random",
        "n_init": 10,
        "max_iter": 300,
        "random_state": randS,
    }

    # Inertia (sum of squared distances) for every candidate cluster count.
    sse = []
    for k in K_VALUES:
        clusterer = KMeans(n_clusters=k, **kmeans_kwargs)
        clusterer.fit(saeki_fullerene_opv)
        sse.append(clusterer.inertia_)

    ## Find the elbow point
    kl = KneeLocator(
        K_VALUES, sse, curve="convex", direction="decreasing"
    )
    if kl.elbow is None:
        # Fail before touching the accumulators so the three result lists
        # stay aligned, and with a message that names the actual problem.
        raise ValueError(
            f"KneeLocator found no elbow for random_state={randS}"
        )
    print("  Optimal number of clusters based on elbow method: ", kl.elbow)
    opt_number_clusters.append(kl.elbow)

    kmeans_labels = KMeans(n_clusters=kl.elbow, **kmeans_kwargs).fit_predict(saeki_fullerene_opv)

    # Clusters that contain at least one seed paper.
    chosen_clusters = set()
    for i in range(N_SEEDS):
        chosen_clusters.add(kmeans_labels[i])
        print(f"  seed {i} is in cluster {kmeans_labels[i]+1}")

    # Per-cluster counts of Saeki / non-Saeki search hits (seed rows excluded).
    saeki_clusters = np.bincount(kmeans_labels[is_saeki], minlength=int(kl.elbow)).tolist()
    search_clusters = np.bincount(kmeans_labels[is_search], minlength=int(kl.elbow)).tolist()

    # Papers that share a cluster with any seed are the ones "chosen".
    chosen_saeki.append(sum(saeki_clusters[c] for c in chosen_clusters))
    chosen_search.append(sum(search_clusters[c] for c in chosen_clusters))
    print(f"  chosed {chosen_saeki[-1]} saeki, {chosen_search[-1]} non saeki in search")