In [None]:
import pickle
import numpy as np

with open("../data/yelp/handled/pca64_itm_emb_np.pkl", "rb") as f:
    item_emb = pickle.load(f)

X = np.asarray(item_emb)
print("Yelp 数据 shape:", X.shape)

In [2]:
pip install hdbscan

Looking in indexes: http://mirrors.aliyun.com/pypi/simple
Collecting hdbscan
  Downloading http://mirrors.aliyun.com/pypi/packages/8a/d9/11564d3ebfe7429fb2e54356b07b2e44ac3dca668c47401d98170809a2f6/hdbscan-0.8.40-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.2/4.2 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: hdbscan
Successfully installed hdbscan-0.8.40
[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
pip install umap

Looking in indexes: http://mirrors.aliyun.com/pypi/simple
Collecting umap
  Downloading http://mirrors.aliyun.com/pypi/packages/4b/46/08ab68936625400fe690684428d4db4764f49b406782cc133df1d0299d06/umap-0.1.1.tar.gz (3.2 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: umap
  Building wheel for umap (setup.py) ... [?25ldone
[?25h  Created wheel for umap: filename=umap-0.1.1-py3-none-any.whl size=3542 sha256=63c2202291aab9fd6b617191e5c8c6d210a36becb9afd83849ab135996a51dfb
  Stored in directory: /root/.cache/pip/wheels/41/3c/08/99aa427d28227bdb30d11c733a293eb811fcfee4bf112ef8b7
Successfully built umap
Installing collected packages: umap
Successfully installed umap-0.1.1
[0mNote: you may need to restart the kernel to use updated packages.


In [3]:
pip install umap-learn

Looking in indexes: http://mirrors.aliyun.com/pypi/simple
[0mNote: you may need to restart the kernel to use updated packages.


In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import hdbscan
from itertools import product
from time import time
import umap
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import hdbscan
from itertools import product
from time import time
import umap
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# ================== 1. 读取数据 ==================
with open("data/yelp/handled/pca64_itm_emb_np.pkl", "rb") as f:  # ✅ 改成 yelp
    item_emb = pickle.load(f)  # shape: [n_items, dim]
X = np.asarray(item_emb)
X = StandardScaler().fit_transform(X)

print(f"Yelp 数据形状: {X.shape}")

# ================== 2. 参数网格 ==================
umap_grid = {
    "n_neighbors": [10, 30, 50],      # 样本数多，邻居范围适当加大
    "min_dist": [0.0, 0.1, 0.5],      # 控制稀疏度
    "n_components": [8, 16, 32],      # 更高维度也尝试
    "metric": ["euclidean"]
}

hdb_grid = {
    "min_cluster_size": [50, 100, 200],   # Yelp 数据量大，簇规模调大
    "min_samples": [5, 20, 50],
    "cluster_selection_epsilon": [0.0, 0.1],
    "metric": ["euclidean"]
}

umap_combos = list(product(*umap_grid.values()))
hdb_combos = list(product(*hdb_grid.values()))
total = len(umap_combos) * len(hdb_combos)

rows = []
idx = 0

# ================== 3. 遍历组合 ==================
for u_params in umap_combos:
    nn, md, nc, met_r = u_params

    reducer = umap.UMAP(
        n_neighbors=nn, min_dist=md, n_components=nc,
        metric=met_r, random_state=42
    )
    Z = reducer.fit_transform(X)   # 🔹 UMAP 降维

    for h_params in hdb_combos:
        mcs, ms, eps, met_c = h_params
        idx += 1
        t0 = time()

        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=mcs, min_samples=ms,
            cluster_selection_epsilon=eps, metric=met_c
        )
        labels = clusterer.fit_predict(Z)

        # ---- 评估指标 ----
        noise_ratio = float(np.mean(labels == -1))
        mask = labels != -1
        if mask.sum() > 1 and len(np.unique(labels[mask])) >= 2:
            sil = float(silhouette_score(Z[mask], labels[mask]))
        else:
            sil = np.nan

        n_clusters = len(set(labels)) - (1 if -1 in labels else 0)

        print(f"[{idx}/{total}] UMAP(nn={nn}, md={md}, nc={nc}) + "
              f"HDBSCAN(mcs={mcs}, ms={ms}, eps={eps}) "
              f"=> Clusters={n_clusters}, Sil={sil:.4f}, Noise={noise_ratio:.3f}")

        rows.append({
            "nn": nn, "md": md, "nc": nc, "umap_metric": met_r,
            "mcs": mcs, "ms": ms, "eps": eps, "hdb_metric": met_c,
            "Clusters": n_clusters,
            "Silhouette": sil,
            "NoiseRatio": noise_ratio,
            "Score": (sil if not np.isnan(sil) else -1) - noise_ratio,
            "TimeSec": round(time() - t0, 3)
        })

# ================== 4. 保存 & 查看结果 ==================
df = pd.DataFrame(rows)
df_sorted = df.sort_values(by="Score", ascending=False)

print("\nTop 5 参数组合：")
print(df_sorted.head(5))

# ✅ 保存到 yelp 结果文件
df_sorted.to_csv("umap_hdbscan_results_yelp.csv", index=False)
print("\n完整结果已保存到 umap_hdbscan_results_yelp.csv")

Yelp 数据形状: (11383, 64)


  warn(


[1/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=65, Sil=0.6421, Noise=0.176
[2/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=65, Sil=0.6421, Noise=0.176
[3/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=59, Sil=0.6856, Noise=0.191
[4/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=59, Sil=0.6856, Noise=0.191
[5/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=55, Sil=0.7448, Noise=0.261
[6/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=55, Sil=0.7448, Noise=0.261
[7/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=32, Sil=0.6633, Noise=0.249
[8/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=32, Sil=0.6633, Noise=0.249
[9/486] UMAP(nn=10, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=35, Sil=0.6555, Noise=0.239
[10/486] UMAP(nn=10, md=0.0, 

  warn(


[19/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=67, Sil=0.6652, Noise=0.166
[20/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=67, Sil=0.6652, Noise=0.166
[21/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=62, Sil=0.6915, Noise=0.191
[22/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=62, Sil=0.6915, Noise=0.191
[23/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=55, Sil=0.7461, Noise=0.259
[24/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=55, Sil=0.7461, Noise=0.259
[25/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=31, Sil=0.6591, Noise=0.219
[26/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=31, Sil=0.6591, Noise=0.219
[27/486] UMAP(nn=10, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=30, Sil=0.6099, Noise=0.197
[28/486] UM

  warn(


[37/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=61, Sil=0.6851, Noise=0.174
[38/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=61, Sil=0.6851, Noise=0.174
[39/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=58, Sil=0.6981, Noise=0.195
[40/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=58, Sil=0.6981, Noise=0.195
[41/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=51, Sil=0.7528, Noise=0.263
[42/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=51, Sil=0.7528, Noise=0.263
[43/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=35, Sil=0.6703, Noise=0.225
[44/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=35, Sil=0.6703, Noise=0.225
[45/486] UMAP(nn=10, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=35, Sil=0.6800, Noise=0.238
[46/486] UM

  warn(


[55/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=61, Sil=0.6381, Noise=0.187
[56/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=61, Sil=0.6381, Noise=0.187
[57/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=57, Sil=0.6613, Noise=0.209
[58/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=57, Sil=0.6613, Noise=0.209
[59/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=52, Sil=0.7200, Noise=0.286
[60/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=52, Sil=0.7200, Noise=0.286
[61/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=33, Sil=0.6168, Noise=0.222
[62/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=33, Sil=0.6168, Noise=0.222
[63/486] UMAP(nn=10, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=31, Sil=0.6269, Noise=0.237
[64/486] UMAP(nn=10,

  warn(


[73/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=65, Sil=0.6342, Noise=0.209
[74/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=65, Sil=0.6342, Noise=0.209
[75/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=60, Sil=0.6711, Noise=0.215
[76/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=60, Sil=0.6711, Noise=0.215
[77/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=46, Sil=0.6796, Noise=0.265
[78/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=46, Sil=0.6796, Noise=0.265
[79/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=33, Sil=0.6131, Noise=0.230
[80/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=33, Sil=0.6131, Noise=0.230
[81/486] UMAP(nn=10, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=32, Sil=0.6351, Noise=0.227
[82/486] UM

  warn(


[91/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=57, Sil=0.6530, Noise=0.209
[92/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=57, Sil=0.6530, Noise=0.209
[93/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=57, Sil=0.6741, Noise=0.226
[94/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=57, Sil=0.6741, Noise=0.226
[95/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=49, Sil=0.7117, Noise=0.278
[96/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=49, Sil=0.7117, Noise=0.278
[97/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=30, Sil=0.6204, Noise=0.218
[98/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=30, Sil=0.6204, Noise=0.218
[99/486] UMAP(nn=10, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=32, Sil=0.6415, Noise=0.267
[100/486] U

  warn(


[109/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=56, Sil=0.4818, Noise=0.332
[110/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=56, Sil=0.4818, Noise=0.332
[111/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=50, Sil=0.5437, Noise=0.372
[112/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=50, Sil=0.5437, Noise=0.372
[113/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=32, Sil=0.6087, Noise=0.479
[114/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=32, Sil=0.6087, Noise=0.479
[115/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=2, Sil=0.3890, Noise=0.010
[116/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=2, Sil=0.3890, Noise=0.010
[117/486] UMAP(nn=10, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=3, Sil=0.3078, Noise=0.029
[118/486] UMAP

  warn(


[127/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=51, Sil=0.4655, Noise=0.286
[128/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=51, Sil=0.4655, Noise=0.286
[129/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=48, Sil=0.5150, Noise=0.338
[130/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=48, Sil=0.5150, Noise=0.338
[131/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=31, Sil=0.5628, Noise=0.444
[132/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=31, Sil=0.5628, Noise=0.444
[133/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=5, Sil=0.2223, Noise=0.049
[134/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=5, Sil=0.2223, Noise=0.049
[135/486] UMAP(nn=10, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=19, Sil=0.3405, Noise=0.298
[136

  warn(


[145/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=51, Sil=0.4615, Noise=0.312
[146/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=51, Sil=0.4615, Noise=0.312
[147/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=48, Sil=0.5315, Noise=0.369
[148/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=48, Sil=0.5315, Noise=0.369
[149/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=33, Sil=0.5786, Noise=0.459
[150/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=33, Sil=0.5786, Noise=0.459
[151/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=27, Sil=0.4721, Noise=0.382
[152/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=27, Sil=0.4721, Noise=0.382
[153/486] UMAP(nn=10, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=2, Sil=0.3883, Noise=0.011
[15

  warn(


[163/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=62, Sil=0.6822, Noise=0.165
[164/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=61, Sil=0.6881, Noise=0.164
[165/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=64, Sil=0.7095, Noise=0.203
[166/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=64, Sil=0.7095, Noise=0.203
[167/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=56, Sil=0.7588, Noise=0.225
[168/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=56, Sil=0.7588, Noise=0.225
[169/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=35, Sil=0.6823, Noise=0.198
[170/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=35, Sil=0.6823, Noise=0.198
[171/486] UMAP(nn=30, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=35, Sil=0.7055, Noise=0.220
[172/486] U

  warn(


[181/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=67, Sil=0.6782, Noise=0.179
[182/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=67, Sil=0.6782, Noise=0.179
[183/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=64, Sil=0.6976, Noise=0.166
[184/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=64, Sil=0.6976, Noise=0.166
[185/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=56, Sil=0.7551, Noise=0.230
[186/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=56, Sil=0.7551, Noise=0.230
[187/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=35, Sil=0.6618, Noise=0.192
[188/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=35, Sil=0.6618, Noise=0.192
[189/486] UMAP(nn=30, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=35, Sil=0.7018, Noise=0.221
[1

  warn(


[199/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=64, Sil=0.6721, Noise=0.159
[200/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=64, Sil=0.6721, Noise=0.159
[201/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=62, Sil=0.7030, Noise=0.163
[202/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=62, Sil=0.7030, Noise=0.163
[203/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=53, Sil=0.7447, Noise=0.217
[204/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=53, Sil=0.7447, Noise=0.217
[205/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=35, Sil=0.6604, Noise=0.193
[206/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=35, Sil=0.6604, Noise=0.193
[207/486] UMAP(nn=30, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=34, Sil=0.6862, Noise=0.197
[2

  warn(


[217/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=63, Sil=0.6393, Noise=0.186
[218/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=63, Sil=0.6393, Noise=0.186
[219/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=61, Sil=0.6852, Noise=0.204
[220/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=61, Sil=0.6852, Noise=0.204
[221/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=54, Sil=0.7266, Noise=0.277
[222/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=54, Sil=0.7266, Noise=0.277
[223/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=34, Sil=0.6217, Noise=0.218
[224/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=34, Sil=0.6217, Noise=0.218
[225/486] UMAP(nn=30, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=33, Sil=0.6474, Noise=0.231
[226/486] U

  warn(


[235/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=60, Sil=0.6320, Noise=0.175
[236/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=60, Sil=0.6320, Noise=0.175
[237/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=60, Sil=0.6736, Noise=0.207
[238/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=60, Sil=0.6736, Noise=0.207
[239/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=45, Sil=0.6853, Noise=0.235
[240/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=45, Sil=0.6853, Noise=0.235
[241/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=34, Sil=0.6289, Noise=0.234
[242/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=34, Sil=0.6289, Noise=0.234
[243/486] UMAP(nn=30, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=31, Sil=0.6336, Noise=0.202
[2

  warn(


[253/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=62, Sil=0.6583, Noise=0.198
[254/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=62, Sil=0.6583, Noise=0.198
[255/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=56, Sil=0.6686, Noise=0.190
[256/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=56, Sil=0.6686, Noise=0.190
[257/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=46, Sil=0.6789, Noise=0.219
[258/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=46, Sil=0.6789, Noise=0.219
[259/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=34, Sil=0.6223, Noise=0.211
[260/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=34, Sil=0.6223, Noise=0.211
[261/486] UMAP(nn=30, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=31, Sil=0.6480, Noise=0.219
[2

  warn(


[271/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=52, Sil=0.5189, Noise=0.265
[272/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=52, Sil=0.5189, Noise=0.265
[273/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=46, Sil=0.5275, Noise=0.292
[274/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=46, Sil=0.5275, Noise=0.292
[275/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=37, Sil=0.6094, Noise=0.407
[276/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=37, Sil=0.6094, Noise=0.407
[277/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=29, Sil=0.4912, Noise=0.304
[278/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=29, Sil=0.4912, Noise=0.304
[279/486] UMAP(nn=30, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=2, Sil=0.5593, Noise=0.000
[280/486] UM

  warn(


[289/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=55, Sil=0.4996, Noise=0.276
[290/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=55, Sil=0.4996, Noise=0.276
[291/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=46, Sil=0.5329, Noise=0.315
[292/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=46, Sil=0.5329, Noise=0.315
[293/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=34, Sil=0.5742, Noise=0.384
[294/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=34, Sil=0.5742, Noise=0.384
[295/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=27, Sil=0.4796, Noise=0.311
[296/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=27, Sil=0.4796, Noise=0.311
[297/486] UMAP(nn=30, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=2, Sil=0.5796, Noise=0.003
[29

  warn(


[307/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=53, Sil=0.5103, Noise=0.258
[308/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=53, Sil=0.5103, Noise=0.258
[309/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=44, Sil=0.5309, Noise=0.296
[310/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=44, Sil=0.5309, Noise=0.296
[311/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=36, Sil=0.6140, Noise=0.398
[312/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=36, Sil=0.6140, Noise=0.398
[313/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=28, Sil=0.4562, Noise=0.283
[314/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=28, Sil=0.4562, Noise=0.283
[315/486] UMAP(nn=30, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=2, Sil=0.5324, Noise=0.000
[31

  warn(


[325/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=71, Sil=0.6800, Noise=0.161
[326/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=70, Sil=0.6853, Noise=0.158
[327/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=65, Sil=0.6988, Noise=0.176
[328/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=65, Sil=0.6988, Noise=0.176
[329/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=58, Sil=0.7430, Noise=0.217
[330/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=58, Sil=0.7430, Noise=0.217
[331/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=36, Sil=0.6652, Noise=0.197
[332/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=36, Sil=0.6652, Noise=0.197
[333/486] UMAP(nn=50, md=0.0, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=36, Sil=0.6760, Noise=0.199
[334/486] U

  warn(


[343/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=63, Sil=0.6745, Noise=0.162
[344/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=62, Sil=0.6788, Noise=0.160
[345/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=63, Sil=0.6881, Noise=0.172
[346/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=63, Sil=0.6881, Noise=0.172
[347/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=54, Sil=0.7398, Noise=0.195
[348/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=54, Sil=0.7398, Noise=0.195
[349/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=36, Sil=0.6690, Noise=0.205
[350/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=36, Sil=0.6690, Noise=0.205
[351/486] UMAP(nn=50, md=0.0, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=35, Sil=0.6893, Noise=0.211
[3

  warn(


[361/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=67, Sil=0.6919, Noise=0.165
[362/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=66, Sil=0.6987, Noise=0.163
[363/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=60, Sil=0.7117, Noise=0.155
[364/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=60, Sil=0.7117, Noise=0.155
[365/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=51, Sil=0.7322, Noise=0.200
[366/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=51, Sil=0.7322, Noise=0.200
[367/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=33, Sil=0.6518, Noise=0.188
[368/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=33, Sil=0.6518, Noise=0.188
[369/486] UMAP(nn=50, md=0.0, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=33, Sil=0.6851, Noise=0.195
[3

  warn(


[379/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=64, Sil=0.6340, Noise=0.174
[380/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=64, Sil=0.6340, Noise=0.174
[381/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=56, Sil=0.6546, Noise=0.176
[382/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=56, Sil=0.6546, Noise=0.176
[383/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=50, Sil=0.6911, Noise=0.213
[384/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=50, Sil=0.6911, Noise=0.213
[385/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=32, Sil=0.5970, Noise=0.191
[386/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=32, Sil=0.5970, Noise=0.191
[387/486] UMAP(nn=50, md=0.1, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=31, Sil=0.6203, Noise=0.197
[388/486] U

  warn(


[397/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=55, Sil=0.6411, Noise=0.159
[398/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=55, Sil=0.6411, Noise=0.159
[399/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=55, Sil=0.6638, Noise=0.177
[400/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=55, Sil=0.6638, Noise=0.177
[401/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=47, Sil=0.6827, Noise=0.186
[402/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=47, Sil=0.6827, Noise=0.186
[403/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=34, Sil=0.6126, Noise=0.214
[404/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=34, Sil=0.6126, Noise=0.214
[405/486] UMAP(nn=50, md=0.1, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=31, Sil=0.6488, Noise=0.226
[4

  warn(


[415/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=61, Sil=0.6464, Noise=0.180
[416/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=61, Sil=0.6464, Noise=0.180
[417/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=55, Sil=0.6819, Noise=0.195
[418/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=55, Sil=0.6819, Noise=0.195
[419/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=45, Sil=0.6760, Noise=0.205
[420/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=45, Sil=0.6760, Noise=0.205
[421/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=32, Sil=0.6105, Noise=0.196
[422/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=32, Sil=0.6105, Noise=0.196
[423/486] UMAP(nn=50, md=0.1, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=30, Sil=0.6417, Noise=0.198
[4

  warn(


[433/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=54, Sil=0.4958, Noise=0.251
[434/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=54, Sil=0.4958, Noise=0.251
[435/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=45, Sil=0.5121, Noise=0.267
[436/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=45, Sil=0.5121, Noise=0.267
[437/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=36, Sil=0.5570, Noise=0.349
[438/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=36, Sil=0.5570, Noise=0.349
[439/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=2, Sil=0.5732, Noise=0.000
[440/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=2, Sil=0.5732, Noise=0.000
[441/486] UMAP(nn=50, md=0.5, nc=8) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=2, Sil=0.5732, Noise=0.000
[442/486] UMAP

  warn(


[451/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=50, Sil=0.5030, Noise=0.239
[452/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=50, Sil=0.5030, Noise=0.239
[453/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=2, Sil=0.5659, Noise=0.000
[454/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=2, Sil=0.5659, Noise=0.000
[455/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=2, Sil=0.5659, Noise=0.000
[456/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=2, Sil=0.5659, Noise=0.000
[457/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=2, Sil=0.5659, Noise=0.000
[458/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=2, Sil=0.5659, Noise=0.000
[459/486] UMAP(nn=50, md=0.5, nc=16) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=2, Sil=0.5659, Noise=0.000
[460/486]

  warn(


[469/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.0) => Clusters=56, Sil=0.5045, Noise=0.264
[470/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=5, eps=0.1) => Clusters=56, Sil=0.5045, Noise=0.264
[471/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.0) => Clusters=45, Sil=0.5112, Noise=0.289
[472/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=20, eps=0.1) => Clusters=45, Sil=0.5112, Noise=0.289
[473/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.0) => Clusters=2, Sil=0.5420, Noise=0.000
[474/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=50, ms=50, eps=0.1) => Clusters=2, Sil=0.5420, Noise=0.000
[475/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.0) => Clusters=2, Sil=0.5420, Noise=0.000
[476/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=5, eps=0.1) => Clusters=2, Sil=0.5420, Noise=0.000
[477/486] UMAP(nn=50, md=0.5, nc=32) + HDBSCAN(mcs=100, ms=20, eps=0.0) => Clusters=3, Sil=0.3276, Noise=0.007
[478/48

In [1]:
import pandas as pd

# 读取结果
df = pd.read_csv("umap_hdbscan_results_yelp.csv")

# 筛选条件：簇数适中 + 噪声不过大
df_filtered = df[
    (df["Clusters"] >= 10) & 
    (df["Clusters"] <= 50) & 
    (df["NoiseRatio"] <= 0.3)
]

# 按 Score 排序，取 Top 10
df_top = df_filtered.sort_values(by="Score", ascending=False).head(10)

print("===== 符合条件的 Top 10 参数组合 =====")
print(df_top)

# 保存筛选结果
df_top.to_csv("umap_hdbscan_filtered.csv", index=False)
print("\n筛选后的结果已保存到 umap_hdbscan_filtered.csv")


===== 符合条件的 Top 10 参数组合 =====
    nn   md  nc umap_metric  mcs  ms  eps hdb_metric  Clusters  Silhouette  \
77  50  0.1  16   euclidean   50  50  0.1  euclidean        47    0.682731   
76  50  0.1  16   euclidean   50  50  0.0  euclidean        47    0.682731   
81  50  0.0   8   euclidean  100  50  0.1  euclidean        33    0.711992   
80  50  0.0   8   euclidean  100  50  0.0  euclidean        33    0.711992   
83  50  0.0  32   euclidean  100  20  0.1  euclidean        33    0.685103   
82  50  0.0  32   euclidean  100  20  0.0  euclidean        33    0.685103   
87  30  0.0  32   euclidean  100  20  0.1  euclidean        34    0.686189   
86  30  0.0  32   euclidean  100  20  0.0  euclidean        34    0.686189   
89  30  0.0  32   euclidean  100  50  0.0  euclidean        31    0.709584   
88  30  0.0  32   euclidean  100  50  0.1  euclidean        31    0.709584   

    NoiseRatio     Score  TimeSec  
77    0.185891  0.496840    6.604  
76    0.185891  0.496840    6.608  
81 

In [2]:
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
import hdbscan
from itertools import product
from time import time
import umap
import warnings

In [2]:
# 1. Yelp最优参数（从Top10中选择综合评分最高的组合，如第81行：nn=50, nc=8, mcs=100, ms=50, eps=0.1）
best_umap_params = {
    "n_neighbors": 30,        # Yelp最优nn=50（原Fashion=20）
    "min_dist": 0.0,          # Yelp最优md=0.0（原Fashion=0.0）
    "n_components": 8,        # Yelp最优nc=8（原Fashion=8，可保持一致）
    "metric": "euclidean"     # 距离度量与Yelp一致
}
best_hdb_params = {
    "min_cluster_size": 50,  # Yelp最优mcs=100（原Fashion=50）
    "min_samples": 50,        # Yelp最优ms=50（原Fashion=10，显著增大）
    "cluster_selection_epsilon": 0.0,  # Yelp最优eps=0.1（原Fashion=0.0）
    "metric": "euclidean"     # 距离度量与Yelp一致
}

# 2. 加载Yelp的PCA64嵌入（路径需修改为Yelp数据集路径）
# X_pca = pickle.load(open("../data/fashion/handled/pca64_itm_emb_np.pkl", "rb"))
X_pca = pickle.load(open("../data/yelp/handled/pca64_itm_emb_np.pkl", "rb"))  # 改为Yelp路径
X_scaled = StandardScaler().fit_transform(X_pca)

# 3. UMAP降维（适配Yelp的高维语义结构，n_neighbors增大至50）
best_reducer = umap.UMAP(**best_umap_params, random_state=42)
Z_best = best_reducer.fit_transform(X_scaled)

# 4. HDBSCAN聚类（关键修改：增大min_cluster_size和min_samples，减少噪声点）
best_clusterer = hdbscan.HDBSCAN(**best_hdb_params)
labels_best = best_clusterer.fit_predict(Z_best)
probs_best = best_clusterer.probabilities_

# 5. 验证Yelp聚类结果（调整噪声比例阈值，Yelp允许更高噪声）
noise_ratio = np.mean(labels_best == -1)
print(f"Yelp聚类：簇数量={len(set(labels_best))-(1 if -1 in labels_best else 0)}, "
      f"噪声比例={noise_ratio:.3f}（Yelp场景可放宽至≤0.3）")

AttributeError: module 'umap' has no attribute 'UMAP'

In [None]:
import umap.umap_ as umap

In [2]:
# ================== 1. （Yelp专属）用Yelp最优参数生成聚类结果 ==================
# 加载Yelp的PCA64嵌入（路径替换为Yelp数据集路径）
X_pca = pickle.load(open("../data/yelp/handled/pca64_itm_emb_np.pkl", "rb"))  # 改Yelp路径
X_scaled = StandardScaler().fit_transform(X_pca)  # 标准化（Yelp数据维度更高，标准化更关键）

# Yelp最优UMAP降维参数（从Top10选最优：nn=50, md=0.0, nc=8，适配Yelp稀疏语义）
best_umap_params = {"n_neighbors":50, "min_dist":0.0, "n_components":8, "metric":"euclidean"}  # 改Yelp参数
best_reducer = umap.UMAP(**best_umap_params, random_state=42)
Z_best = best_reducer.fit_transform(X_scaled)

# Yelp最优HDBSCAN聚类参数（从Top10选最优：mcs=100, ms=50, eps=0.1，减少Yelp噪声）
best_hdb_params = {"min_cluster_size":100, "min_samples":50, "cluster_selection_epsilon":0.1, "metric":"euclidean"}  # 改Yelp参数
best_clusterer = hdbscan.HDBSCAN(**best_hdb_params)
labels_best = best_clusterer.fit_predict(Z_best)  # 簇标签（含噪声-1）
probs_best = best_clusterer.probabilities_        # 核心点隶属度（Yelp噪声多，需用此过滤）


# ================== 2. （Yelp适配）计算加权簇中心 cluster_centers_final ==================
# 步骤1：筛选有效簇（排除噪声标签-1，Yelp噪声比例更高，需严格过滤）
valid_cluster_ids = np.unique(labels_best[labels_best != -1])  # 所有非噪声的簇ID
cluster_centers_final = []

# 步骤2：对每个有效簇，按核心隶属度加权计算中心（Yelp物品多，增加“最小核心点数量”过滤小簇）
min_core_points = 30  # Yelp专属：过滤小于30个核心点的小簇（避免簇中心不稳定）
for cid in valid_cluster_ids:
    cluster_mask = labels_best == cid  # 该簇的物品掩码
    cluster_core_mask = cluster_mask & (probs_best > 0)  # 仅保留核心点（Yelp非核心点噪声多）
    cluster_embeddings = X_pca[cluster_core_mask]  # 用核心点嵌入计算中心（更稳定）
    cluster_probs = probs_best[cluster_core_mask]  # 核心点的隶属度（权重）
    
    # 过滤过小的簇（Yelp避免小簇干扰）
    if len(cluster_embeddings) < min_core_points:
        continue
    
    # 加权平均计算簇中心（与Fashion逻辑一致，但用核心点优化）
    weighted_center = np.average(cluster_embeddings, axis=0, weights=cluster_probs)
    cluster_centers_final.append(weighted_center)

# 转为numpy数组（Yelp簇数量通常比Fashion多，如33簇左右）
cluster_centers_final = np.array(cluster_centers_final)
print(f"Yelp有效簇数量：{len(cluster_centers_final)}（已过滤小簇和噪声）")


# ================== 3. （Yelp优化）计算模糊隶属度向量 fuzzy_U_final ==================
def compute_fuzzy_membership_yelp(item_emb, cluster_labels, cluster_probs, cluster_centers, fuzzy_m=2.5):
    """Yelp专属模糊隶属度计算：调整模糊指数、增强噪声点鲁棒性"""
    N = len(item_emb)
    C = len(cluster_centers)
    fuzzy_U = np.zeros((N, C))  # N=Yelp物品数（远大于Fashion的4722），C=有效簇数
    
    # 1. 处理非噪声点（Yelp核心点比例低，用更高模糊指数保留多簇关联）
    non_noise_mask = cluster_labels != -1
    valid_cluster_ids = np.unique(cluster_labels[non_noise_mask])
    for i in np.where(non_noise_mask)[0]:
        cid = cluster_labels[i]
        # 匹配簇ID到索引（Yelp簇ID可能不连续，需精准映射）
        cid_idx = np.where(valid_cluster_ids == cid)[0]
        if len(cid_idx) == 0:  # 排除已过滤的小簇
            continue
        cid_idx = cid_idx[0]
        
        main_prob = cluster_probs[i]
        # 计算到各簇中心的距离（Yelp用L2距离，增加距离衰减系数）
        dists = np.linalg.norm(item_emb[i] - cluster_centers, axis=1)
        dists[dists < 1e-8] = 1e-8  # 避免除零
        inv_dists = 1 / (dists ** 1.2)  # Yelp专属：距离衰减更明显，减少远簇干扰
        
        # 分配隶属度并归一化
        fuzzy_U[i, cid_idx] = main_prob * inv_dists[cid_idx]
        for k in range(C):
            if k != cid_idx:
                fuzzy_U[i, k] = (1 - main_prob) * inv_dists[k]
        fuzzy_U[i] /= np.sum(fuzzy_U[i])
    
    # 2. 处理噪声点（Yelp噪声多，用更平滑的模糊分配）
    noise_mask = cluster_labels == -1
    for i in np.where(noise_mask)[0]:
        dists = np.linalg.norm(item_emb[i] - cluster_centers, axis=1)
        dists[dists < 1e-8] = 1e-8
        inv_dists = 1 / (dists ** 1.2)
        # 模糊指数从2→2.5（Yelp专属：减少噪声点对单一簇的依赖）
        fuzzy_U[i] = inv_dists ** (2 / (fuzzy_m - 1))
        fuzzy_U[i] /= np.sum(fuzzy_U[i])
    
    return fuzzy_U

# 调用Yelp专属函数计算模糊隶属度（用PCA64嵌入，匹配双视图建模）
fuzzy_U_final = compute_fuzzy_membership_yelp(
    item_emb=X_pca,
    cluster_labels=labels_best,
    cluster_probs=probs_best,
    cluster_centers=cluster_centers_final,
    fuzzy_m=1.8  # Yelp专属模糊指数（比Fashion高，增强鲁棒性）
)


# ================== 4. （Yelp路径）保存文件 ==================
save_dir = "../data/yelp/handled/"  # 改Yelp保存路径
# 保存Yelp聚类结果（文件名与Fashion一致，便于后续模型调用）
pickle.dump(labels_best, open(f"{save_dir}/hdbscan_best_labels.pkl", "wb"))
pickle.dump(cluster_centers_final, open(f"{save_dir}/hdbscan_cluster_centers.pkl", "wb"))
pickle.dump(fuzzy_U_final, open(f"{save_dir}/hdbscan_fuzzy_U.pkl", "wb"))
pickle.dump(probs_best, open(f"{save_dir}/hdbscan_core_probs.pkl", "wb"))

# 额外输出Yelp关键指标（验证结果合理性）
noise_ratio = np.mean(labels_best == -1)
print(f"Yelp聚类结果验证：")
print(f"- 噪声比例：{noise_ratio:.3f}（Yelp允许≤0.3，符合要求）")
print(f"- 模糊隶属度矩阵形状：{fuzzy_U_final.shape}（物品数×簇数，匹配Yelp数据规模）")
print(f"Yelp所有文件保存成功！")

AttributeError: module 'umap' has no attribute 'UMAP'

In [6]:
# 验证变量是否存在且格式正确
print(f"cluster_centers_final 形状: {cluster_centers_final.shape}")  # 预期 (27, 64)（27个簇，64维嵌入）
print(f"fuzzy_U_final 形状: {fuzzy_U_final.shape}")                  # 预期 (4722, 27)（4722个物品，27个簇）
print(f"labels_best 形状: {labels_best.shape}")                      # 预期 (4722,)

cluster_centers_final 形状: (56, 64)
fuzzy_U_final 形状: (11383, 56)
labels_best 形状: (11383,)
