爬取动漫排行榜上所有subject号

In [None]:
import requests
from bs4 import BeautifulSoup
import re
import time

# Bangumi anime browser endpoint; the sort/page query parameters are added per request.
url='https://bangumi.tv/anime/browser'
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7,ja;q=0.6,ru;q=0.5",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36",
}

# Ranking pages 1 through 405 are crawled.
page_range=list(range(1,406))
# Only hrefs of the exact form "/subject/<digits>" are kept.
pattern = re.compile(r"^/subject/\d+$")

# Collected "/subject/<id>" paths, in crawl order.
code_list=[]

for page in page_range:
    params = {
        'sort':'rank',
        'page':page
    }

    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        resp = requests.get(url, headers=headers, params=params, timeout=15)
        # Surface 4xx/5xx responses instead of parsing an error page as "0 items".
        resp.raise_for_status()
    except requests.RequestException as e:
        # Best effort: report the failed page and keep what was collected so far.
        print(f"page {page}: request failed ({e})")
        time.sleep(1)
        continue

    resp.encoding = "utf-8"
    soup = BeautifulSoup(resp.text, "html.parser")

    # All <a class="l"> anchors on this page whose href is a subject link.
    valid_links = [
        link
        for link in (a.get("href", "") for a in soup.select("a.l"))
        if pattern.fullmatch(link)
    ]
    code_list.extend(valid_links)

    print(f"page {page}: {len(valid_links)} items")
    time.sleep(1)   # polite delay

print(len(code_list))
print(code_list[:20])


In [None]:
# Each collected entry has the form "/subject/<id>"; cutting off the
# fixed-length prefix leaves just the id string.
prefix_length = len("/subject/")
cleaned_code_list = [path[prefix_length:] for path in code_list]
cleaned_code_list

使用Bangumi官方api获取标签信息

In [None]:
import requests
import pandas as pd
import time

import os

# Parallel lists: one display name and one list of tag dicts per subject id.
name_cn_list = []
tag_dict_list = []

# The request headers are the same for every subject, so build them once.
headers = {
    "Accept": "application/json",
    "User-Agent": "Yuri Anime Classification project (sjysjy222@126.com)"
}

for code in cleaned_code_list:
    url = f"https://api.bgm.tv/v0/subjects/{code}"

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(url=url, headers=headers, timeout=15)
        response.raise_for_status()  # raises on 4xx/5xx responses
        data = response.json()

        # Fall back to `name` when `name_cn` is missing or empty.
        name_cn = data.get('name_cn') or data.get('name') or ''
        # `or []` also covers an explicit null for "tags" in the payload.
        tags = data.get('tags') or []

    except Exception as e:
        # Deliberate best effort: log the failure and record an empty row so
        # both lists stay aligned with cleaned_code_list.
        print(f"{code} 获取失败: {e}")
        name_cn = ''
        tags = []

    name_cn_list.append(name_cn)
    tag_dict_list.append(tags)

    print(name_cn)
    print(tags)
    time.sleep(1)  # avoid hitting the API too fast

# Collect the results into a DataFrame.
df_anime_tag = pd.DataFrame({
    'name_cn': name_cn_list,
    'tags': tag_dict_list
})

print(df_anime_tag)

# Export to Excel. Create the target folder first so a missing directory does
# not discard the whole crawl at the very last step.
excel_path = r'D:\python-for-excel\bangumi百合番分类\anime_tag.xlsx'
excel_dir = os.path.dirname(excel_path)
if excel_dir:
    os.makedirs(excel_dir, exist_ok=True)
df_anime_tag.to_excel(excel_path, index=False)
print('已导出！')


将字典展开为数据矩阵

对每个标签的标记人数 $x$ 进行 $\ln(1+x)$ 变换以压缩尺度

In [None]:
import numpy as np
import pandas as pd

# Cleaning: drop rows with missing values, then keep only rows whose 'tags'
# cell is a non-empty list.
df_anime_tag_cleaned = df_anime_tag.dropna(how='any')
has_tags = df_anime_tag_cleaned['tags'].apply(lambda x: isinstance(x, list) and len(x) > 0)
df_anime_tag_cleaned = df_anime_tag_cleaned[has_tags]

n = len(df_anime_tag_cleaned)

# Step 1: collect every tag name once.
all_tags = set()
for tags in df_anime_tag_cleaned['tags']:
    for tag in tags:
        all_tags.add(tag['name'])

# Step 2: one float column per tag, initialised to 0.0.
# Iterating the set directly would make the column order depend on the string
# hash seed of the current process; sorting gives the same column order on
# every run. (Assumes tag names are all strings, as parsed from the API JSON.)
wide_dict = {tag: np.zeros(n, dtype=float) for tag in sorted(all_tags)}

# Step 3: fill in log1p(count) for each (anime, tag) pair.
for i, tags in enumerate(df_anime_tag_cleaned['tags']):
    for tag in tags:
        tag_name = tag['name']
        tag_count = tag['count']
        wide_dict[tag_name][i] = np.log1p(tag_count)

# Step 4: build the DataFrame in one go from the prepared columns.
df_anime_tag_wide = pd.DataFrame(wide_dict)

# Put the display name in front of the tag columns.
df_anime_tag_wide.insert(0, 'name_cn', df_anime_tag_cleaned['name_cn'].values)

print(df_anime_tag_wide)


In [None]:
!pip install umap-learn
!pip install pypinyin

读表

In [None]:
import pandas as pd

# Alternative input, kept for reference: read a wide table from CSV instead of
# using the in-memory frame.
#df=pd.read_csv(r'D:\python-for-excel\bangumi_anime_class\anime_tag_wide.csv')
df = df_anime_tag_wide

display(df)

# Rows with a positive value in the '百合' column.
is_yuri = df['百合'] > 0
df_yuri = df[is_yuri]

# Keep only the columns whose sum over the selected rows is non-zero.
cols = [col for col in df_yuri.columns if df_yuri[col].sum() != 0]
df_yuri = df_yuri.loc[:, cols]

display(df_yuri)

全动画UMAP降维与Kmeans聚类

<i>调参</i>

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import umap

# ---- Data ----
# Work on a copy so the cluster labels added below do not touch `df`.
df_copy = df.copy()
names = df_copy['name_cn'].values
feature_frame = df_copy.drop(columns=['name_cn'])
X = feature_frame.values

# ---- 2. UMAP projection to 2 dimensions ----
umap_model = umap.UMAP(
    n_components=2,
    n_neighbors=20,
    min_dist=0.1,
    metric='euclidean',
    random_state=42,
)
X_umap = umap_model.fit_transform(X)

# ---- 3. Silhouette score for every candidate k ----
Ks = range(2, 21)
sil_scores = []
for k in Ks:
    labels = KMeans(n_clusters=k, random_state=42, n_init=10).fit_predict(X_umap)
    sil_scores.append(silhouette_score(X_umap, labels))

# ---- 4. Pick the k with the highest silhouette score ----
best_index = np.argmax(sil_scores)
best_k = Ks[best_index]
print(f"最大Silhouette Score为 {max(sil_scores)}, 对应的 k = {best_k}")

# ---- 5. Final KMeans fit with the chosen k ----
km_final = KMeans(n_clusters=best_k, random_state=42, n_init=10)
labels = km_final.fit_predict(X_umap)
df_copy['cluster'] = labels

# ---- 6. Result frame: UMAP coordinates, name and cluster label ----
df_umap = pd.DataFrame(X_umap, columns=['UMAP1', 'UMAP2']).assign(
    name_cn=names,
    cluster=labels,
)

print("UMAP 坐标 + 聚类标签已生成，可直接用于可视化。")


<i>可视化，预计时间10分钟</i>

In [None]:
import pandas as pd
import numpy as np
import umap
import plotly.graph_objects as go
import plotly.express as px
import re
from pypinyin import lazy_pinyin
from sklearn.cluster import KMeans

# Cluster centroids: mean UMAP coordinate of the points carrying each label.
centroids = []
for l in np.unique(labels):
    idx = np.where(labels == l)[0]
    centroid = df_umap.loc[idx, ['UMAP1', 'UMAP2']].mean().values
    centroids.append(centroid)

centroids = np.array(centroids)

# ==================== Hover tags ====================
# Every column of df except the name column is a tag column.
top_tags = df.drop(columns=['name_cn']).columns.tolist()

# Hover text = name plus the first 10 tags (in column order) with a positive
# value. Working on the numeric matrix avoids building one pandas Series per
# row and one Python-level lookup per (row, tag) pair; the text is unchanged.
tag_names = np.asarray(top_tags, dtype=object)
tag_values = df[top_tags].to_numpy()

hover_text = []
for anime_title, tag_row in zip(df['name_cn'], tag_values):
    active_tags = tag_names[np.flatnonzero(tag_row > 0)[:10]]
    hover_text.append(f"{anime_title}<br>Tags: {', '.join(active_tags)}")

df_umap['hover'] = hover_text

# Centroid hover text: cluster size and the 3 tags with the largest summed value.
centroid_hover = []
for k in range(len(centroids)):
    cluster_idx = np.where(labels == k)[0]       # row positions of cluster k
    cluster_tags_sum = df.iloc[cluster_idx][top_tags].sum()
    top_tags_k = cluster_tags_sum.nlargest(3).index.tolist()
    n_items = len(cluster_idx)  # number of anime in the cluster
    centroid_hover.append(f"群 {k} ({n_items} 部动画, Top tags: {', '.join(top_tags_k)})")

# ==================== 距离函数（最近十部） ====================

def find_closest(anime_name, X, names, k=10):
    """Return the row indices of the k points nearest to the named anime.

    Parameters
    ----------
    anime_name : value looked up in ``names``; the first match is used.
    X : 2-D array of coordinates, one row per entry of ``names``.
    names : NumPy array of names aligned with the rows of ``X``.
    k : maximum number of neighbours to return.

    Returns
    -------
    list of int
        Indices sorted by increasing Euclidean distance. The query row itself
        is never included, so fewer than ``k`` indices come back when ``X``
        has fewer than ``k + 1`` rows.

    Raises
    ------
    IndexError
        If ``anime_name`` does not occur in ``names``.
    """
    idx = np.where(names == anime_name)[0][0]
    target = X[idx]

    dists = np.linalg.norm(X - target, axis=1)
    dists[idx] = np.inf  # push the query point to the end of the ordering

    order = np.argsort(dists)
    # The inf above only moves the query point last; it would still be
    # returned once k reaches the number of rows, so filter it out explicitly.
    nearest = order[order != idx][:k]
    return nearest.tolist()


# ==================== Figure ====================
fig = go.Figure()

# T10 has 10 colours, but k is searched up to 20, so with more than 10
# clusters two of them would share a colour. Switch to the 26-colour Alphabet
# palette in that case; the modulo stays as a guard for even larger counts.
n_clusters = len(np.unique(labels))
if n_clusters <= len(px.colors.qualitative.T10):
    cluster_colors = px.colors.qualitative.T10
else:
    cluster_colors = px.colors.qualitative.Alphabet
# One colour per point, chosen by its KMeans cluster label.
group_colors = [cluster_colors[l % len(cluster_colors)] for l in labels]

# Trace 0: one marker per anime.
fig.add_trace(
    go.Scattergl(
        x=df_umap['UMAP1'],
        y=df_umap['UMAP2'],
        mode='markers',
        marker=dict(size=6, color=group_colors.copy(), opacity=0.7),
        text=df_umap['hover'],
        hoverinfo='text',
        name='Anime'
    )
)

# Trace 1: one star marker per cluster centroid.
fig.add_trace(
    go.Scattergl(
        x=centroids[:,0],
        y=centroids[:,1],
        mode='markers',
        marker=dict(size=12, color='yellow', symbol='star'),
        text=centroid_hover,
        hoverinfo='text',
        name='Centroids'
    )
)

# ==================== Dropdown ====================
def sort_key(tag):
    """Sort key for dropdown labels.

    Labels consisting only of characters in U+4E00..U+9FFF sort first, ordered
    by the concatenated output of ``lazy_pinyin``; every other label sorts
    after them, ordered by the raw string.
    """
    is_all_cjk = re.match(r'^[\u4e00-\u9fff]+$', tag) is not None
    if not is_all_cjk:
        # Group 1: anything that is not purely CJK, compared as-is.
        return (1, tag)
    # Group 0: purely CJK labels, compared by their pinyin spelling.
    return (0, ''.join(lazy_pinyin(tag)))


import os

# Total tag score per anime. Summing only the tag columns keeps the 'cluster'
# label (added to df_copy by the clustering cell) and any 'tag_sum' left over
# from an earlier run of this cell out of the score.
df_copy['tag_sum'] = df[top_tags].sum(axis=1)
# The 50 anime with the highest total score get an entry in the dropdown.
top50_anime = df_copy.nlargest(50, 'tag_sum')['name_cn'].values
sorted_names = sorted(top50_anime,key=sort_key)
buttons = []

n_points = len(names)

# restyle takes one value per targeted trace, so the per-point colour array
# for trace 0 has to be wrapped in an outer list: [[c0, c1, ...]].
group_colors_copy = [group_colors]

# 0. Empty choice: restore the default look of trace 0.
buttons.append(
    dict(
        label='(No Anime)',
        method='restyle',
        args=[{
            'marker.opacity': [0.7],
            'marker.color': group_colors_copy,
            'marker.size': [6],
        },[0]]
    )
)

# 1. One button per anime.
for anime_name in sorted_names:
    i = np.where(names == anime_name)[0][0]

    # Default look for every point: faded, blue, small.
    opacity = [0.15] * n_points
    color = ['blue'] * n_points
    size = [6] * n_points

    # ---- Selected anime: red ----
    opacity[i] = 1.0
    color[i] = 'red'
    size[i] = 22

    # ---- Its 10 nearest anime in UMAP space: orange ----
    nearest = find_closest(anime_name, X_umap, names, k=10)
    for j in nearest:
        opacity[j] = 0.9
        color[j] = 'orange'
        size[j] = 16

    buttons.append(
        dict(
            label=anime_name,
            method='restyle',
            args=[{
                'marker.opacity': [opacity],
                'marker.color': [color],
                'marker.size': [size]
            },[0]]
        )
    )


# -------------------- Tag dropdown --------------------

tag_counts = df[top_tags].sum().sort_values(ascending=False)
top_tags_limited = tag_counts.head(70).index.tolist()  # the 70 tags with the largest summed value
sorted_tags = sorted(top_tags_limited,key=sort_key)

tag_buttons = []

# First entry: no tag selected, restore the default look.
tag_buttons.append(
    dict(
        label="(All tags)",
        method="restyle",
        args=[{
            'marker.opacity': [0.7],
            'marker.color': group_colors_copy,
            'marker.size': [6]
        },[0]]
    )
)

# One button per tag: points carrying the tag become large, opaque and red.
for tag in sorted_tags:
    mask = (df[tag] > 0).to_numpy()  # True for anime that carry this tag

    tag_buttons.append(
        dict(
            label=tag,
            method="restyle",
            args=[{
                'marker.opacity':[np.where(mask, 1.0, 0.15).tolist()],
                'marker.color':[np.where(mask, 'red', 'blue').tolist()],
                'marker.size':[np.where(mask, 14, 6).tolist()]
            },[0]]
        )
    )

tag_2_buttons=[]

# First entry: every marker back to a circle.
tag_2_buttons.append(
    dict(
        label="(All tags)",
        method="restyle",
        args=[{
            'marker.symbol': ['circle']
        }, [0]]
    )
)

# One button per tag: points carrying the tag are drawn as squares.
for tag in sorted_tags:
    mask = (df[tag] > 0).to_numpy()

    tag_2_buttons.append(
        dict(
            label=tag,
            method="restyle",
            args=[{
                'marker.symbol': [np.where(mask, 'square', 'circle').tolist()]
            },[0]]
        )
    )

fig.add_annotation(
    x=0, y=1,  # paper-relative coordinates (0~1)
    xref='paper', yref='paper',
    text="数据来自https://bangumi.tv/anime。"\
    "基于UMAP算法与标签标记数的动画聚类图。" \
    "使用Anime下拉菜单可选择具体动画及其相邻的10部动画。" \
    "使用Tag下拉菜单可选择具体动画标签。"\
    "使用Tag2下拉菜单可选择另一个动画标签，显示为方形。"\
    "建议使用PC端查看。",
    showarrow=False,
    font=dict(size=9, color="black"),
    align='left'
)

# ==================== Layout ====================
fig.update_layout(
    updatemenus=[dict(   # first menu: anime dropdown
        buttons=buttons,
        direction="down",
        showactive=True,
        xanchor='right',
        yanchor='top',
        x=1,
        y=1,
        pad=dict(t=10, r=10),
        font=dict(size=12),
    ), dict(   # second menu: tag dropdown
        buttons=tag_buttons,
        direction="down",
        xanchor='right',
        yanchor='top',
        x=1, y=0.85
    ),dict(   # third menu: tag 2 dropdown
        buttons=tag_2_buttons,
        direction="down",
        xanchor='right',
        yanchor='top',
        x=1, y=0.70
    )],

    title='全动画聚类图',
    xaxis_title='UMAP1',
    yaxis_title='UMAP2',
    autosize=True,
    font=dict(family="Microsoft YaHei, SimHei, Arial, sans-serif")
)

# ==================== Export HTML ====================
html_path = r'D:\python-for-excel\bangumi_anime_class\output\anime_umap_cluster_ver2.html'
# Create the output folder first so the export cannot fail on a missing directory.
html_dir = os.path.dirname(html_path)
if html_dir:
    os.makedirs(html_dir, exist_ok=True)
fig.write_html(html_path, include_plotlyjs='cdn', full_html=True)
print("已导出 全部动画 UMAP 降维聚类图")


百合动画UMAP降维与Kmeans聚类

（东方 Project 相关动画的标签特征过于特殊，会严重干扰聚类，因此已从数据中剔除）

<i>调参</i>

In [None]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import umap

# ---- Data ----
df_copy = df_yuri.copy()

# Titles removed before clustering: their tag profile is so unusual that
# clustering barely works with them included.
drop_name=['东方活动写真馆', '东方梦想夏乡', '幻想万华镜 春雪异变之章', '第四届东方M-1漫才大赛', '第三届东方M-1漫才大赛', '第六届东方M-1漫才大赛', '幻想万华镜 华鸟风月', 
           '第一届东方M-1漫才大赛 重制版', '幻想万华镜 红雾异变之章', '第八届东方M-1漫才大赛', '第七届东方M-1漫才大赛', 
           '第五届东方M-1漫才大赛', '第一届东方M-1漫才大赛', '第十一届东方M-1漫才大赛', 
           '东方PVD2', '第九届东方M-1漫才大赛', '东方PVD', '第十二届东方M-1漫才大赛', '第二届东方M-1漫才大赛 重制版', '第十届东方M-1漫才大赛', 
           '第二届东方M-1漫才大赛', '幻想万华镜 泡沫、哀伤的桃源', '幻想万华镜 月于丛云花于风', '秘封活动记录 ～ The Sealed Esoteric History.', 
           '幻想万华镜 永夜异变之章', '幻想万华镜 花之异变之章', '幻想万华镜 巨大妖怪传说之章', '幻想万华镜 试胆大会之章', '幻想万华镜 妖怪之山决战之章'] 

is_dropped = df_copy['name_cn'].isin(drop_name)
df_copy = df_copy[~is_dropped]

names = df_copy['name_cn'].values
feature_frame = df_copy.drop(columns=['name_cn'])
X = feature_frame.values

# ---- 2. UMAP projection to 2 dimensions ----
umap_model = umap.UMAP(
    n_components=2,
    n_neighbors=20,
    min_dist=0.1,
    metric='euclidean',
    random_state=42,
)
X_umap = umap_model.fit_transform(X)

# ---- 3. Silhouette score for every candidate k ----
Ks = range(2, 21)
sil_scores = []
for k in Ks:
    labels = KMeans(n_clusters=k, random_state=42, n_init=10).fit_predict(X_umap)
    sil_scores.append(silhouette_score(X_umap, labels))

# ---- 4. Pick the k with the highest silhouette score ----
best_index = np.argmax(sil_scores)
best_k = Ks[best_index]
print(f"最大Silhouette Score为 {max(sil_scores)}, 对应的 k = {best_k}")

# ---- 5. Final KMeans fit with the chosen k ----
km_final = KMeans(n_clusters=best_k, random_state=42, n_init=10)
labels = km_final.fit_predict(X_umap)
df_copy['cluster'] = labels

# ---- 6. Result frame: UMAP coordinates, name and cluster label ----
df_umap = pd.DataFrame(X_umap, columns=['UMAP1', 'UMAP2']).assign(
    name_cn=names,
    cluster=labels,
)

print("UMAP 坐标 + 聚类标签已生成，可直接用于可视化。")


<i>可视化</i>

In [None]:
import pandas as pd
import numpy as np
import umap
import plotly.graph_objects as go
import plotly.express as px
import re
from pypinyin import lazy_pinyin
from sklearn.cluster import KMeans
import matplotlib.cm as cm
import matplotlib.colors as mcolors

names=df_copy['name_cn'].values

# Cluster centroids: mean UMAP coordinate of the points carrying each label.
# df_umap was built with a default 0..n-1 index, so the positions returned by
# np.where are valid .loc labels.
centroids = []
for l in np.unique(labels):
    idx = np.where(labels == l)[0]
    centroid = df_umap.loc[idx, ['UMAP1', 'UMAP2']].mean().values
    centroids.append(centroid)

centroids = np.array(centroids)

# ==================== Hover tags ====================
# Tag columns only: the name, the cluster label and the '百合' column are left out.
top_tags = [c for c in df_copy.columns if c not in ['name_cn', 'cluster', '百合']]

# Hover text = name plus the first 10 tags (in column order) with a positive
# value. Working on the numeric matrix avoids building one pandas Series per
# row and one Python-level lookup per (row, tag) pair; the text is unchanged.
tag_names = np.asarray(top_tags, dtype=object)
tag_values = df_copy[top_tags].to_numpy()

hover_text = []
for anime_title, tag_row in zip(df_copy['name_cn'], tag_values):
    active_tags = tag_names[np.flatnonzero(tag_row > 0)[:10]]
    hover_text.append(f"{anime_title}<br>Tags: {', '.join(active_tags)}")

df_umap['hover'] = hover_text

# Centroid hover text: cluster size and the 5 tags with the largest summed value.
centroid_hover = []
for k in range(len(centroids)):
    cluster_idx = np.where(labels == k)[0]       # row positions of cluster k
    cluster_tags_sum = df_copy.iloc[cluster_idx][top_tags].sum()
    top_tags_k = cluster_tags_sum.nlargest(5).index.tolist()
    n_items = len(cluster_idx)  # number of anime in the cluster
    centroid_hover.append(f"群 {k} ({n_items} 部动画, Top tags: {', '.join(top_tags_k)})")

# ==================== 距离函数（最近十部） ====================

def find_closest(anime_name, X, names, k=10):
    """Return the row indices of the k points nearest to the named anime.

    Parameters
    ----------
    anime_name : value looked up in ``names``; the first match is used.
    X : 2-D array of coordinates, one row per entry of ``names``.
    names : NumPy array of names aligned with the rows of ``X``.
    k : maximum number of neighbours to return.

    Returns
    -------
    list of int
        Indices sorted by increasing Euclidean distance. The query row itself
        is never included, so fewer than ``k`` indices come back when ``X``
        has fewer than ``k + 1`` rows.

    Raises
    ------
    IndexError
        If ``anime_name`` does not occur in ``names``.
    """
    idx = np.where(names == anime_name)[0][0]
    target = X[idx]

    dists = np.linalg.norm(X - target, axis=1)
    dists[idx] = np.inf  # push the query point to the end of the ordering

    order = np.argsort(dists)
    # The inf above only moves the query point last; it would still be
    # returned once k reaches the number of rows, so filter it out explicitly.
    nearest = order[order != idx][:k]
    return nearest.tolist()


# ==================== Figure ====================
import matplotlib

fig = go.Figure()

n_clusters = len(np.unique(labels))
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9. Prefer the colormap
# registry and fall back to the old call only on versions that lack
# `matplotlib.colormaps` or `Colormap.resampled`.
try:
    cmap = matplotlib.colormaps['viridis'].resampled(n_clusters)
except AttributeError:
    cmap = cm.get_cmap('viridis', n_clusters)

# Convert the sampled RGBA values into rgb(...) strings that plotly accepts.
group_colors = [
    f'rgb({int(r*255)},{int(g*255)},{int(b*255)})'
    for r, g, b, _ in cmap(range(n_clusters))
]
# One colour per point, chosen by its cluster label.
group_colors = [group_colors[l] for l in labels]

# Trace 0: one marker per anime.
fig.add_trace(
    go.Scattergl(
        x=df_umap['UMAP1'],
        y=df_umap['UMAP2'],
        mode='markers',
        marker=dict(size=8, color=group_colors.copy(), opacity=0.8),
        text=df_umap['hover'],
        hoverinfo='text',
        name='Anime'
    )
)

# Trace 1: one star marker per cluster centroid.
fig.add_trace(
    go.Scattergl(
        x=centroids[:,0],
        y=centroids[:,1],
        mode='markers',
        marker=dict(size=12, color='darkblue', symbol='star'),
        text=centroid_hover,
        hoverinfo='text',
        name='Centroids'
    )
)

# ==================== Dropdown ====================
def sort_key(tag):
    """Sort key for dropdown labels.

    Labels consisting only of characters in U+4E00..U+9FFF sort first, ordered
    by the concatenated output of ``lazy_pinyin``; every other label sorts
    after them, ordered by the raw string.
    """
    is_all_cjk = re.match(r'^[\u4e00-\u9fff]+$', tag) is not None
    if not is_all_cjk:
        # Group 1: anything that is not purely CJK, compared as-is.
        return (1, tag)
    # Group 0: purely CJK labels, compared by their pinyin spelling.
    return (0, ''.join(lazy_pinyin(tag)))


import os

# Every anime in this subset gets an entry in the anime dropdown.
sorted_names = sorted(names,key=sort_key)
buttons = []

n_points = len(names)

# restyle takes one value per targeted trace, so the per-point colour array
# for trace 0 has to be wrapped in an outer list: [[c0, c1, ...]].
group_colors_copy = [group_colors]

# Read the default marker look from the trace itself so that the reset buttons
# restore exactly what the figure was created with, not separately hard-coded
# numbers that can drift apart from it.
base_opacity = fig.data[0].marker.opacity
base_size = fig.data[0].marker.size

# 0. Empty choice: restore the default look of trace 0.
buttons.append(
    dict(
        label='(No Anime)',
        method='restyle',
        args=[{
            'marker.opacity': [base_opacity],
            'marker.color': group_colors_copy,
            'marker.size': [base_size],
        },[0]]
    )
)

# 1. One button per anime.
for anime_name in sorted_names:
    i = np.where(names == anime_name)[0][0]

    # Default look for every point: faded, blue, small.
    opacity = [0.15] * n_points
    color = ['blue'] * n_points
    size = [6] * n_points

    # ---- Selected anime: red ----
    opacity[i] = 1.0
    color[i] = 'red'
    size[i] = 22

    # ---- Its 10 nearest anime in UMAP space: orange ----
    nearest = find_closest(anime_name, X_umap, names, k=10)
    for j in nearest:
        opacity[j] = 0.9
        color[j] = 'orange'
        size[j] = 16

    buttons.append(
        dict(
            label=anime_name,
            method='restyle',
            args=[{
                'marker.opacity': [opacity],
                'marker.color': [color],
                'marker.size': [size]
            },[0]]
        )
    )


# -------------------- Tag dropdown --------------------

tag_counts = df_copy[top_tags].sum().sort_values(ascending=False)
top_tags_limited = tag_counts.head(100).index.tolist()  # the 100 tags with the largest summed value
sorted_tags = sorted(top_tags_limited,key=sort_key)

tag_buttons = []

# First entry: no tag selected, restore the default look.
tag_buttons.append(
    dict(
        label="(All tags)",
        method="restyle",
        args=[{
            'marker.opacity': [base_opacity],
            'marker.color': group_colors_copy,
            'marker.size': [base_size]
        },[0]]
    )
)

# One button per tag: points carrying the tag become large, opaque and red.
for tag in sorted_tags:
    # Positional mask over df_copy, which is row-aligned with `names` and df_umap.
    mask = (df_copy[tag] > 0).to_numpy()

    tag_buttons.append(
        dict(
            label=tag,
            method="restyle",
            args=[{
                'marker.opacity':[np.where(mask, 1.0, 0.15).tolist()],
                'marker.color':[np.where(mask, 'red', 'blue').tolist()],
                'marker.size':[np.where(mask, 14, 6).tolist()]
            },[0]]
        )
    )

tag_2_buttons=[]

# First entry: every marker back to a circle.
tag_2_buttons.append(
    dict(
        label="(All tags)",
        method="restyle",
        args=[{
            'marker.symbol': ['circle']
        }, [0]]
    )
)

# One button per tag: points carrying the tag are drawn as squares.
for tag in sorted_tags:
    mask = (df_copy[tag] > 0).to_numpy()

    tag_2_buttons.append(
        dict(
            label=tag,
            method="restyle",
            args=[{
                'marker.symbol': [np.where(mask, 'square', 'circle').tolist()]
            },[0]]
        )
    )


fig.add_annotation(
    x=0, y=1,  # paper-relative coordinates (0~1)
    xref='paper', yref='paper',
    text="数据来自https://bangumi.tv/anime。"\
    "基于UMAP算法与标签标记数的动画聚类图。" \
    "使用Anime下拉菜单可选择具体动画及其相邻的10部动画。" \
    "使用Tag下拉菜单可选择具体动画标签，显示为红色圆形。"\
    "使用Tag2下拉菜单可选择另一个动画标签，显示为方形。"\
    "建议使用PC端查看。",
    showarrow=False,
    font=dict(size=9, color="black"),
    align='left'
)

# ==================== Layout ====================
fig.update_layout(
    updatemenus=[dict(   # first menu: anime dropdown
        buttons=buttons,
        direction="down",
        showactive=True,
        xanchor='right',
        yanchor='top',
        x=1,
        y=1,
        pad=dict(t=10, r=10),
        font=dict(size=12),
    ), dict(   # second menu: tag dropdown
        buttons=tag_buttons,
        direction="down",
        xanchor='right',
        yanchor='top',
        x=1, y=0.85
    ),dict(   # third menu: tag 2 dropdown
        buttons=tag_2_buttons,
        direction="down",
        xanchor='right',
        yanchor='top',
        x=1, y=0.70
    )],

    title='Yuri Anime Grouper',
    xaxis_title='UMAP1',
    yaxis_title='UMAP2',
    autosize=True,
    font=dict(family="Microsoft YaHei, SimHei, Arial, sans-serif")
)

# ==================== Export HTML ====================
html_path = r'D:\python-for-excel\bangumi_anime_class\output\yuri_anime_umap_cluster_ver2.html'
# Create the output folder first so the export cannot fail on a missing directory.
html_dir = os.path.dirname(html_path)
if html_dir:
    os.makedirs(html_dir, exist_ok=True)
fig.write_html(html_path, include_plotlyjs='cdn', full_html=True)
print("已导出 百合动画 UMAP 降维聚类图")
