In [682]:
import pandas as pd

In [743]:
# Display name for each cluster id (0-25); looked up by the `cluster` column
# when the graph nodes are built.
node_labels = {
    0: 'Law, Politics',
    1: 'Geography & Environment',
    2: 'Computing',
    3: 'Dentistry, Ophthalmology, Dermatology',
    4: 'Oncology',
    5: 'Electrical & Electronic Engineering',
    6: 'Physics',
    7: 'Cardiology',
    8: 'Ecology & Zoology',
    9: 'Psychology',
    10: 'Information Engineering',
    11: 'Chemistry & Materials',
    12: 'Geology',
    13: 'History & Literature & Philosophy',
    14: 'Mechanic Engineering',
    15: 'Mathematics',
    16: 'Animal',
    17: 'Molecular & Cell Biology',
    18: 'Infectious Diseases',
    19: 'Linguistics',
    20: 'Nursing',
    21: 'Agriculture',
    22: 'Rehabilitation & Sports',
    23: 'Sociology & Culture',
    24: 'Economics',
    25: 'Education',
}

In [684]:
# Load the node table; later cells read its `cluster`, `total_citations`
# and `inner_citations` columns.
node_csv_path = "/home/lyuzhuoqi/projects/clustering/structure/node.csv"
node_stats_df = pd.read_csv(node_csv_path)

In [685]:
# Load the edge table; later cells read its `source`, `target` and `weight`
# columns.
edge_csv_path = "/home/lyuzhuoqi/projects/clustering/structure/filtered_edge.csv"
edge_df = pd.read_csv(edge_csv_path)

In [686]:
def _min_max_scale(values):
    """Linearly rescale a numeric Series so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    values : pandas.Series
        Numeric column to rescale.

    Returns
    -------
    pandas.Series
        ``(values - min) / (max - min)``. When every value is identical the
        range is zero; in that case a float Series of zeros is returned
        instead of the all-NaN result that 0/0 would produce.
    """
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Constant column: avoid 0/0 -> NaN, which would leave the colour
        # scale without usable bounds.
        return (values - lowest).astype(float)
    return (values - lowest) / span


# Node colour encodes inner_citations; edge colour encodes weight. Both are
# rescaled to [0, 1] before being handed to the colour scales.
node_stats_df['color'] = _min_max_scale(node_stats_df['inner_citations'])
edge_df['normalized_weight'] = _min_max_scale(edge_df['weight'])

In [943]:
from bqplot import Graph, ColorScale, Figure
import ipywidgets as widgets
import numpy as np

# Convert the node and edge tables into the list-of-dict records passed to
# the Graph mark as node_data / link_data.
node_data = []
for _, node_row in node_stats_df.iterrows():
    radius = np.sqrt(node_row.total_citations) * 0.005
    caption = node_labels[node_row.cluster]
    # Put the label outside the circle when the circle is small or the text is long.
    needs_outside_label = radius < 15 or len(caption) > 25
    node_data.append(dict(
        label=caption,
        label_display='outside' if needs_outside_label else 'center',
        shape='circle',
        color=node_row.color,
        shape_attrs=dict(r=radius),
    ))

link_data = [
    dict(
        source=edge_row.source,
        target=edge_row.target,
        value=edge_row.normalized_weight,
    )
    for _, edge_row in edge_df.iterrows()
]

# Colour scales: two-stop ramp for nodes, three-stop ramp (centred on the
# mean normalised weight) for links.
node_colors = node_stats_df.color
node_color_scale = ColorScale(
    min=node_colors.min(),
    max=node_colors.max(),
    colors=['#ffeda0', '#f03b20'],
)
link_weights = edge_df.normalized_weight
link_color_scale = ColorScale(
    min=link_weights.min(),
    mid=link_weights.mean(),
    max=link_weights.max(),
    colors=['#f7fbff', '#6baed6', '#08306b'],
)

# Build the graph mark.
graph_scales = {
    'color': node_color_scale,
    'link_color': link_color_scale,
}
graph = Graph(
    node_data=node_data,
    link_data=link_data,
    static=False,
    directed=True,
    link_type='arc',
    scales=graph_scales,
    charge=-1500,
)

# Place the graph mark in a 1200px x 1200px figure (no legend is attached here).
figure_layout = widgets.Layout(width='1200px', height='1200px')
figure = Figure(marks=[graph], layout=figure_layout)

figure

Figure(fig_margin={'top': 60, 'bottom': 60, 'left': 60, 'right': 60}, layout=Layout(height='1200px', width='12…

In [None]:
from IPython.display import display, HTML
import numpy as np

# Compute the values actually shown in the legend.
# Node size: radii follow sqrt(total_citations) * node_radius_scale, the same
# mapping used when the graph nodes are built.
node_radius_scale = 0.005
node_sizes = np.sqrt(node_stats_df['total_citations']) * node_radius_scale
min_size, max_size = node_sizes.min(), node_sizes.max()
size_legend_values = np.linspace(min_size, max_size, 5)
# Invert the radius mapping with the SAME scale factor so each label is the
# citation count its circle represents (dividing by 0.01 under-reported it 4x).
size_labels = [f"{(s / node_radius_scale) ** 2:.0f}" for s in size_legend_values]

# Node colour: range of the raw inner_citations values.
inner_citations_min = node_stats_df['inner_citations'].min()
inner_citations_max = node_stats_df['inner_citations'].max()

# Edge colour: range of the raw edge weights.
edge_min = edge_df['weight'].min()
edge_max = edge_df['weight'].max()

# Legend markup: one circle per sampled radius (drawn at diameter 2*s px),
# followed by two gradient bars with five evenly spaced tick labels each.
legend_html = f"""
<div style="width: 14%; padding: 15px; background: white; border-radius: 6px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
    <h7 style="color: #333;">Node Size: Total Citations</h7>
    
    <div>
        <div style="display: flex; align-items: flex-end; height: 100px;">
            {''.join([f'<div style="display: flex; flex-direction: column; align-items: center; margin: 0 8px;">'
                     f'<div style="width: {s*2}px; height: {s*2}px; border-radius: 50%; '
                     f'background: #ffeda0; border: 1.75px solid #0b0b0b;"></div>'
                     f'<span style="margin-top: 8px; font-size: 12px; color: #333">{label}</span></div>'
                     for s, label in zip(size_legend_values, size_labels)])}
        </div>
    </div>
    
    <h7 style="margin-bottom: 5px; color: #333;">Node Color: Intradisciplinary Citations</h7>
    <div style="height: 10px; width: 100%; background: linear-gradient(to right, #ffeda0, #f03b20); border-radius: 4px;"></div>
    <div style="margin-bottom: -0px; display: flex; justify-content: space-between; font-size: 12px; color: #333">
        <span>{inner_citations_min}</span>
        <span>{inner_citations_min+(inner_citations_max-inner_citations_min)/4:.0f}</span>
        <span>{inner_citations_min+(inner_citations_max-inner_citations_min)/2:.0f}</span>
        <span>{inner_citations_min+(inner_citations_max-inner_citations_min)*3/4:.0f}</span>
        <span>{inner_citations_max}</span>
    </div>

    <h7 style="margin-bottom: 5px; color: #333;">Edge Weight: Interdisciplinary Citations</h7>
    <div style="height: 10px; width: 100%; background: linear-gradient(to right, #f7fbff, #6baed6, #08306b); border-radius: 4px;"></div>
    <div style="margin-bottom: -10px; display: flex; justify-content: space-between;">
        <span style="font-size: 12px; color: #333;">{edge_min}</span>
        <span style="font-size: 12px; color: #333;">{edge_min+(edge_max-edge_min)/4:.0f}</span>
        <span style="font-size: 12px; color: #333;">{edge_min+(edge_max-edge_min)/2:.0f}</span>
        <span style="font-size: 12px; color: #333;">{edge_min+(edge_max-edge_min)*3/4:.0f}</span>
        <span style="font-size: 12px; color: #333;">{edge_max}</span>
    </div>
</div>
"""

display(HTML(legend_html))