In [2]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show, output_file, save
from bokeh.models import ColumnDataSource, Select, Label, LinearColorMapper, CustomJS, HoverTool, WheelZoomTool
from math import pi, sin, cos
from bokeh.palettes import Plasma9, Viridis9, Iridescent20, Magma256
from bokeh.layouts import column
from bokeh.models.widgets import Div
from bokeh.transform import linear_cmap
from bokeh.embed import file_html
from bokeh.resources import CDN

def _hourly_counts_by_year(df, crime_type, exclude_years,
                           crime_column='Incident Category'):
    """Count incidents of ``crime_type`` per year and per hour of day.

    The input frame is only read, never modified.

    Args:
        df: DataFrame with 'Year', 'Incident Time' and ``crime_column`` columns.
        crime_type: Category to keep; compared case-insensitively.
        exclude_years: Iterable of years (any type, compared as strings) to
            drop from the result. ``None`` means "exclude nothing".
        crime_column: Name of the column holding the incident category.

    Returns:
        ``(years, counts)`` where ``years`` is a sorted list of year strings and
        ``counts[year]`` is a list of 24 ints (index = hour of day).

    Raises:
        ValueError: If no incident of ``crime_type`` remains after excluding
            ``exclude_years``.
    """
    def parse_hour(value):
        # NOTE: values without a ':' (including missing times) are counted as
        # hour 0, matching the historical behaviour of this visualization.
        text = str(value)
        return int(text.split(':')[0]) if ':' in text else 0

    # Case-insensitive match; rows with a missing category compare as False.
    matches = df[crime_column].str.lower() == crime_type.lower()
    subset = df.loc[matches, ['Year', 'Incident Time']]

    # Work on derived Series so the caller's DataFrame is left untouched.
    year_labels = subset['Year'].astype(str)
    hour_of_day = subset['Incident Time'].map(parse_hour)

    excluded = {str(year) for year in (exclude_years or ())}
    years = sorted(year for year in year_labels.unique() if year not in excluded)
    if not years:
        raise ValueError(
            f"No incidents of type {crime_type!r} found "
            f"(excluded years: {sorted(excluded)})"
        )

    all_hours = list(range(24))
    counts = {}
    for year in years:
        per_hour = hour_of_day[year_labels == year].value_counts()
        # Fill hours with no incidents with 0 so every year has 24 entries.
        counts[year] = per_hour.reindex(all_hours, fill_value=0).astype(int).tolist()
    return years, counts


def create_crime_time_visualization(df, crime_type="motor vehicle theft",
                                    exclude_years=("2025",),
                                    output_path="crime_time_distribution.html"):
    """Build a polar (24-hour clock) bar chart of incident counts per hour.

    One set of annular wedges is drawn per year; a ``Select`` widget toggles
    which year is visible. The finished layout is also written to
    ``output_path`` as a standalone HTML page using CDN resources.

    Args:
        df: DataFrame with 'Year', 'Incident Time' ("HH:MM"-like strings) and
            'Incident Category' columns. It is not modified.
        crime_type: Incident category to plot (case-insensitive).
        exclude_years: Years to leave out of the selector. Defaults to
            ``("2025",)`` -- presumably because that year is incomplete in the
            dataset; confirm against the data.
        output_path: Path of the HTML file to write.

    Returns:
        The Bokeh layout containing the plot and the year selector.

    Raises:
        ValueError: If there is no data to plot for ``crime_type``.
    """
    # Aggregate first: this fails fast (before any output side effects) when
    # there is nothing to plot.
    years, all_data = _hourly_counts_by_year(df, crime_type, exclude_years)

    # Register the HTML output target with Bokeh's global output state.
    output_file(output_path, title="Crime Time Distribution")

    # Global maximum across all years, so bar lengths are comparable between years.
    max_count = max(max(hour_counts) for hour_counts in all_data.values())

    # One ColumnDataSource per year.
    hours = list(range(24))
    bar_width = 2 * pi / 24
    angles = [hour / 24 * 2 * pi for hour in hours]
    data_sources = {}
    for year in years:
        values = all_data[year]
        data_sources[year] = ColumnDataSource({
            'hour': hours,
            'value': values,
            'angle': angles,
            'inner_radius': [0.1] * len(hours),
            # Longest bar reaches the outer reference circle (radius 180).
            # Guard against a zero maximum to avoid ZeroDivisionError.
            'outer_radius': [(v / max_count) * 180 if max_count else 0 for v in values],
            'start_angle': [a - bar_width / 2 for a in angles],
            'end_angle': [a + bar_width / 2 for a in angles],
        })

    # Base figure: no toolbar, transparent background, small fixed borders.
    p = figure(
        tools="",
        width=500, height=500,
        x_range=(-250, 250), y_range=(-250, 250),
        toolbar_location=None,
        background_fill_color=None,
        border_fill_color=None,
        outline_line_color=None,
        min_border_left=5,
        min_border_right=5,
        min_border_top=5,
        min_border_bottom=5,
    )
    p.add_tools(WheelZoomTool())
    # Hide the cartesian axes and grid; the polar guides are drawn manually below.
    p.axis.visible = False
    p.grid.visible = False

    # Dashed reference circles.
    for radius in [60, 120, 180]:
        p.circle(0, 0, radius=radius, fill_color=None, line_color="#888888",
                 line_dash="dashed", line_width=1, alpha=0.7)

    # Radial tick line and label for each hour.
    for hour in range(24):
        angle = hour / 24 * 2 * pi
        p.line([0, cos(angle) * 180], [0, sin(angle) * 180],
               line_color="#666666", line_width=1, alpha=0.6)

        hour_label = Label(
            x=cos(angle) * 200, y=sin(angle) * 200,
            text=str(hour),
            text_align="center", text_baseline="middle",
            text_font_size="9pt",
            text_font_style="bold",
            text_color="#333333",  # dark text so labels stay readable
        )
        p.add_layout(hour_label)

    # Year selector.
    year_select = Select(
        title="Choose Year",
        value=years[0],
        options=years,
        width=80,
        height=30,
    )

    # Reversed Magma palette, using only entries 75..255 of Magma256.
    color_palette = list(reversed(Magma256[75:256]))
    # Keep the colour range non-degenerate even if every count is zero.
    color_high = max_count or 1

    # One wedge renderer per year; only the first year starts visible.
    wedges_dict = {}
    for year in years:
        wedges_dict[year] = p.annular_wedge(
            x=0, y=0,
            inner_radius='inner_radius',
            outer_radius='outer_radius',
            start_angle='start_angle',
            end_angle='end_angle',
            fill_color=linear_cmap('value', color_palette, 0, color_high),
            line_color="#444444",  # thin outline to make bars easier to see
            line_width=0.5,
            source=data_sources[year],
            visible=(year == years[0]),
        )

    # Client-side callback: hide every year, then show the selected one.
    callback = CustomJS(args=dict(wedges_dict=wedges_dict), code="""
        // Hide the wedges of every year
        Object.keys(wedges_dict).forEach(function(year) {
            wedges_dict[year].visible = false;
        });

        // Show the wedges of the selected year
        var selected_year = cb_obj.value;
        wedges_dict[selected_year].visible = true;
    """)
    year_select.js_on_change('value', callback)

    # Placeholder for a legend (currently empty).
    legend_div = Div(
        text="""
        """,
        width=200
    )

    # Attach the hover tool to every year's wedges, not just the first year:
    # otherwise tooltips stop working as soon as another year is selected.
    # The reference circles and tick lines are deliberately left out.
    hover = HoverTool(
        tooltips=[
            ("Incident count", "@value")
        ],
        renderers=list(wedges_dict.values()),
    )
    p.add_tools(hover)

    # Final layout. The controls column uses the same width as the Select,
    # otherwise it does not centre. Order in column() is top-to-bottom.
    controls = column(year_select, legend_div, width=80, align="center", margin=(0, 0, 0, 0))
    layout = column(
        column(p, controls),
        align="center",
    )

    # Render a standalone HTML document that loads BokehJS from the CDN.
    html = file_html(layout, CDN, "Crime Time Distribution")

    # Write with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html)

    return layout

# Hyper parameters
## Directory and file locations
SAVE_DIR = "./pics/"
DATA_DIR = "../../data_sf/"
CSV_NAME = 'combined_crime_data.csv'

# Load the combined crime CSV a single time; the frame is reused below.
df = pd.read_csv(f"{DATA_DIR}{CSV_NAME}")

# Build the visualization with the default crime type.
visualization = create_crime_time_visualization(df)

        # Disabled HTML template for a colour-scale legend (low -> high), meant as the text of legend_div:
        # <div style="margin-top: 10px; background-color: rgba(255,255,255,0.7); padding: 10px; border-radius: 5px;">
        #     <p style="font-weight: bold; margin: 0;">数据量级</p>
        #     <div style="display: flex; align-items: center; margin-top: 5px;">
        #         <div style="width: 20px; height: 20px; background: """ + color_palette[0] + """; margin-right: 5px;"></div>
        #         <span>低</span>
        #         <div style="flex-grow: 1; height: 20px; background: linear-gradient(to right, """ + color_palette[0] + """, """ + color_palette[-1] + """); margin: 0 10px;"></div>
        #         <div style="width: 20px; height: 20px; background: """ + color_palette[-1] + """; margin-right: 5px;"></div>
        #         <span>高</span>
        #     </div>
        # </div>

In [None]:
# import pandas as pd
# import numpy as np
# from bokeh.plotting import figure, show, output_file, save
# from bokeh.models import ColumnDataSource, Select, Label, LinearColorMapper, CustomJS
# from bokeh.layouts import column, row
# from bokeh.io import curdoc, output_notebook
# from math import pi, sin, cos
# from bokeh.palettes import Plasma9, Viridis9, Turbo256
# from bokeh.models.widgets import Div
# from bokeh.transform import linear_cmap
# from bokeh.embed import file_html
# from bokeh.resources import CDN

# def create_crime_time_visualization(df, crime_type="motor vehicle theft"):
#     """创建犯罪时间分布的极坐标柱状图（改进版）"""
    
#     # 确保Year列是字符串类型
#     df['Year'] = df['Year'].astype(str)
    
#     # 提取时间的小时部分
#     df['Hour'] = df['Incident Time'].apply(lambda x: int(str(x).split(':')[0]) if ':' in str(x) else 0)
    
#     # 筛选指定的犯罪类型
#     # 确保犯罪类型的大小写匹配
#     crime_column = 'Incident Category'  # 假设犯罪类型存储在'Incident Category'列中
#     # 为了更健壮的匹配，使用不区分大小写的筛选
#     df = df[df[crime_column].str.lower() == crime_type.lower()]
#     # 按年份和小时聚合数据
#     years = sorted(df['Year'].unique())
#     all_data = {}
    
#     for year in years:
#         year_data = df[df['Year'] == year]
#         hour_data = {}
#         for hour in range(24):
#             hour_data[hour] = len(year_data[year_data['Hour'] == hour])
#         all_data[year] = hour_data
    
#     # 设置输出为HTML文件（背景透明）
#     output_file("crime_time_distribution.html", title="Crime Time Distribution")
    
#     # 找出所有数据中的最大值用于缩放
#     max_count = 0
#     for year in all_data:
#         year_max = max(all_data[year].values())
#         if year_max > max_count:
#             max_count = year_max
    
#     # 为每个年份创建数据源
#     data_sources = {}
#     for year in years:
#         hours = list(range(24))
#         values = [all_data[year][hour] for hour in hours]
#         angles = [hour/24 * 2*pi for hour in hours]
#         bar_width = 2*pi/24
        
#         source_data = {
#             'hour': hours,
#             'value': values,
#             'angle': angles,
#             'inner_radius': [0.1] * len(hours),
#             'outer_radius': [(v / max_count) * 180 for v in values],
#             'start_angle': [a - bar_width/2 for a in angles],
#             'end_angle': [a + bar_width/2 for a in angles],
#         }
#         data_sources[year] = ColumnDataSource(source_data)
    
#     # 创建基础图表
#     p = figure(
#         width=500, height=500,
#         x_range=(-250, 250), y_range=(-250, 250),
#         toolbar_location=None,
#         background_fill_color=None,
#         border_fill_color=None,
#         outline_line_color=None,
#     )
    
#     # 隐藏坐标轴
#     p.axis.visible = False
#     p.grid.visible = False
    
#     # 添加参考圆
#     for radius in [60, 120, 180]:
#         p.circle(0, 0, radius=radius, fill_color=None, line_color="#888888", 
#                 line_dash="dashed", line_width=1, alpha=0.7)
    
#     # 添加小时刻度线
#     for hour in range(24):
#         angle = hour/24 * 2*pi
#         x = cos(angle) * 180
#         y = sin(angle) * 180
#         p.line([0, x], [0, y], line_color="#666666", line_width=1, alpha=0.6)
        
#         # 添加小时标签
#         label_x = cos(angle) * 200
#         label_y = sin(angle) * 200
#         hour_label = Label(
#             x=label_x, y=label_y, 
#             text=str(hour),
#             text_align="center", text_baseline="middle",
#             text_font_size="9pt",
#             text_font_style="bold",
#             text_color="#333333"  # 深色文本确保可见性
#         )
#         p.add_layout(hour_label)
    
#     # 创建年份选择器
#     year_select = Select(
#         title="Choose YEAR",
#         value=years[0],
#         options=years,
#         width=120
#     )
    
#     # 创建标题div - 初始为第一年
#     title_div = Div(
#         text=f"<h2 style='text-align: center;'>CRIME INCIDENTS IN SAN FRANCISCO, {years[0]}</h2>" +
#              "<h3 style='text-align: center; color: #666666;'>Distribution by hour of day</h3>",
#         width=700
#     )
    
#     # 选择一个更好的颜色映射 - Turbo是一个高饱和度的映射
#     # 我们会从颜色映射中提取子集，避免太浅的颜色
#     color_palette = Turbo256[50:256]  # 避开太浅的颜色
    
#     # 为每个年份创建扇形图
#     wedges_dict = {}
#     for year in years:
#         source = data_sources[year]
        
#         wedges = p.annular_wedge(
#             x=0, y=0,
#             inner_radius='inner_radius', 
#             outer_radius='outer_radius',
#             start_angle='start_angle', 
#             end_angle='end_angle',
#             fill_color=linear_cmap('value', color_palette, 0, max_count),
#             line_color="#444444",  # 添加细线边框增强可见度
#             line_width=0.5,
#             source=source,
#             visible=(year == years[0])  # 只有第一年的数据初始可见
#         )
#         wedges_dict[year] = wedges
    
#     # 创建JavaScript回调用于更新图表
#     callback = CustomJS(args=dict(wedges_dict=wedges_dict, title_div=title_div), code="""
#         // 隐藏所有年份的数据
#         Object.keys(wedges_dict).forEach(function(year) {
#             wedges_dict[year].visible = false;
#         });
        
#         // 显示选定年份的数据
#         var selected_year = cb_obj.value;
#         wedges_dict[selected_year].visible = true;
        
#         // 更新标题
#         title_div.text = "<h2 style='text-align: center;'>CRIME INCIDENTS IN SAN FRANCISCO, " + selected_year + "</h2>" +
#                          "<h3 style='text-align: center; color: #666666;'>Distribution by hour of day</h3>";
#     """)
    
#     # 添加回调到选择器
#     year_select.js_on_change('value', callback)
    
#     # 创建图例说明
#     legend_div = Div(
#         text="""
#         """,
#         width=200
#     )
    
#     # 创建最终布局
#     controls = column(year_select, legend_div, width=200)
#     layout = column(
#         title_div,
#         row(controls, p)
#     )
    
#     # 创建包含所有必要JS的HTML
#     html = file_html(layout, CDN, "Crime Time Distribution")
    
#     # 保存完整的HTML文件
#     with open("crime_time_distribution.html", "w") as f:
#         f.write(html)
    
#     return layout

# # Hyper parameters
# ## Define directories
# SAVE_DIR = "./pics/"
# DATA_DIR = "../../data_sf/"
# CSV_NAME = 'combined_crime_data.csv'

# # read it once only
# df = pd.read_csv(DATA_DIR + CSV_NAME)

# # 调用函数来创建可视化
# visualization = create_crime_time_visualization(df)

#         # <div style="margin-top: 10px; background-color: rgba(255,255,255,0.7); padding: 10px; border-radius: 5px;">
#         #     <p style="font-weight: bold; margin: 0;">数据量级</p>
#         #     <div style="display: flex; align-items: center; margin-top: 5px;">
#         #         <div style="width: 20px; height: 20px; background: """ + color_palette[0] + """; margin-right: 5px;"></div>
#         #         <span>低</span>
#         #         <div style="flex-grow: 1; height: 20px; background: linear-gradient(to right, """ + color_palette[0] + """, """ + color_palette[-1] + """); margin: 0 10px;"></div>
#         #         <div style="width: 20px; height: 20px; background: """ + color_palette[-1] + """; margin-right: 5px;"></div>
#         #         <span>高</span>
#         #     </div>
#         # </div>