In [None]:
import sys
# Run pip through the interpreter that backs this kernel (sys.executable) so the
# requirements are installed into the kernel's own environment.
!{sys.executable} -m pip install -r ../requirements.txt

In [None]:
import pandas as pd

# Load the parquet file into `df`; later cells read its d, t, x, y and count columns.
# The path is relative, so it is resolved against the kernel's working directory.
df = pd.read_parquet("../data/processed/sapporo_density.parquet")
# Preview the first 20 rows as the cell's displayed value.
df.head(20)

In [2]:
# 1) (d, t, x, y) must be unique: count rows that repeat an earlier (d, t, x, y) key.
#    This only reports the number; nothing is raised if it is non-zero.
dup = df.duplicated(["d","t","x","y"]).sum()
print("dup:", dup)

# 2) Range check: summary statistics for the key columns and the count.
print(df[["d","t","x","y","count"]].describe())

# 3) Are all days / time slots present? (at least t should span 0~47)
#    Prints min, max and number of distinct values for t, then for d.
print(df["t"].min(), df["t"].max(), df["t"].nunique())
print(df["d"].min(), df["d"].max(), df["d"].nunique())

dup: 0
                  d             t             x             y         count
count  6.024808e+06  6.024808e+06  6.024808e+06  6.024808e+06  6.024808e+06
mean   3.646489e+01  2.471382e+01  3.140973e+01  1.493889e+02  3.063422e+00
std    2.146972e+01  1.252862e+01  2.165999e+01  3.666576e+01  8.499277e+00
min    0.000000e+00  0.000000e+00  1.000000e+00  1.000000e+00  1.000000e+00
25%    1.800000e+01  1.500000e+01  2.000000e+01  1.410000e+02  1.000000e+00
50%    3.600000e+01  2.500000e+01  3.000000e+01  1.550000e+02  2.000000e+00
75%    5.500000e+01  3.500000e+01  4.000000e+01  1.710000e+02  4.000000e+00
max    7.400000e+01  4.700000e+01  9.990000e+02  9.990000e+02  1.201000e+03
0 47 48
0 74 75


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display


# Make sure these columns are plain integers; they are used below as slider
# bounds, array indices and cell values.
for c in ["d","t","x","y","count"]:
    df[c] = df[c].astype(int)

# Observed bounds: d/t set the slider ranges, x/y set the size of the grid.
d_min, d_max = int(df["d"].min()), int(df["d"].max())
t_min, t_max = int(df["t"].min()), int(df["t"].max())
# NOTE(review): the describe() output saved in this notebook shows x and y topping
# out at 999 while their 75th percentiles are 40 and 171. 999 may be a sentinel or
# outlier rather than a real coordinate; if so it inflates the grid to 1000 x 1000.
# Confirm against the data source before relying on these maxima.
x_max, y_max = int(df["x"].max()), int(df["y"].max())

# Make (d, t) lookups faster: index by (d, t) and sort the index.
df_idx = df.set_index(["d","t"]).sort_index()

def _subset(d, t):
    """Look up the rows stored under index key ``(d, t)`` in ``df_idx``.

    Returns a DataFrame with a fresh 0..n-1 index holding the non-index
    columns (x, y, count), or ``None`` when the key is not in the index.
    """
    key = (d, t)
    try:
        rows = df_idx.loc[key]
    except KeyError:
        # Nothing is stored for this (d, t) pair.
        return None
    # ``.loc`` may hand back a Series instead of a DataFrame; normalise it to a
    # one-row frame so callers always receive the same type.
    frame = rows.to_frame().T if isinstance(rows, pd.Series) else rows
    return frame.reset_index(drop=True)

def build_grid(d, t):
    """Rasterise the counts of one (d, t) slice onto a dense 2-D grid.

    Parameters
    ----------
    d, t : int
        Key passed to ``_subset`` to fetch the rows of that slice.

    Returns
    -------
    grid : numpy.ndarray
        float32 array of shape ``(y_max + 1, x_max + 1)``, addressed as
        ``grid[y, x]``; cells without a row stay 0.
    total : int
        Sum of the ``count`` column for the slice (0 when the slice is
        missing or empty).
    """
    grid = np.zeros((y_max + 1, x_max + 1), dtype=np.float32)

    sub = _subset(d, t)
    if sub is None or len(sub) == 0:
        return grid, 0

    xs = sub["x"].to_numpy()
    ys = sub["y"].to_numpy()
    cs = sub["count"].to_numpy()

    # (x, y) is expected to be unique after aggregation. Accumulate anyway:
    # fancy-index assignment (`grid[ys, xs] = cs`, and likewise `+=`) keeps only
    # one value per repeated cell, which would make the grid disagree with the
    # returned total. np.add.at is unbuffered, so repeated indices add up; for
    # unique (x, y) the result is the same as plain assignment into zeros.
    np.add.at(grid, (ys, xs), cs.astype(grid.dtype))
    return grid, int(cs.sum())

def draw(d, t, log1p=False):
    """Render the (d, t) grid from ``build_grid`` as a heatmap.

    When ``log1p`` is true the grid is passed through ``np.log1p`` before
    plotting and the colorbar label changes accordingly. The slice total
    returned by ``build_grid`` is shown in the title.
    """
    grid, total = build_grid(d, t)
    if log1p:
        image, cbar_label = np.log1p(grid), "log1p(count)"
    else:
        image, cbar_label = grid, "count"

    fig, ax = plt.subplots(figsize=(8, 6))
    # origin="lower" places row 0 of the array at the bottom of the axes.
    heat = ax.imshow(image, origin="lower", aspect="auto")
    fig.colorbar(heat, ax=ax, label=cbar_label)
    ax.set_title(f"Sapporo crowd density heatmap | d={d}, t={t} | total={total}")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    plt.show()

# --- Interactive controls (sliders) ---
# Slider ranges use the d/t bounds computed earlier in this cell; both start at the minimum.
d_slider = widgets.IntSlider(value=d_min, min=d_min, max=d_max, step=1, description="d", continuous_update=True)
t_slider = widgets.IntSlider(value=t_min, min=t_min, max=t_max, step=1, description="t", continuous_update=True)
# Checkbox that toggles the log1p argument of draw().
log_chk  = widgets.Checkbox(value=False, description="log1p", indent=False)

# Lay the three controls out in a single row.
ui = widgets.HBox([d_slider, t_slider, log_chk])
# Bind the widget values to draw()'s keyword arguments; the rendered figure goes into `out`.
out = widgets.interactive_output(draw, {"d": d_slider, "t": t_slider, "log1p": log_chk})

# Show the controls followed by the plot output area.
display(ui, out)


HBox(children=(IntSlider(value=0, description='d', max=74), IntSlider(value=0, description='t', max=47), Check…

Output()