# Macro

### Moran's I

In [None]:
import os

# Switch the working directory to the sibling "utils" folder (parent of the
# current directory + "utils"). Later cells import from `utils` and read data
# through "../..." relative paths, both of which resolve against this directory.
analyze_path = os.path.join(os.path.dirname(os.getcwd()), "utils")
os.chdir(analyze_path)

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

# Accident tables (A1 / A2) and the township-level boundary shapefile.
dataA1 = pd.read_csv('../ComputedData/Accident/DataA1_with_MYP.csv')
dataA2 = pd.read_csv('../ComputedData/Accident/DataA2_with_MYP.csv')
taiwan = gpd.read_file('../Data/OFiles_9e222fea-bafb-4436-9b17-10921abc6ef2/TOWN_MOI_1140318.shp')

# Townships / counties removed from the base map.
# NOTE(review): these look like outlying islands and offshore areas — confirm the intent.
excluded_towns = ['旗津區', '頭城鎮', '蘭嶼鄉', '綠島鄉', '琉球鄉']
excluded_counties = ['金門縣', '連江縣', '澎湖縣']
is_excluded = taiwan['TOWNNAME'].isin(excluded_towns) | taiwan['COUNTYNAME'].isin(excluded_counties)
taiwan = taiwan[~is_excluded]

In [None]:
from utils import get_grid

# Keep only rows whose party order ('當事者順位') equals 1 and tag each row with
# its source table plus a unit accident count.
# NOTE(review): presumably this leaves one row per accident — confirm against the data dictionary.
# `.assign` builds new frames, so no column is written onto a filtered slice
# (the original pattern triggers pandas' SettingWithCopyWarning).
filtered_A2 = dataA2[dataA2['當事者順位'] == 1].assign(source='A2', num_accidents=1)
filtered_A1 = dataA1[dataA1['當事者順位'] == 1].assign(source='A1', num_accidents=1)

combined_data = pd.concat([filtered_A1, filtered_A2], ignore_index=True)

hex_grid = get_grid(combined_data, hex_size=0.01, threshold=-1)
taiwan = taiwan.to_crs(hex_grid.crs)  # make sure both layers share one CRS

# Keep only hexagons that touch the (filtered) land polygons.
# `union_all()` replaces the deprecated `unary_union` in geopandas >= 1.0;
# fall back to the old attribute on older versions.
land_union = taiwan.union_all() if hasattr(taiwan, 'union_all') else taiwan.unary_union
hex_grid = hex_grid[hex_grid.intersects(land_union)]
# hex_grid.to_file('../ComputedData/Grid/macro_hs.geojson', driver='GeoJSON')

## Infrastructure Analysis

In [None]:
# Total facility count within 100 m: MRT + YouBike + parking lots.
# `.add` without fill_value behaves like `+`, so missing values still propagate.
combined_data['full_infrastructure'] = (
    combined_data['mrt_100m_count']
    .add(combined_data['youbike_100m_count'])
    .add(combined_data['parkinglot_100m_count'])
)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

# Configure a font that can render Chinese labels.
# The preferred font file lives at a machine-specific absolute path; when it is
# missing (any other machine), fall back to the family names used elsewhere in
# this notebook so the cell still runs instead of failing on the missing file.
# Relies on `os`, imported in the first cell of the notebook.
font_path = r"/Users/wangqiqian/Library/Fonts/標楷體.ttf"
if os.path.exists(font_path):
    myfont = FontProperties(fname=font_path)
else:
    myfont = FontProperties(family=['Arial Unicode Ms', 'Microsoft JhengHei'])
sns.set(style="whitegrid", font=myfont.get_name())

def plot_facility_vs_human_vehicle_subplot(data, facilities, accident_col, accident_type):
    """Draw one bar chart per facility column showing the share of a given accident type.

    For every column in ``facilities`` the rows of ``data`` are grouped by that
    column's value; each bar is the fraction of rows in the group whose
    ``accident_col`` equals ``accident_type``, and the number printed above the
    bar is the group size.

    Args:
        data: DataFrame containing ``accident_col`` and every facility column.
            It is not modified.
        facilities: Iterable of column names to group by (one subplot each).
        accident_col: Column holding the accident category.
        accident_type: Category value counted as a "hit".

    Returns:
        None. The figure is shown with ``plt.show()``.

    Notes:
        Uses the module-level ``myfont`` for the Chinese titles and labels.
        The grid is 2 columns wide and at least 2 rows tall; extra rows are
        added when more than four facilities are given.
    """
    facilities = list(facilities)

    # Computed once and kept as a standalone Series: the indicator does not
    # depend on the facility, and writing it into `data` would silently add a
    # column to the caller's frame.
    is_target = (data[accident_col] == accident_type).astype(int)

    n_cols = 2
    n_rows = max(2, (len(facilities) + n_cols - 1) // n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 6 * n_rows))
    axes = axes.flatten()

    for ax, facility_col in zip(axes, facilities):
        grouped = (
            is_target.groupby(data[facility_col])
            .agg(human_vehicle_ratio='mean', total_count='size')
            .reset_index()
        )

        sns.barplot(
            data=grouped,
            x=facility_col,
            y='human_vehicle_ratio',
            palette="Blues_d",
            ax=ax,
        )

        # Annotate each bar with its group size; bars sit at positions 0..n-1
        # in the same order as the rows of `grouped`.
        for position, (ratio, count) in enumerate(
            zip(grouped['human_vehicle_ratio'], grouped['total_count'])
        ):
            ax.text(
                x=position,
                y=ratio + 0.001,
                s=f"{int(count)}",
                ha='center',
                va='bottom',
                fontsize=10,
                fontproperties=myfont,
            )

        ax.set_title(f"{facility_col} 附近人與車事故比例", fontsize=14, fontproperties=myfont)
        ax.set_xlabel(f"{facility_col} Count", fontsize=12, fontproperties=myfont)
        ax.set_ylabel("人與車事故比例", fontsize=12, fontproperties=myfont)
        ax.tick_params(axis='x', labelsize=10)
        ax.tick_params(axis='y', labelsize=10)
        ax.grid(axis='y', linestyle='--', alpha=0.7)

    # Remove the unused panels of the grid.
    for unused_ax in axes[len(facilities):]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()

# Share of "person vs. vehicle" accidents by nearby facility count,
# for each facility type and for the combined total.
facility_columns = [
    'youbike_100m_count',
    'mrt_100m_count',
    'parkinglot_100m_count',
    'full_infrastructure',
]
plot_facility_vs_human_vehicle_subplot(
    combined_data,
    facility_columns,
    '事故類型及型態大類別名稱',
    '人與車',
)

## Accident map


In [None]:
# Choropleth of accident counts per hexagon drawn over the township outlines.
fig, ax = plt.subplots(figsize=(10, 10))
taiwan.plot(ax=ax, color='white', edgecolor='black')

hex_style = dict(
    cmap='OrRd',
    legend=True,
    edgecolor='black',
    linewidth=0.2,
    alpha=0.6,
)
hex_grid.plot(column='num_accidents', ax=ax, **hex_style)

plt.title('Hexagon Accident Counts (num_accidents > 0)')
plt.axis('off')
plt.show()

### This is now deprecated because ISA isn't intuitive for hexagons.

In [None]:
from utils import incremental_spatial_autocorrelation, incremental_spatial_autocorrelation_knn

# Incremental Spatial Autocorrelation (ISA), stepping the distance threshold
# measured from grid-cell centre to grid-cell centre.
isa_params = dict(value_col='num_accidents', min_dist=1000, max_dist=5000, step=1000)
thresholds, moran_I, z_scores, p_values = incremental_spatial_autocorrelation(hex_grid, **isa_params)
# KNN-based alternative:
# thresholds, moran_I, z_scores, p_values = incremental_spatial_autocorrelation_knn(
#     hex_grid, value_col='num_accidents', min_k=5, max_k=30, step=1
# )

# Z-score as a function of the distance threshold (converted from m to km).
plt.figure(figsize=(10, 6))
plt.plot(thresholds / 1000, z_scores, marker='o')
plt.xlabel('Distance Threshold (km)')
plt.ylabel('Z-Score')
plt.title("Incremental Spatial Autocorrelation (ISA)")
plt.grid(True)
plt.show()

# Report the threshold with the highest Z-score.
best_idx = np.argmax(z_scores)
best_distance = thresholds[best_idx]
for summary_line in (
    f"最佳分析距離 (m): {best_distance}",
    f"Z-score: {z_scores[best_idx]:.4f}",
    f"Moran's I: {moran_I[best_idx]:.4f}",
):
    print(summary_line)

## LISA plot

In [None]:
from utils_lisa import LocalMoranAnalysis

# Local Moran (LISA) analysis on the hexagon grid, with the township layer
# passed in alongside it.
# NOTE(review): k=6 is presumably the number of nearest neighbours used for the
# spatial weights — confirm in utils_lisa.LocalMoranAnalysis.
analysis = LocalMoranAnalysis(hex_grid, taiwan, k=6)
# The statistics are computed first; the two plotting calls follow.
analysis.calculate_local_moran()
analysis.plot_lisa()
analysis.lisa_scatter_plot()

### Calculate and plot Gi*
This counts the statistically significant cells for each number of neighbours (k) used in the KNN weights.

In [None]:
from utils import calculate_gi, plot_map

# Only 'queen' adjacency cannot be swept here, because it has no parameter to vary.
# Distance-based sweeps tried previously:
# for i in range(3000, 15000, 3000):
# for i in range(10000, 30000, 5000):
for k in range(6, 10):  # with knn, the first argument is the number of neighbours
    print(k)
    grid = calculate_gi(k, hex_grid, adjacency='knn')
    significant = grid[grid['hotspot'] != 'Not Significant']
    if len(significant) > 0:
        print('sig:', len(significant))

In [None]:
from utils import calculate_gi, plot_map
# Final Gi* run used by the following cells: knn adjacency with 6 neighbours.
# Plotting requires converting back to the original CRS.
# Alternative adjacency settings tried earlier:
# grid = calculate_gi(10000, hex_grid, adjacency=None)
# grid = calculate_gi(10000, hex_grid, adjacency='queen')
grid = calculate_gi(6, hex_grid, adjacency='knn')
# grid.to_file('../ComputedData/Grid/macro_gi.geojson', driver='GeoJSON')

# plot_map(filtered_A2, grid.to_crs('EPSG:4326'), gi=True)

In [None]:
import matplotlib.colors as mcolors

# Category -> colour, listed in legend order (hot spots, neutral, cold spots).
hotspot_colors = {
    'Hotspot 99%': '#800026',      # dark red
    'Hotspot 95%': '#FC4E2A',      # red
    'Hotspot 90%': '#FD8D3C',      # light red
    'Not Significant': '#d9d9d9',  # grey
    'Coldspot 90%': '#6baed6',     # light blue
    'Coldspot 95%': '#3182bd',     # blue
    'Coldspot 99%': '#08519c',     # dark blue
}
categories = list(hotspot_colors.keys())
cmap = mcolors.ListedColormap(list(hotspot_colors.values()))

# Reproject to EPSG:4326 (the same coordinates folium uses) before drawing.
grid = grid.to_crs(epsg=4326)

fig, ax = plt.subplots(figsize=(10, 10))
taiwan.to_crs(epsg=4326).plot(ax=ax, color='white', edgecolor='black', linewidth=0.5)

legend_options = {
    'bbox_to_anchor': (1.05, 1),
    'loc': 'upper left',
    'frameon': False,
}
grid.plot(
    column='hotspot',
    categorical=True,
    categories=categories,
    cmap=cmap,
    legend=True,
    legend_kwds=legend_options,
    edgecolor='grey',
    linewidth=0.2,
    alpha=0.6,
    ax=ax,
)

plt.title('Hotspot Analysis (Getis-Ord Gi*) - 90%, 95%, 99% Confidence Levels')
plt.axis('off')
plt.show()

### Find the nearest county for each hot-spot hexagon

In [None]:
# County name + geometry for every polygon of the `taiwan` layer.
counties = taiwan[['COUNTYNAME', 'geometry']].copy()
# Hexagons whose Gi* class is anything other than 'Not Significant'.
# NOTE(review): this reads hex_grid['hotspot'], but the visible cells only
# assign the calculate_gi result to `grid` — confirm that calculate_gi also
# writes the 'hotspot' column onto hex_grid in place.
hot_hex = hex_grid[hex_grid['hotspot'] != 'Not Significant'].copy()

def find_nearest_county(hexagon, counties_gdf):
    """Return the COUNTYNAME of the row in ``counties_gdf`` closest to ``hexagon``.

    The distance from ``hexagon`` to every row is computed with
    ``counties_gdf.distance``; the row with the smallest distance wins
    (the first one when several rows tie).
    """
    nearest_label = counties_gdf.distance(hexagon).idxmin()
    return counties_gdf.loc[nearest_label, 'COUNTYNAME']

# Label every hot-spot hexagon with its nearest county, then list the distinct counties found.
hot_hex['nearest_county'] = hot_hex['geometry'].apply(find_nearest_county, args=(counties,))
hot_hex['nearest_county'].unique()

## Find the nearest county for each hexagon

In [None]:
# Project both layers to EPSG:3826 so the nearest-neighbour distances are planar.
counties = counties.to_crs("EPSG:3826")
hex_grid = hex_grid.to_crs("EPSG:3826")

# Nearest county polygon for each hexagon.
hex_with_county = gpd.sjoin_nearest(
    hex_grid,
    counties[['COUNTYNAME', 'geometry']],
    how='left',
    distance_col='dist_to_county',
)

# sjoin_nearest returns one row per *equally near* neighbour, so a hexagon that
# touches several polygons (distance 0 to each) appears several times and would
# be over-counted in any per-county hexagon tally. Keep exactly one row per
# hexagon. Ties are resolved towards the lowest right-hand index, and the
# original hexagon order is restored afterwards.
# The step is skipped when the hexagon index is not unique, because then
# duplicate labels cannot be told apart from genuine separate hexagons.
if hex_grid.index.is_unique:
    if 'index_right' in hex_with_county.columns:
        hex_with_county = hex_with_county.sort_values('index_right', kind='stable')
    hex_with_county = hex_with_county[~hex_with_county.index.duplicated(keep='first')]
    hex_with_county = hex_with_county.loc[hex_grid.index]

### With normalization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['font.family'] = ['Arial Unicode Ms']

# Number of hot/cold-spot hexagons per (class, nearest county) pair.
hot = hot_hex[['hotspot', 'nearest_county']].value_counts()
hot_df = hot.reset_index().set_axis(['熱點', '最近縣市', '數量'], axis=1)

# Normalise each count by the number of hexagons assigned to that county.
count_hexagon_in_county = hex_with_county['COUNTYNAME'].value_counts()
hexagons_per_row = hot_df['最近縣市'].map(lambda county: count_hexagon_in_county[county])
hot_df['normalized_count'] = hot_df['數量'] / hexagons_per_row

plt.figure(figsize=(12, 6))
sns.barplot(data=hot_df, x='最近縣市', y='normalized_count', hue='熱點', palette='viridis')

plt.title('各縣市內熱點分布（比例）', fontsize=16)
plt.xlabel('最近縣市', fontsize=12)
plt.ylabel('比例', fontsize=12)
plt.legend(title='熱點', fontsize=10)
plt.xticks(rotation=30)

plt.tight_layout()
plt.show()

### Without normalization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

plt.rcParams['font.family'] = ['Arial Unicode Ms']

# Raw number of hot/cold-spot hexagons per (class, nearest county) pair.
hot = hot_hex[['hotspot', 'nearest_county']].value_counts()
hot_df = hot.reset_index().set_axis(['熱點', '最近縣市', '數量'], axis=1)

plt.figure(figsize=(12, 6))
sns.barplot(data=hot_df, x='最近縣市', y='數量', hue='熱點', palette='viridis')

plt.title('各縣市內熱點分布', fontsize=16)
plt.xlabel('最近縣市', fontsize=12)
plt.ylabel('數量', fontsize=12)
plt.legend(title='熱點', fontsize=10)
plt.xticks(rotation=30)

plt.tight_layout()
plt.show()

## 作圖比較
主要以道路設計進行，因為天氣、車輛無法討論空間同質

In [None]:
from itertools import chain

col = '道路型態子類別名稱'

# For every county, gather the accident row labels stored on its hot-spot hexagons.
city_indices = []
for city in hot_hex['nearest_county'].unique():
    per_hex_lists = hot_hex.loc[hot_hex['nearest_county'] == city, 'accident_indices']
    # chain.from_iterable flattens in linear time; sum(lists, []) re-copies the
    # accumulated list on every addition.
    city_indices.append((city, list(chain.from_iterable(per_hex_lists))))

result = []
for city, indices in city_indices:
    if not indices:
        continue

    # Map the labels back onto the accident table and compute the share of each category.
    city_data = combined_data.loc[indices]
    shares = city_data[col].value_counts(normalize=True)
    result.extend(
        {'城市': city, col: category, '比例': share}
        for category, share in shares.items()
    )

result_df = pd.DataFrame(result)

# Wide table (city x category) for easier comparison.
pivot = result_df.pivot(index='城市', columns=col, values='比例').fillna(0)

city_order = [
    '臺北市', '新北市',  # far north
    '桃園市', '新竹市', '新竹縣', '宜蘭縣',  # north
    '苗栗縣', '臺中市', '彰化縣',  # central
    '嘉義市', '嘉義縣', '臺南市', '高雄市', '屏東縣',  # south
    '花蓮縣', '臺東縣'  # east
]

# Select only the listed cities that actually have hot-spot data; indexing with
# the full list raises KeyError as soon as one of them is absent from `pivot`.
present_cities = [city for city in city_order if city in pivot.index]
unlisted_cities = [city for city in pivot.index if city not in city_order]
if unlisted_cities:
    # Cities outside city_order were already left out of the plot; make that visible.
    print(f"Cities with hot spots that are not in city_order (omitted): {unlisted_cities}")
pivot_sorted = pivot.loc[present_cities]

# plt.rcParams['font.family'] = ['Arial Unicode Ms']
plt.rcParams['font.family'] = ['Microsoft JhengHei']

# pivot_sorted = pivot.loc[pivot.max(axis=1).sort_values(ascending=False).index]
pivot_sorted.plot(kind='bar', stacked=True, figsize=(12, 8), colormap='tab20')

plt.ylabel('比例')
plt.title(f'各城市熱點區域內不同{col}比例')
plt.legend(title=col, bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns

# Pairwise cosine similarity between the cities' category-share vectors,
# labelled by city name on both axes.
city_labels = pivot_sorted.index
pairwise_scores = cosine_similarity(pivot_sorted.to_numpy())
similarity_matrix = pd.DataFrame(pairwise_scores, index=city_labels, columns=city_labels)

plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, annot=True, cmap='YlGnBu', cbar=False)
plt.title(f'城市間{col}比例相似')
plt.show()