### Moran's I

In [None]:
import os

# Switch the working directory to the "utils" folder that sits next to the
# current directory. The "../Data" and "../ComputedData" relative paths used
# further down are resolved from that folder.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
analyze_path = os.path.join(parent_dir, "utils")
os.chdir(analyze_path)

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from utils import get_grid, read_data

# Load the combined dataset through the project helper.
combined_data = read_data()

# Township-level administrative boundaries (TOWNNAME / COUNTYNAME attributes).
township_shapefile = '../Data/OFiles_9e222fea-bafb-4436-9b17-10921abc6ef2/TOWN_MOI_1140318.shp'
taiwan = gpd.read_file(township_shapefile)

# Townships and counties removed from the study area.
# NOTE(review): these look like offshore / island administrative units that are
# dropped to keep the analysis on the main island -- confirm the intent.
excluded_towns = ['旗津區', '頭城鎮', '蘭嶼鄉', '綠島鄉', '琉球鄉']
excluded_counties = ['金門縣', '連江縣', '澎湖縣']

# A row is dropped when either its township or its county is on a list.
is_excluded = (
    taiwan['TOWNNAME'].isin(excluded_towns)
    | taiwan['COUNTYNAME'].isin(excluded_counties)
)
taiwan = taiwan[~is_excluded]

In [10]:
# Save the filtered boundary layer as a shapefile.
import os

taiwan_output_path = '../ComputedData/Taiwan/taiwan.shp'

# The write fails when the target folder is missing (e.g. on a fresh checkout),
# so make sure it exists first; exist_ok keeps re-runs of this cell harmless.
os.makedirs(os.path.dirname(taiwan_output_path), exist_ok=True)
taiwan.to_file(taiwan_output_path)

In [None]:
# Build the hexagonal grid from the combined data via the project helper.
hex_grid = get_grid(combined_data, hex_size=0.01, threshold=-1)

# Put the boundary layer in the grid's CRS so the spatial test below compares
# geometries expressed in the same coordinates.
taiwan = taiwan.to_crs(hex_grid.crs)

# Keep only the cells that intersect the merged outline of all boundaries.
# NOTE(review): GeoPandas 1.0 deprecates `unary_union` in favour of
# `union_all()`; switch once the environment's GeoPandas version is confirmed.
taiwan_outline = taiwan.unary_union
cell_on_land = hex_grid.intersects(taiwan_outline)
hex_grid = hex_grid[cell_on_land]

### Infrastructure Analysis

In [None]:
# Combined infrastructure count per row: MRT + YouBike + parking-lot counts
# (the three "*_100m_count" columns), added element-wise.
infrastructure_total = combined_data['mrt_100m_count'].add(combined_data['youbike_100m_count'])
infrastructure_total = infrastructure_total.add(combined_data['parkinglot_100m_count'])
combined_data['full_infrastructure'] = infrastructure_total

In [None]:
from utils_macro import plot_facility_vs_human_vehicle_subplot

# Facility-count columns to plot, including the combined total column.
facility_columns = [
    'youbike_100m_count',
    'mrt_100m_count',
    'parkinglot_100m_count',
    'full_infrastructure',
]

# Accident-category column and the category value handed to the helper
# ('人與車' = person-vs-vehicle).
accident_category_column = '事故類型及型態大類別名稱'
person_vehicle_category = '人與車'

plot_facility_vs_human_vehicle_subplot(
    combined_data,
    facilities=facility_columns,
    accident_col=accident_category_column,
    accident_type=person_vehicle_category,
)

### LISA plot

In [None]:
from utils_macro import LocalMoranAnalysis

# Local Moran (LISA) analysis on the clipped hex grid, with the boundary layer
# passed alongside it.
# NOTE(review): k is presumably the number of nearest neighbours used for the
# spatial weights -- confirm in utils_macro.LocalMoranAnalysis.
lisa_k = 6
lma = LocalMoranAnalysis(hex_grid, taiwan, k=lisa_k)

# Compute the statistic first, then draw the two figures.
lma.calculate_local_moran()
lma.plot_lisa()
lma.lisa_scatter_plot()

In [8]:
# Export the grid held by the Local Moran analysis object to CSV
# (row index omitted).
import os

local_moran_csv = '../ComputedData/Grid/local_moran_results.csv'

# to_csv raises when the target folder does not exist, so create it first;
# exist_ok keeps re-runs of this cell harmless.
os.makedirs(os.path.dirname(local_moran_csv), exist_ok=True)
lma.hex_grid.to_csv(local_moran_csv, index=False)

### Calculate and Plot GI
Run the Gi* calculation for several KNN neighbourhood sizes and report how many grid cells come out as significant hot or cold spots for each size.

In [None]:
from utils import calculate_gi, plot_map

# Queen adjacency is the only option left out of this sweep, because it has no
# parameter that can be varied.
# Other sweeps that were tried earlier:
#   for i in range(3000, 15000, 3000):
#   for i in range(10000, 30000, 5000):
for k in range(6, 10):  # with adjacency='knn', the value is the number of neighbouring records
    print(k)
    grid = calculate_gi(k, hex_grid, adjacency='knn')

    # Rows whose hotspot label is anything other than 'Not Significant'.
    significant = grid[grid['hotspot'] != 'Not Significant']
    n_significant = len(significant)
    if n_significant:
        print('sig:', n_significant)

In [None]:
from utils import calculate_gi, plot_map

# Gi* run whose result is kept in `grid`: KNN adjacency with a value of 6.
# Alternative configurations that were tried:
#   grid = calculate_gi(10000, hex_grid, adjacency=None)
#   grid = calculate_gi(10000, hex_grid, adjacency='queen')
gi_neighbors = 6
grid = calculate_gi(gi_neighbors, hex_grid, adjacency="knn")

# Optional follow-ups, currently disabled:
#   grid.to_csv('../ComputedData/Grid/grid_gi.csv', index=False)
#   plot_map(filtered_A2, grid.to_crs('EPSG:4326'), gi=True)

In [None]:
import matplotlib.colors as mcolors

# Hotspot classes in legend order, each paired with its fill colour.
# The order matters: `categories` and `cmap` are both derived from it, so the
# n-th class is always drawn with the n-th colour.
hotspot_colours = {
    'Hotspot 99%': '#800026',      # dark red
    'Hotspot 95%': '#FC4E2A',      # red
    'Hotspot 90%': '#FD8D3C',      # light red
    'Not Significant': '#d9d9d9',  # grey
    'Coldspot 90%': '#6baed6',     # light blue
    'Coldspot 95%': '#3182bd',     # blue
    'Coldspot 99%': '#08519c',     # dark blue
}
categories = list(hotspot_colours)
cmap = mcolors.ListedColormap(list(hotspot_colours.values()))

# Reproject the grid to EPSG:4326 (original note: same coordinates as folium).
grid = grid.to_crs(epsg=4326)

fig, ax = plt.subplots(figsize=(10, 10))

# Base layer: boundaries drawn in white with thin black outlines.
taiwan_lonlat = taiwan.to_crs(epsg=4326)
taiwan_lonlat.plot(ax=ax, color='white', edgecolor='black', linewidth=0.5)

# Legend placed outside the axes, anchored at the upper-right corner, no frame.
legend_options = {
    'bbox_to_anchor': (1.05, 1),
    'loc': 'upper left',
    'frameon': False,
}

# Overlay: grid cells coloured by their categorical 'hotspot' label.
grid.plot(
    ax=ax,
    column='hotspot',
    categorical=True,
    categories=categories,
    cmap=cmap,
    edgecolor='grey',
    linewidth=0.2,
    alpha=0.6,
    legend=True,
    legend_kwds=legend_options,
)

ax.set_title('Hotspot Analysis (Getis-Ord Gi*) - 90%, 95%, 99% Confidence Levels')
ax.axis('off')
plt.show()

### Find the nearest county for each significant (hot/cold spot) hexagon

In [None]:
# County name + geometry for every boundary row (independent copy).
county_columns = ['COUNTYNAME', 'geometry']
counties = taiwan[county_columns].copy()

# Grid cells whose hotspot label is anything other than 'Not Significant'
# (independent copy, so columns can be added to it later).
is_significant = hex_grid['hotspot'] != 'Not Significant'
hot_hex = hex_grid[is_significant].copy()

def find_nearest_county(hexagon, counties_gdf):
    """Return the COUNTYNAME of the row in *counties_gdf* closest to *hexagon*.

    Args:
        hexagon: The geometry to measure from.
        counties_gdf: Object providing ``distance(geometry)`` (one distance per
            row) and label-based ``.loc`` access to a ``'COUNTYNAME'`` column.

    Returns:
        The ``'COUNTYNAME'`` value of the row with the smallest distance; when
        several rows share the minimum, the first one wins.

    NOTE(review): distances come straight from ``counties_gdf.distance`` and
    are therefore in the units of the layers' CRS -- confirm both layers are
    in a projected CRS when this is called.
    """
    # Distance from this hexagon to every row, then the label of the minimum.
    nearest_label = counties_gdf.distance(hexagon).idxmin()
    return counties_gdf.loc[nearest_label, 'COUNTYNAME']

# Look up the nearest county name for every significant hexagon; `counties`
# is forwarded to the helper as its second positional argument.
hot_hex['nearest_county'] = hot_hex['geometry'].apply(find_nearest_county, args=(counties,))

# Show the distinct county names that were matched.
hot_hex['nearest_county'].unique()

### Find the nearest county for each hexagon

In [None]:
# Work in EPSG:3826 (TWD97 / TM2 zone 121, metre units) so that the distance
# column written by the join is expressed in metres.
counties = counties.to_crs("EPSG:3826")
hex_grid = hex_grid.to_crs("EPSG:3826")

# `counties` still holds one polygon per township (it is cut from the township
# shapefile). sjoin_nearest returns one output row per equidistant match, so a
# hexagon overlapping several townships of the same county would be repeated
# once per township. Merging townships into one shape per county removes those
# repeats. A hexagon that straddles two different counties is still matched to
# both of them.
county_shapes = counties[['COUNTYNAME', 'geometry']].dissolve(by='COUNTYNAME', as_index=False)

hex_with_county = gpd.sjoin_nearest(
    hex_grid,
    county_shapes[['COUNTYNAME', 'geometry']],
    how='left',
    distance_col='dist_to_county',
)

In [None]:
from utils_macro import hotspot_in_county
from config import category_value_map, feature_name_map, countycity_dct

# Per-county hotspot summary from the project helper: significant hexagons,
# the hexagon-to-county join and the county lookup dict go in; normalisation
# is off and English output is requested.
hotspot_in_county(
    hot_hex,
    hex_with_county,
    countycity_dct,
    normalize=False,
    en=True,
)

主要以道路設計進行，因為天氣、車輛無法討論空間同質

In [None]:
from utils_macro import attribute_in_city

# Attribute to break down per city: the accident type/pattern sub-category
# column. `col` is read again by the similarity heatmap cell further down.
col = '事故類型及型態子類別名稱'

# Per-city table for that attribute, built by the project helper with English
# output requested.
pivot_sorted = attribute_in_city(
    combined_data,
    hot_hex,
    col,
    countycity_dct,
    feature_name_map,
    category_value_map,
    en=True,
)

[<br/>
    '臺北市', '新北市',  # 最北 <br/>
    '桃園市', '新竹市', '新竹縣', '宜蘭縣',  # 北部<br/>
    '苗栗縣', '臺中市', '彰化縣',  # 中部<br/>
    '嘉義市', '嘉義縣', '臺南市', '高雄市', '屏東縣',  # 南部<br/>
    '花蓮縣', '臺東縣'  # 東部<br/>
]<br/>

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns

# Pairwise cosine similarity between the rows of `pivot_sorted`, labelled on
# both axes with the pivot's index.
row_labels = pivot_sorted.index
pairwise_cosine = cosine_similarity(pivot_sorted.values)
similarity_matrix = pd.DataFrame(pairwise_cosine, index=row_labels, columns=row_labels)

# Display name for the analysed column; falls back to the raw column name when
# the map has no entry for it.
col_en = feature_name_map.get(col, col)

# Annotated heatmap without a colour bar; x tick labels tilted for legibility.
plt.figure(figsize=(10, 8))
sns.heatmap(similarity_matrix, annot=True, cmap='YlGnBu', cbar=False)
plt.title(f'Similarity of {col_en} proportions across cities')
plt.xticks(rotation=30)
plt.show()