### Moran's I

In [None]:
import os

# Make the sibling "utils" directory (next to the current working directory)
# the working directory. Relative paths used later, such as
# '../ComputedDataV2/...', are therefore resolved from inside "utils";
# presumably this is also what lets `from utils import ...` succeed.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
analyze_path = os.path.join(parent_dir, "utils")
os.chdir(analyze_path)

In [None]:
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from utils import get_grid, read_data, read_taiwan_specific

# Load the main record table through the project helper (presumably the
# accident records, given the accident-type columns used further down).
combined_data = read_data()
# read_taiwan_specific() returns a pair; `taiwan` is used below as a layer with
# 'COUNTYNAME' and 'geometry' columns, `grid_filter` is not used in these cells.
taiwan, grid_filter = read_taiwan_specific()
# Write the boundary layer to disk; the path is relative to the working
# directory set by the first cell.
taiwan.to_file('../ComputedDataV2/Taiwan/taiwan.shp')

# find combined_data which are in taiwan
# combined_data_gdf = gpd.GeoDataFrame(combined_data, 
#                                      geometry=gpd.points_from_xy(combined_data['經度'], 
#                                                                  combined_data['緯度']), crs='EPSG:4326')
# combined_data_gdf.to_crs(epsg=3826, inplace=True)
# combined_data_in_taiwan = gpd.sjoin(combined_data_gdf, taiwan, how='inner', predicate='within')
# combined_data_in_taiwan.to_csv('../ComputedDataV2/Accident/combined_data_in_taiwan.csv', index=False)

# combined_data = pd.read_csv('../ComputedDataV2/Accident/combined_data_in_taiwan.csv')

In [None]:
# Build the hexagonal grid from the records via the project helper.
# NOTE(review): the meaning/units of hex_size=0.01 and threshold=-1 are defined
# inside get_grid -- confirm there.
hex_grid = get_grid(combined_data, hex_size=0.01, threshold=-1)
# Reproject the boundary layer into the grid's CRS so the intersection test
# below compares geometries in the same coordinate system.
taiwan = taiwan.to_crs(hex_grid.crs)
# Keep only the cells that touch the merged boundary geometry.
# `unary_union` is deprecated since GeoPandas 1.0 in favour of `union_all()`;
# prefer the new method and fall back so older installations keep working.
hex_grid = hex_grid[
    hex_grid.intersects(
        taiwan.union_all() if hasattr(taiwan, "union_all") else taiwan.unary_union
    )
]

### Infrastructure Analysis

In [None]:
# Combined facility count: element-wise sum of the three *_100m_count columns.
combined_data['full_infrastructure'] = (
    combined_data['mrt_100m_count']
    + combined_data['youbike_100m_count']
    + combined_data['parkinglot_100m_count']
)

In [None]:
from utils_macro import plot_facility_vs_human_vehicle_subplot

# Plot the four facility-count columns (the three individual counts plus the
# combined 'full_infrastructure' total) for one accident category.
# accident_col '事故類型及型態大類別名稱' = "accident type/pattern major category";
# accident_type '人與車' = "person vs. vehicle".
# NOTE(review): the exact chart layout is decided inside the helper -- the
# name suggests one subplot per facility; confirm in utils_macro.
plot_facility_vs_human_vehicle_subplot(
    combined_data,
    facilities=['youbike_100m_count', 'mrt_100m_count', 'parkinglot_100m_count', 'full_infrastructure'],
    accident_col='事故類型及型態大類別名稱',
    accident_type='人與車'
)

### LISA plot

In [None]:
from utils_macro import LocalMoranAnalysis

# Run the project's local Moran analysis on the clipped grid, passing the
# boundary layer as well.
# NOTE(review): k=6 is presumably the number of nearest neighbours used for
# the spatial weights -- confirm in LocalMoranAnalysis.
lma = LocalMoranAnalysis(hex_grid, taiwan, k=6)
# Compute the statistics first; the two plotting calls below follow it and
# presumably rely on its results.
lma.calculate_local_moran()
lma.plot_lisa()
lma.lisa_scatter_plot()

In [None]:
# Export the grid held by the analysis object to CSV, omitting the index column.
lma.hex_grid.to_csv('../ComputedDataV2/Grid/local_moran_results.csv', index=False)

### Calculate and Plot GI

In [None]:
from utils import calculate_gi, plot_map

# Compute the Gi statistic on the clipped grid with 'knn' adjacency.
# NOTE(review): the leading 6 is presumably the neighbour count k, matching
# k=6 in the local Moran cell -- confirm against calculate_gi's signature.
grid = calculate_gi(6, hex_grid, adjacency='knn')
# Export the result to CSV, omitting the index column.
grid.to_csv('../ComputedDataV2/Grid/grid_gi.csv', index=False)

# plot_map(filtered_A2, grid.to_crs('EPSG:4326'), gi=True)

In [None]:
from utils_macro import plot_gi_map
# Plot the Gi result grid; the boundary layer is passed along as the second argument.
plot_gi_map(grid, taiwan)

### Find the nearest county for each significant grid hexagon

In [None]:
# Independent copy of just the county name and geometry columns.
counties = taiwan[['COUNTYNAME', 'geometry']].copy()
# Keep every hexagon whose 'hotspot' label is anything other than
# 'Not Significant'; copy so that columns added later do not write into hex_grid.
# NOTE(review): this reads 'hotspot' from hex_grid rather than from `grid`;
# it relies on an earlier step having added that column to hex_grid -- confirm.
hot_hex = hex_grid[hex_grid['hotspot'].ne('Not Significant')].copy()

def find_nearest_county(hexagon, counties_gdf):
    """Return the 'COUNTYNAME' of the row in *counties_gdf* closest to *hexagon*.

    Args:
        hexagon: Geometry handed to ``counties_gdf.distance``.
        counties_gdf: Table exposing ``distance(geometry)`` (one distance per
            row) and a ``'COUNTYNAME'`` column.

    Returns:
        The ``'COUNTYNAME'`` value at the index label with the smallest
        distance, as selected by ``idxmin``.
    """
    # One distance per county row; idxmin yields the label of the smallest one.
    closest_label = counties_gdf.distance(hexagon).idxmin()
    return counties_gdf.loc[closest_label, 'COUNTYNAME']

# Label every significant hexagon with the name of its closest county,
# evaluating the geometries in row order.
hot_hex['nearest_county'] = [
    find_nearest_county(hex_geom, counties) for hex_geom in hot_hex['geometry']
]
# Cell output: the distinct county names that contain or neighbour a significant hexagon.
hot_hex['nearest_county'].unique()

### Find the nearest county for each hexagon

In [None]:
# Attach the nearest county to every hexagon of the full grid (left join, so
# all hexagons are kept) and record the distance in 'dist_to_county'.
# NOTE(review): per the GeoPandas docs, sjoin_nearest emits one row per
# equidistant match, so a hexagon overlapping two counties may appear twice --
# verify that downstream counts tolerate duplicates.
hex_with_county = gpd.sjoin_nearest(hex_grid, counties[['COUNTYNAME', 'geometry']], how='left', distance_col='dist_to_county')
# hex_with_county.to_csv('../ComputedDataV2/Grid/hex_grid_with_county.csv', index=False)

In [None]:
from utils_macro import hotspot_in_county
from config import category_value_map, feature_name_map, countycity_dct

# Summarise the significant hexagons per county using the hexagon-to-county
# table built above. normalize=False and en=True are passed through to the
# helper; presumably they select raw counts and English labels -- confirm in
# utils_macro.
hotspot_in_county(hot_hex, hex_with_county, countycity_dct, normalize=False, en=True)

主要以道路設計進行，因為天氣、車輛無法討論空間同質

In [None]:
from utils_macro import attribute_in_city

# Attribute to break down by city: '事故類型及型態子類別名稱'
# ("accident type/pattern sub-category name").
col = '事故類型及型態子類別名稱'
# Build the per-city table for that attribute; the result is reused below,
# where each row is compared against every other row.
# NOTE(review): row order and value scaling are decided inside
# attribute_in_city -- confirm there.
pivot_sorted = attribute_in_city(combined_data, hot_hex, col, countycity_dct, feature_name_map, category_value_map, en=True)

[<br/>
    '臺北市', '新北市',  # 最北 <br/>
    '桃園市', '新竹市', '新竹縣', '宜蘭縣',  # 北部<br/>
    '苗栗縣', '臺中市', '彰化縣',  # 中部<br/>
    '嘉義市', '嘉義縣', '臺南市', '高雄市', '屏東縣',  # 南部<br/>
    '花蓮縣', '臺東縣'  # 東部<br/>
]<br/>

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import seaborn as sns

# Cosine similarity between every pair of rows of pivot_sorted. The row labels
# are reused on both axes so the square matrix can be read label-to-label.
similarity_matrix = pd.DataFrame(
    data=cosine_similarity(pivot_sorted.to_numpy()),
    index=pivot_sorted.index,
    columns=pivot_sorted.index,
)

# Display name for the analysed column; falls back to the raw column name when
# feature_name_map has no entry for it.
col_en = feature_name_map.get(col, col)

# Annotated heatmap of the similarity matrix, drawn without a colour bar.
plt.figure(figsize=(10, 8))
sns.heatmap(data=similarity_matrix, annot=True, cbar=False, cmap='YlGnBu')
plt.title(f'Similarity of {col_en} proportions across cities')
plt.xticks(rotation=30)
plt.show()