# Micro

### Moran's I

In [4]:
import os

# Make the sibling "utils" directory (next to the notebook's directory) the
# working directory; the later `from utils import ...` and the '../...' data
# paths are resolved relative to it.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
analyze_path = os.path.join(parent_dir, "utils")
os.chdir(analyze_path)

In [5]:
import pandas as pd
import geopandas as gpd

from utils import specific_polygon, plot_hex_grid, get_isa_plot, calculate_gi, plot_gi

In [None]:
# Load the A1 / A2 accident tables and the township boundary shapefile.
dataA1 = pd.read_csv('../ComputedData/Accident/DataA1_with_MYP.csv')
dataA2 = pd.read_csv('../ComputedData/Accident/DataA2_with_MYP.csv')
taiwan = gpd.read_file('../Data/OFiles_9e222fea-bafb-4436-9b17-10921abc6ef2/TOWN_MOI_1140318.shp')

# Exclude the listed townships and counties from the boundary layer.
excluded_towns = ['旗津區', '頭城鎮', '蘭嶼鄉', '綠島鄉', '琉球鄉']
excluded_counties = ['金門縣', '連江縣', '澎湖縣']
taiwan = taiwan[(~taiwan['TOWNNAME'].isin(excluded_towns)) &
                (~taiwan['COUNTYNAME'].isin(excluded_counties))]

# Keep only rows whose 當事者順位 equals 1 (presumably one row per accident —
# TODO confirm). `.copy()` makes each subset an independent frame, so the
# column assignments below do not write into a slice of dataA1 / dataA2
# (which would raise SettingWithCopyWarning).
filtered_A2 = dataA2[dataA2['當事者順位'] == 1].copy()
filtered_A1 = dataA1[dataA1['當事者順位'] == 1].copy()

# Tag each row with its source table and a unit accident count.
filtered_A1['source'] = 'A1'
filtered_A2['source'] = 'A2'
filtered_A1['num_accidents'] = 1
filtered_A2['num_accidents'] = 1
combined_data = pd.concat([filtered_A1, filtered_A2], ignore_index=True)

### deprecated

In [None]:
# Polygon(s) and A2 accident records for the selected counties
specific_A2, taiwan_specific = specific_polygon(filtered_A2, taiwan, ['臺北市', '新北市', '桃園市'])
# Plot the selected region together with the hex grid
hex_grid = plot_hex_grid(specific_A2, taiwan_specific.to_crs(epsg=3826), threshold=-1, hex_size=0.001)
# Compute ISA and plot it; the returned value is used as the distance below
best_distance = get_isa_plot(specific_A2, -1)
# Compute Gi using that best distance, then plot the result
grid = calculate_gi(best_distance, hex_grid)
plot_gi(taiwan_specific, grid)

### new

In [None]:
# Polygon(s) and accident records for the selected region

# Region presets: leave exactly one `area` assignment uncommented.
# area = ['臺北市', '新北市', '桃園市', '新竹市', '新竹縣']
# area = ['臺中市', '彰化縣', '雲林縣', '苗栗縣']
# area = ['高雄市', '臺南市', '屏東縣', '嘉義市', '嘉義縣']
area = ['花蓮縣', '臺東縣', '宜蘭縣']
# area = ['臺北市', '新北市']

specific, taiwan_specific = specific_polygon(combined_data, taiwan, area)
# Plot the selected region together with the hex grid (boundaries passed in EPSG:3826)
hex_grid = plot_hex_grid(specific, taiwan_specific.to_crs(epsg=3826), threshold=-1, hex_size=0.01)
# Optional export of the grid (disabled):
# hex_grid.to_file('../ComputedData/Grid/micro_north_hs.geojson', driver='GeoJSON')

## LISA plot

In [None]:
from utils_lisa import LocalMoranAnalysis

# Local Moran analysis on the hex grid.
# k=6 is forwarded to LocalMoranAnalysis (presumably the number of nearest
# neighbours used for the spatial weights — TODO confirm in utils_lisa).
analysis = LocalMoranAnalysis(hex_grid, taiwan_specific, k=6)
analysis.calculate_local_moran()
analysis.plot_lisa()
analysis.lisa_scatter_plot()
# Interactive folium version (disabled):
# analysis.plot_lisa_folium()

## Best Getis-Ord

In [None]:
# One (parameter value, number of significant cells) pair per candidate tried.
results = []

# Earlier sweep kept for reference: range(1000, 10000, 1000)
for candidate in range(6, 10):
    print(candidate)
    # hex_grid is rebound to the result of each run, so the next iteration
    # (and later cells) see the latest output of calculate_gi.
    hex_grid = calculate_gi(candidate, hex_grid, adjacency='knn')
    significant_cells = hex_grid.loc[hex_grid['hotspot'] != 'Not Significant']
    # Record how many cells were labelled as significant for this candidate.
    results.append((candidate, len(significant_cells)))

print(results)

# Candidate with the most significant cells (first one wins on ties).
# With adjacency='knn', `best_distance` holds the winning swept value
# rather than a distance.
best_distance, max_sig_count = max(results, key=lambda pair: pair[1])

## Getis-Ord

In [None]:
# NOTE(review): the first argument is hard-coded to 6; `best_distance` from the
# sweep cell is not used here — confirm this is intentional.
# NOTE(review): later cells read hex_grid['hotspot']; whether that column reflects
# this call or the last sweep iteration depends on whether calculate_gi modifies
# its input in place — verify in utils.
grid = calculate_gi(6, hex_grid, adjacency='knn')

# Optional plot / export of the Gi result (disabled):
# plot_gi(taiwan_specific.to_crs(epsg=4326), grid.to_crs(epsg=4326))
# grid.to_file('../ComputedData/Grid/micro_north_gi.geojson', driver='GeoJSON')

### Add Infrastructure on the map

In [None]:
# Infrastructure point tables (MRT, YouBike, parking lots); each is later
# passed to group_infrastructure_count, which reads PositionLon / PositionLat.
MRT_full = pd.read_csv('../ComputedData/MRT/full_mrt.csv')
Youbike_full = pd.read_csv('../ComputedData/Youbike/full_youbike.csv')
Parkinglot_full = pd.read_csv('../ComputedData/Parkinglot/full_parkinglot.csv')


def group_infrastructure_count(df, hex_grid):
    """Count how many point records of ``df`` fall inside each cell of ``hex_grid``.

    Parameters
    ----------
    df : pandas.DataFrame
        Point table with ``PositionLon`` / ``PositionLat`` columns; the
        coordinates are interpreted as EPSG:4326.
    hex_grid : geopandas.GeoDataFrame
        Polygon grid. Points are reprojected to ``hex_grid.crs`` before the join.

    Returns
    -------
    pandas.Series
        Number of points per grid cell, indexed by the matching ``hex_grid``
        index label (the ``index_right`` column produced by ``sjoin``).
        Cells that contain no point are absent from the result.
    """
    points = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df['PositionLon'], df['PositionLat']),
        crs='EPSG:4326',
    ).to_crs(hex_grid.crs)

    # Inner join: points outside every cell contribute nothing to the counts,
    # so there is no need to carry them (and their NaN `index_right`) through
    # the join only for groupby to drop them again.
    joined = gpd.sjoin(points, hex_grid, how='inner', predicate='within')
    cell_counts = joined.groupby('index_right').size()

    return cell_counts

In [None]:
# Per-cell counts for each infrastructure type; all three are computed before
# any count column is added to hex_grid.
mrt_count = group_infrastructure_count(MRT_full, hex_grid)
youbike_count = group_infrastructure_count(Youbike_full, hex_grid)
parkinglot_count = group_infrastructure_count(Parkinglot_full, hex_grid)

# Map the counts onto the grid index; cells without any point get 0.
for column_name, cell_counts in (
    ('mrt_count', mrt_count),
    ('youbike_count', youbike_count),
    ('parkinglot_count', parkinglot_count),
):
    hex_grid[column_name] = hex_grid.index.map(cell_counts).fillna(0).astype(int)

# Total number of infrastructure points per cell.
hex_grid['full_infrastructure'] = (
    hex_grid['mrt_count']
    + hex_grid['youbike_count']
    + hex_grid['parkinglot_count']
)

In [None]:
from utils import plot_map
# Draw the map from the accident records and the grid (reprojected to EPSG:4326),
# passing the combined infrastructure count column as `count`.
plot_map(specific, hex_grid.to_crs(epsg=4326), gi=True, count='full_infrastructure')

### 觀察設施的多寡是否和事故的數量有相關

In [None]:
from utils_infrastructure import scatter_with_regression, scatter_with_spearman

# Per-cell accident count against per-cell infrastructure count.
scatter_with_regression(hex_grid, 'num_accidents', 'full_infrastructure')
scatter_with_spearman(hex_grid, 'num_accidents', 'full_infrastructure')

### 觀察全部設施是否和人與車有相關

In [None]:
# Turn every accident record into a point (EPSG:4326 lon/lat) and reproject it
# to the CRS of the hex grid.
accident_points = gpd.points_from_xy(combined_data['經度'], combined_data['緯度'])
combined_gdf = gpd.GeoDataFrame(
    combined_data, geometry=accident_points, crs='EPSG:4326'
).to_crs(hex_grid.crs)

# Attach to each accident the grid cell it lies within (left join keeps
# accidents that fall outside every cell).
joined = gpd.sjoin(combined_gdf, hex_grid, how='left', predicate='within')

# Column inspected by the helper functions, and the category value of interest.
feature, target = '事故類型及型態大類別名稱', '人與車'

In [None]:
def calculate(group, feature, ratio=False, target_value=None):
    """Count (or compute the share of) rows whose ``feature`` equals a target value.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to inspect (typically one groupby group).
    feature : str
        Name of the column to compare.
    ratio : bool, default False
        If True, return the matching share ``count / len(group)`` instead of
        the raw count.
    target_value : object, optional
        Value to match. When omitted, the notebook-level ``target`` variable is
        used, which keeps existing calls working unchanged.

    Returns
    -------
    int or float
        The match count, or the match ratio when ``ratio`` is True. An empty
        group yields 0 in both modes.
    """
    if target_value is None:
        # Backward-compatible fallback to the global defined in the notebook.
        target_value = target

    count = int((group[feature] == target_value).sum())

    if not ratio:
        return count

    total = len(group)
    # Avoid dividing by zero for an empty group.
    return count / total if total else 0
    
def calculate_most_common(group, column=None):
    """Return the most frequent value of a column within ``group``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to inspect (typically one groupby group).
    column : str, optional
        Column to inspect. When omitted, the notebook-level ``feature``
        variable is used, which keeps existing calls working unchanged.

    Returns
    -------
    object or None
        The value with the highest occurrence count, or None when the column
        has no non-missing values (instead of raising from ``idxmax`` on an
        empty Series).
    """
    if column is None:
        # Backward-compatible fallback to the global defined in the notebook.
        column = feature

    # Occurrence count per category (missing values are not counted).
    counts = group[column].value_counts()
    if counts.empty:
        return None
    return counts.idxmax()

def calculate_average(group, feature):
    """Return the mean of ``group[feature]``, or 0 when ``group`` has no rows."""
    # Guard first: an empty group short-circuits to 0.
    if len(group) == 0:
        return 0
    return group[feature].mean()

In [None]:
# Share of `target` accidents within each grid cell.
# Selecting [[feature]] before apply hands `calculate` only the column it needs
# and keeps the grouping column out of the applied frames, which avoids the
# pandas deprecation warning about apply operating on grouping columns.
data_feature = (
    joined.groupby('index_right')[[feature]]
    .apply(lambda group: calculate(group, feature, ratio=True))
)
# Cells without any accident get a ratio of 0.
hex_grid['human_vehicle_total'] = hex_grid.index.map(data_feature).fillna(0)

# most_common_feature = joined.groupby('index_right').apply(calculate_most_common)
# hex_grid['most_common_feature'] = hex_grid.index.map(most_common_feature).fillna('未知')

# data_feature = joined.groupby('index_right').apply(lambda group: calculate_average(group, '速限-第1當事者'))
# hex_grid['average_speed_limit'] = hex_grid.index.map(data_feature).fillna(0)

In [None]:
# Per-cell human_vehicle_total (filled with the ratio variant of `calculate`)
# against per-cell infrastructure count.
scatter_with_regression(hex_grid, 'human_vehicle_total', 'full_infrastructure')
scatter_with_spearman(hex_grid, 'human_vehicle_total', 'full_infrastructure')

In [None]:
# Collapse the `hotspot` label to two classes: anything other than
# 'Not Significant' becomes 'Hotspot'.
hex_grid['new_hotspot'] = [
    'Not Significant' if label == 'Not Significant' else 'Hotspot'
    for label in hex_grid['hotspot']
]
# Flag whether a cell contains at least one infrastructure point.
hex_grid['new_full_infrastructure'] = [
    'Include Infrastructure' if total > 0 else 'No Infrastructure'
    for total in hex_grid['full_infrastructure']
]
# Combined label, e.g. 'Hotspot/No Infrastructure'.
hex_grid['type'] = hex_grid['new_hotspot'] + '/' + hex_grid['new_full_infrastructure']

In [None]:
import json
import folium

def plot_map_type(data, grid):
    """Build a folium map that colours each grid cell by its ``type`` label.

    Parameters
    ----------
    data : DataFrame
        Records with ``緯度`` / ``經度`` columns; their means give the map centre.
    grid : GeoDataFrame
        Grid with ``type``, ``num_accidents`` and ``full_infrastructure``
        columns (all three are shown in the tooltip).

    Returns
    -------
    folium.Map
        The map with the coloured grid layer and an HTML legend.
    """
    # Serialise a copy of the grid without the optional 'centroid' column
    # (presumably an extra geometry column that cannot go into GeoJSON —
    # TODO confirm).
    grid_json = json.loads(
        grid.copy().drop(columns=['centroid'], errors='ignore').to_json()
    )

    # Map centre: mean latitude / longitude of the records
    map_center = [data['緯度'].mean(), data['經度'].mean()]

    # Esri World Street Map tiles as the base layer
    m = folium.Map(
        location=map_center,
        zoom_start=10,
        tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}',
        attr='Esri',
    )

    # Fill colour per cell type; any other value (including
    # 'Not Significant/No Infrastructure') uses the fallback colour.
    fallback_color = "#b8e2dd"
    type_colors = {
        'Hotspot/Include Infrastructure': "#e44587",
        'Hotspot/No Infrastructure': "#e493b5",
        'Not Significant/Include Infrastructure': "#50e2cf",
    }

    def get_color(type_value):
        return type_colors.get(type_value, fallback_color)

    def style_cell(geojson_feature):
        # Style applied to each grid polygon
        return {
            'fillColor': get_color(geojson_feature['properties']['type']),
            'color': 'grey',
            'weight': 0.5,
            'fillOpacity': 0.7,
        }

    cell_tooltip = folium.GeoJsonTooltip(
        fields=['type', 'num_accidents', 'full_infrastructure'],
        aliases=['Type:', 'Accidents:', 'Infrastructure:'],
        localize=True,
    )

    # Add the grid layer
    folium.GeoJson(
        grid_json,
        style_function=style_cell,
        tooltip=cell_tooltip,
    ).add_to(m)

    # Add the legend as a fixed-position HTML overlay
    legend_html = '''
    <div style="position: fixed; 
                bottom: 50px; left: 50px; width: 280px; height: 160px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:14px; padding: 10px">
    <h4>Legend</h4>
    <p><i class="fa fa-square" style="color:#e44587"></i> Hotspot/Include Infrastructure</p>
    <p><i class="fa fa-square" style="color:#e493b5"></i> Hotspot/No Infrastructure</p>
    <p><i class="fa fa-square" style="color:#50e2cf"></i> Not Significant/Include Infrastructure</p>
    <p><i class="fa fa-square" style="color:#b8e2dd"></i> Not Significant/No Infrastructure</p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    return m

# Build the map from the grid reprojected to EPSG:4326; the bare last
# expression makes the notebook display it.
map_result = plot_map_type(specific, hex_grid.to_crs(epsg=4326))
map_result

## 各縣市人車事故比

In [None]:
# Accident records as points built from the 經度 / 緯度 columns (EPSG:4326).
combined_gdf = gpd.GeoDataFrame(
    combined_data,
    geometry=gpd.points_from_xy(combined_data['經度'], combined_data['緯度']),
    crs='EPSG:4326'
)

# Make sure both GeoDataFrames use the same coordinate reference system
taiwan_crs = taiwan.to_crs('EPSG:4326')
combined_gdf_crs = combined_gdf.to_crs('EPSG:4326')

# Spatial join: match each accident point to the boundary polygon it lies within
joined = gpd.sjoin(combined_gdf_crs, taiwan_crs, how='left', predicate='within')

In [None]:
# Add the county name back onto the original records.
# A point lying inside more than one polygon appears several times in `joined`
# under the same index label; assigning such a Series to a column would raise
# because of the duplicate labels, so keep only the first match per record.
county_by_record = joined['COUNTYNAME']
county_by_record = county_by_record[~county_by_record.index.duplicated(keep='first')]
combined_data['縣市'] = county_by_record

# Share of "人與車" accidents for each selected city
city_human_vehicle_ratio = {}

cities = ['臺北市', '新北市', '臺中市', '高雄市', '花蓮縣', '臺東縣']

for city in cities:
    city_data = combined_data[combined_data['縣市'] == city]
    total_count = len(city_data)
    if total_count > 0:
        human_vehicle_count = len(city_data[city_data['事故類型及型態大類別名稱'] == '人與車'])
        ratio = human_vehicle_count / total_count
        city_human_vehicle_ratio[city] = ratio
        print(f"{city}: {total_count} 件事故, 人車事故比例: {ratio:.3f}")
    else:
        # No accident matched this city: record a ratio of 0.
        city_human_vehicle_ratio[city] = 0
        print(f"{city}: 0 件事故")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Convert the ratio dict into a DataFrame
ratio_df = pd.DataFrame(list(city_human_vehicle_ratio.items()),
                        columns=['City', 'Human_Vehicle_Ratio'])

# Keep only cities with a positive ratio.
# NOTE(review): this also drops a city that has accidents but a ratio of exactly 0.
ratio_df = ratio_df[ratio_df['Human_Vehicle_Ratio'] > 0]
ratio_df = ratio_df.sort_values('Human_Vehicle_Ratio', ascending=False)

# Configure the CJK-capable font before the figure and any text are created,
# so that the city tick labels, title and axis labels are all drawn with it.
plt.rcParams['font.family'] = ['Microsoft JhengHei']

plt.figure(figsize=(14, 8))
bars = plt.bar(ratio_df['City'], ratio_df['Human_Vehicle_Ratio'],
               color='steelblue', alpha=0.7)

# Dashed reference line at the mean ratio of the plotted cities.
# The `label` is only shown if a legend is added; the text annotation below
# is what actually displays the value.
average_ratio = ratio_df['Human_Vehicle_Ratio'].mean()
plt.axhline(y=average_ratio, color="#6060cf", linestyle='--', linewidth=2,
            label=f'平均值: {average_ratio:.3f}')

# Annotate the mean just above the line, at the last bar's x position
plt.text(len(ratio_df) - 1, average_ratio + 0.003,
         f'Avg: {average_ratio:.3f}',
         color='#6060cf', fontweight='bold', fontsize=12)

plt.title('各縣市人車事故比例', fontsize=16, fontweight='bold')
plt.xlabel('縣市', fontsize=12)
plt.ylabel('人車事故比例', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()