In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [None]:
# Facility tables (MRT stations, YouBike stations, parking lots) read from
# ../ComputedData, in that order.
mrt, youbike, parkinglot = map(
    pd.read_csv,
    (
        '../ComputedData/MRT/full_mrt.csv',
        '../ComputedData/YouBike/full_youbike.csv',
        '../ComputedData/ParkingLot/full_parkinglot.csv',
    ),
)

# Accident tables. A2 is parsed with low_memory=False so pandas infers each
# column's dtype from the whole file instead of chunk by chunk.
dataA2 = pd.read_csv("../Data/Accident/A2.csv", low_memory=False)
dataA1 = pd.read_csv("../Data/Accident/A1.csv")

In [None]:
# Step 1: Build GeoDataFrames. Longitude/latitude columns become point
# geometries in EPSG:4326, which are then reprojected to the planar CRS
# EPSG:3826 so that later buffer radii are plain distances.
#
# gpd.points_from_xy builds the whole geometry array in one vectorized call
# instead of constructing shapely Points one at a time in a Python loop.
# As before, the 'geometry' column is added to dataA1 and mrt themselves.
dataA1['geometry'] = gpd.points_from_xy(dataA1['經度'], dataA1['緯度'])
mrt['geometry'] = gpd.points_from_xy(mrt['PositionLon'], mrt['PositionLat'])

gdf_data = gpd.GeoDataFrame(dataA1, geometry='geometry', crs="EPSG:4326").to_crs(epsg=3826)
gdf_mrt = gpd.GeoDataFrame(mrt, geometry='geometry', crs="EPSG:4326").to_crs(epsg=3826)

# Sanity check: reprojection must leave the table shape unchanged.
print(mrt.shape, gdf_mrt.shape)

In [None]:
# Step 2: Build a 500 m buffer around every accident point and use it as the
# active geometry of the frame that goes into the spatial join.
gdf_data['buffer'] = gdf_data.geometry.buffer(500)
gdf_buffer = gdf_data.set_geometry('buffer')

# Step 3: Spatial join, producing one row per (accident buffer, MRT station
# contained in it) and indexed by the accident's index label.
# The join must be 'inner': with how='left' an accident without any station
# nearby still produces one row (with an empty right side), so counting rows
# per accident would report 1 instead of 0 for it.
joined = gpd.sjoin(gdf_buffer, gdf_mrt, how='inner', predicate='contains')

# Step 4: Count matched stations per accident. Accidents missing from the
# join matched nothing, so they are filled in with an explicit 0.
mrt_counts = joined.groupby(level=0).size().reindex(gdf_buffer.index, fill_value=0)

# Sanity check: every frame (and the count series) should have one entry per accident.
print(dataA1.shape, gdf_data.shape, gdf_buffer.shape, mrt_counts.shape)

In [None]:
# Step 5: Merge the per-accident MRT counts back onto the accident table.
# Index labels without an entry in mrt_counts are treated as 0.
gdf_data['mrt_500m_count'] = gdf_data.index.map(mrt_counts).fillna(0).astype(int)

# Remove the geometry helper columns on a derived frame instead of in place:
# gdf_data keeps its geometry, so this cell (and the buffering cell before it)
# can be re-run without hitting a KeyError / missing-geometry error.
accidentsA1_with_mrt = gdf_data.drop(columns=['geometry', 'buffer'])

accidentsA1_with_mrt.to_csv('../ComputedData/Accident/DataA1_with_MRT_counts.csv', index=False, encoding='utf-8')

## Final: MRT, YouBike and parking-lot counts within 100 m of each accident

In [None]:
# Facility tables to count around each accident. Each key becomes the prefix
# of the generated count column.
facilities = dict(mrt=mrt, youbike=youbike, parkinglot=parkinglot)

def Calculate(X, facility_dict, name, distances=(100,)):
    """Count nearby facilities for every accident record and save the result as CSV.

    For every radius in ``distances`` and every entry of ``facility_dict`` a
    column named ``{label}_{dist}m_count`` is produced. It holds the number of
    facility points that intersect the circular buffer of that radius around
    the accident location. An existing column of the same name is overwritten.

    Parameters
    ----------
    X : pandas.DataFrame
        Accident records with '經度' (longitude) and '緯度' (latitude) columns,
        interpreted as EPSG:4326. The frame is not modified.
    facility_dict : dict
        Maps a column-name prefix (e.g. 'mrt') to a facility table with
        'PositionLon' and 'PositionLat' columns, interpreted as EPSG:4326.
        The tables are not modified.
    name : str
        Output file stem; the result is written to
        '../ComputedData/Accident/{name}.csv' without the index.
    distances : number or iterable of numbers, optional
        Buffer radii in the linear unit of EPSG:3826 (treated as metres by
        this notebook). Defaults to ``(100,)``. Each value is formatted
        verbatim into the column name, so pass ints to get names such as
        'mrt_100m_count'.

    Returns
    -------
    None
        The function's only effect is the CSV file it writes.
    """
    # Accept a single radius as a convenience.
    if isinstance(distances, (int, float)):
        distances = (distances,)

    # Step 1: accident points in EPSG:4326, reprojected to the planar CRS
    # EPSG:3826. Working on the frame returned by drop() keeps the caller's X
    # untouched and discards stale 'geometry'/'buffer' columns, if any.
    accidents = X.drop(columns=['geometry', 'buffer'], errors='ignore')
    gdf_data = gpd.GeoDataFrame(
        accidents,
        geometry=gpd.points_from_xy(accidents['經度'], accidents['緯度']),
        crs="EPSG:4326",
    ).to_crs(epsg=3826)

    # Step 2: facility points, projected once and reused for every radius.
    # Only the geometry matters for counting, so attribute columns are left out.
    facility_points = {
        label: gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(facility['PositionLon'], facility['PositionLat']),
            crs="EPSG:4326",
        ).to_crs(epsg=3826)
        for label, facility in facility_dict.items()
    }

    # Step 3: for each radius, buffer the accident points once and count the
    # facility points that fall into each buffer.
    for dist in distances:
        gdf_data['buffer'] = gdf_data.geometry.buffer(dist)
        gdf_buffer = gdf_data.set_geometry('buffer')

        for label, gdf_facility in facility_points.items():
            # Inner join: one row per (accident, facility) hit, indexed by the
            # accident's index label. Accidents without any hit are absent and
            # receive 0 through the reindex below.
            joined = gpd.sjoin(gdf_buffer, gdf_facility, how='inner', predicate='intersects')
            counts = joined.index.value_counts().reindex(gdf_data.index, fill_value=0)

            # New column: number of facilities of this kind within `dist`.
            gdf_data[f'{label}_{dist}m_count'] = counts.to_numpy().astype(int)

    # Step 4: drop the geometry helper columns and save.
    result = gdf_data.drop(columns=['geometry', 'buffer'], errors='ignore')
    result.to_csv(f'../ComputedData/Accident/{name}.csv', index=False, encoding='utf-8')

# Run the facility count for both accident tables; each call writes
# '../ComputedData/Accident/<output_name>.csv'.
for accident_frame, output_name in (
    (dataA1, 'DataA1_with_MYP'),
    (dataA2, 'DataA2_with_MYP'),
):
    Calculate(accident_frame, facilities, output_name)

In [None]:
# Reload the augmented accident tables written by the Calculate calls above.
# A2 is read with low_memory=False, matching the initial load of A2.csv, so
# column dtypes are inferred from the whole file rather than chunk by chunk.
dataA1 = pd.read_csv('../ComputedData/Accident/DataA1_with_MYP.csv')
dataA2 = pd.read_csv('../ComputedData/Accident/DataA2_with_MYP.csv', low_memory=False)

# Shape of the subset of accidents with no parking lot within 100 m (A1, then A2).
for accidents in (dataA1, dataA2):
    print(accidents[accidents['parkinglot_100m_count'] == 0].shape)

# Full distribution of the parking-lot counts (A1, then A2).
for accidents in (dataA1, dataA2):
    print(accidents['parkinglot_100m_count'].value_counts())

## Incremental: YouBike counts at several radii (50–400 m)

In [None]:
# Restrict the facility set to YouBike for the multi-radius run below.
facilities = dict(youbike=youbike)
def Calculate(X, facility_dict, name, distances=(50, 100, 200, 300, 400)):
    """Count facilities around each accident at several radii and save as CSV.

    Note: this redefinition replaces the earlier ``Calculate`` in the notebook
    namespace.

    For every entry of ``facility_dict`` and every radius in ``distances`` a
    column named ``{label}_{dist}m_count`` is produced. It holds the number of
    facility points that intersect the circular buffer of that radius around
    the accident location. An existing column of the same name (for example a
    '..._100m_count' column already present in ``X``) is overwritten.

    Parameters
    ----------
    X : pandas.DataFrame
        Accident records with '經度' (longitude) and '緯度' (latitude) columns,
        interpreted as EPSG:4326. The frame is not modified.
    facility_dict : dict
        Maps a column-name prefix (e.g. 'youbike') to a facility table with
        'PositionLon' and 'PositionLat' columns, interpreted as EPSG:4326.
        The tables are not modified.
    name : str
        Output file stem; the result is written to
        '../ComputedData/Accident/{name}.csv' without the index.
    distances : number or iterable of numbers, optional
        Buffer radii in the linear unit of EPSG:3826 (treated as metres by
        this notebook). Defaults to ``(50, 100, 200, 300, 400)``. Each value
        is formatted verbatim into the column name.

    Returns
    -------
    None
        The function's only effect is the CSV file it writes.
    """
    # Accept a single radius as a convenience.
    if isinstance(distances, (int, float)):
        distances = (distances,)

    # Accident points in EPSG:4326, reprojected to the planar CRS EPSG:3826.
    # Working on the frame returned by drop() keeps the caller's X untouched
    # and discards stale 'geometry'/'buffer' columns, if any.
    accidents = X.drop(columns=['geometry', 'buffer'], errors='ignore')
    gdf_data = gpd.GeoDataFrame(
        accidents,
        geometry=gpd.points_from_xy(accidents['經度'], accidents['緯度']),
        crs="EPSG:4326",
    ).to_crs(epsg=3826)

    for label, facility in facility_dict.items():
        # Facility points; only the geometry is needed for counting.
        gdf_facility = gpd.GeoDataFrame(
            geometry=gpd.points_from_xy(facility['PositionLon'], facility['PositionLat']),
            crs="EPSG:4326",
        ).to_crs(epsg=3826)

        for dist in distances:
            gdf_data['buffer'] = gdf_data.geometry.buffer(dist)
            gdf_buffer = gdf_data.set_geometry('buffer')

            # Inner join: one row per (accident, facility) hit, indexed by the
            # accident's index label. Accidents without any hit are absent and
            # receive 0 through the reindex below.
            joined = gpd.sjoin(gdf_buffer, gdf_facility, how='inner', predicate='intersects')
            counts = joined.index.value_counts().reindex(gdf_data.index, fill_value=0)

            gdf_data[f'{label}_{dist}m_count'] = counts.to_numpy().astype(int)

    # errors='ignore' covers the case where no buffer was ever created
    # (empty facility_dict or empty distances).
    result = gdf_data.drop(columns=['geometry', 'buffer'], errors='ignore')
    result.to_csv(f'../ComputedData/Accident/{name}.csv', index=False, encoding='utf-8')

# Run the YouBike count for both accident tables; each call writes
# '../ComputedData/Accident/<output_name>.csv'.
for accident_frame, output_name in (
    (dataA1, 'DataA1_with_youbike'),
    (dataA2, 'DataA2_with_youbike'),
):
    Calculate(accident_frame, facilities, output_name)