In [1]:
import folium
import pandas as pd
from shapely.geometry import Point, MultiPoint, Polygon
import geopandas as gpd
from geopy.distance import geodesic

In [2]:
cluster_df = pd.read_csv("clustering_result.csv")
cluster_df = cluster_df[cluster_df['cluster'] != -1].copy()
print(cluster_df.shape)
cluster_df.head()

(4396, 6)


Unnamed: 0,centroid_lat,centroid_lon,pred,cluster,pred_sum,pred_rank
0,37.561427,126.806755,0,0,14.0,23.0
1,37.562328,126.806729,0,0,14.0,23.0
2,37.563228,126.806702,1,0,14.0,23.0
3,37.564129,126.806676,0,0,14.0,23.0
4,37.561448,126.807887,0,0,14.0,23.0


In [3]:
cluster_df['geometry'] = cluster_df.apply(lambda row: Point(row['centroid_lon'], row['centroid_lat']), axis=1)

# GeoDataFrame 변환
gdf = gpd.GeoDataFrame(cluster_df, geometry='geometry')
# 클러스터별 다각형 만들기
cluster_polygons = gdf.groupby('cluster')['geometry'].apply(lambda points: MultiPoint(list(points)).convex_hull).reset_index()
cluster_polygons = gpd.GeoDataFrame(cluster_polygons, geometry='geometry')

# 각 클러스터의 pred_rank 병합
rank_df = cluster_df[['cluster', 'pred_rank']].drop_duplicates(subset='cluster')
cluster_polygons = cluster_polygons.merge(rank_df, on='cluster')

## 견인 위험 군집 시각화

In [13]:
# 지도 중심은 임의 지정
m = folium.Map(location=[37.561427, 126.806755], zoom_start=12)


for _, row in cluster_df.iterrows():
    color = 'red' if row['pred'] == 2 else 'yellow'
    folium.CircleMarker(
        location=[row['centroid_lat'], row['centroid_lon']],
        radius=2,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.8,
        tooltip=f"Risk: {row['pred']}, Cluster: {row['cluster']}"
    ).add_to(m)

for _, row in cluster_polygons.iterrows():
    folium.GeoJson(
        row['geometry'],
        style_function=lambda x: {
            'fillColor': 'blue',
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.3
        },
        tooltip=folium.Tooltip(f"Cluster {row['cluster']}<br>pred_rank: {row['pred_rank']}")
    ).add_to(m)
    
# 지도 출력
m

In [14]:
m.save("cluster_map.html")

## 테스트

In [6]:
def create_square_polygon(lat, lon, side_length_m):
    north = geodesic(meters=side_length_m).destination((lat, lon), 0)  
    south = geodesic(meters=side_length_m).destination((lat, lon), 180) 
    east = geodesic(meters=side_length_m).destination((lat, lon), 90)  
    west = geodesic(meters=side_length_m).destination((lat, lon), 270)

    # 100x100 폴리곤
    polygon = Polygon([
        (north.longitude, north.latitude),
        (south.longitude, south.latitude),
        (east.longitude, east.latitude),
        (west.longitude, west.latitude)
    ])

    return polygon


def check_intersection(row, cluster_polygons):
    # 각 폴리곤과 교차 여부 확인
    for _, poly in cluster_polygons.iterrows():
        if row['geometry'].intersects(poly['geometry']):
            return True
    return False


In [7]:
df = pd.read_csv("data/seoul_preprocessed.csv")
cols = ['SIG_KOR_NM','centroid_lat', 'centroid_lon', 'tow_count']
exclude_list = ['강남구', '마포구', '서초구', '영등포구'] # test 지역만
df = df[~df['SIG_KOR_NM'].isin(exclude_list)].copy()
print(df.shape)
df.head()

(34099, 31)


Unnamed: 0,SIG_KOR_NM,centroid_lat,centroid_lon,isSchool,rental_count,return_count,apart,closest_hospital_dist,closest_convenience_dist,closest_culture_dist,...,subway_avg_board,subway_avg_alight,area20,area30,area40,wp_area20,wp_area30,wp_area40,isCommercial,tow_count
0,강서구,37.555328,126.765147,0,0.0,0.0,0.0,7.955399,8.020145,8.518302,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
1,강서구,37.555768,126.765465,0,0.0,0.0,0.0,7.941459,8.007402,8.508414,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
2,강서구,37.555308,126.766217,0,0.0,0.0,0.0,7.923215,7.989793,8.502376,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
3,강서구,37.555945,126.766239,0,0.0,0.0,0.0,7.915868,7.98349,8.494468,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0
4,강서구,37.5526,126.768528,0,0.0,0.0,0.0,7.889078,7.911432,8.502308,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0


In [8]:
df_filtered = df[df['tow_count'] >= 1].copy() # 견인이 한번이라도 되었던 구역
df_filtered['geometry'] = [create_square_polygon(lat, lon, 100) for lat, lon in zip(df_filtered['centroid_lat'], df_filtered['centroid_lon'])]
gdf_df = gpd.GeoDataFrame(df_filtered, geometry='geometry', crs='EPSG:4326')

# Point 변환
geometry = [Point(lon, lat) for lon, lat in zip(df_filtered['centroid_lon'], df_filtered['centroid_lat'])]
gdf_points = gpd.GeoDataFrame(df_filtered, geometry=geometry, crs='EPSG:4326')

In [9]:
gdf_df['in_polygon'] = gdf_df.apply(lambda row: check_intersection(row, cluster_polygons), axis=1)
gdf_df = gdf_df[gdf_df['tow_count'] >= 1]  # tow_count가 1 이상인 데이터만 필터링


# 폴리곤 내/외부에 있는 실제 견인데이터 개수
green_count = gdf_df[gdf_df['in_polygon'] == True].shape[0]
red_count = gdf_df[gdf_df['in_polygon'] == False].shape[0]

print(f"폴리곤 안에 있는 견인 수: {green_count}")
print(f"폴리곤 밖에 있는 견인 수: {red_count}")

폴리곤 안에 있는 견인 수: 2334
폴리곤 밖에 있는 견인 수: 4553


In [15]:
center_lat = gdf_df['centroid_lat'].mean()
center_lon = gdf_df['centroid_lon'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=13)

for _, row in gdf_df.iterrows():
    if row['in_polygon']:
        color = 'red' 
    else:
        continue
    folium.CircleMarker(
        location=[row['centroid_lat'], row['centroid_lon']],
        radius=1,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.8
    ).add_to(m)

for _, row in cluster_polygons.iterrows():
    folium.GeoJson(row['geometry'], style_function=lambda x: {'fillColor': 'blue', 'color': 'blue', 'weight': 1, 'fillOpacity': 0.1}).add_to(m)

m

In [16]:
m.save("cluster_map2.html")