In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import folium
import random
import numpy as np
from haversine import haversine, Unit

In [3]:
bus_df = pd.read_csv("데이터\입지후보데이터\노선_요약.csv")
subway_df = pd.read_csv("데이터\입지후보데이터\지하철_요약.csv")
cafe_df = pd.read_csv("데이터\입지후보데이터\일반음식점(카페)현황.csv", encoding='CP949')
park_df = pd.read_csv("데이터\입지후보데이터\도시공원정보현황(제공표준).csv", encoding='CP949')
trash_bin_df = pd.read_csv("데이터\입지후보데이터\경기도 성남시_쓰레기통_설치현황_20250325.csv")

In [None]:
cafe_df = cafe_df[(cafe_df['시군명'] == '성남시') & (cafe_df['영업상태명'] == '영업')]
cafe_df = cafe_df.rename(columns={'WGS84위도': '위도', 'WGS84경도': '경도', '사업장명': '이름'})

park_df = park_df[park_df['소재지지번주소'].str.startswith('경기도 성남시')].dropna(subset=['위도', '경도'])
park_df = park_df.rename(columns={'공원명': '이름'})

subway_df = subway_df.rename(columns={'역사명': '이름'})
bus_df = bus_df.dropna().rename(columns={'정류장명': '이름'})

scaler = MinMaxScaler()
cafe_df['가중치'] = scaler.fit_transform(cafe_df[['총시설규모(㎡)']])
park_df['가중치'] = scaler.fit_transform(park_df[['공원면적(㎡)']])
bus_df['혼잡도'] = bus_df['환승시간(분)'] * bus_df['노선개수']
bus_df['가중치'] = scaler.fit_transform(bus_df[['혼잡도']])
subway_df['일평균승하차인원'] = subway_df['승차총승객수'] + subway_df['하차총승객수']
subway_df['가중치'] = scaler.fit_transform(subway_df[['일평균승하차인원']])

In [None]:
bus_coords = bus_df[['위도', '경도', '가중치', '이름']].copy()
subway_coords = subway_df[['위도', '경도', '가중치', '이름']].copy()
cafe_coords = cafe_df[['위도', '경도', '가중치', '이름']].copy()
park_coords = park_df[['위도', '경도', '가중치', '이름']].copy()

all_df = pd.concat([bus_coords, subway_coords, cafe_coords, park_coords], ignore_index=True)
demand_df = all_df.copy()

In [6]:
existing_bins = set(zip(trash_bin_df['위도'].round(6), trash_bin_df['경도'].round(6)))


all_candidates_df = pd.concat([
    park_df[['위도', '경도', '이름']],
    bus_df[['위도', '경도', '이름']]
], ignore_index=True)

all_candidates_df['좌표'] = list(zip(all_candidates_df['위도'].round(6), all_candidates_df['경도'].round(6)))
new_candidates_df = all_candidates_df[~all_candidates_df['좌표'].isin(existing_bins)]

In [None]:
district_info = {
    '분당구': {'bounds': [[37.300, 127.000], [37.413, 127.170]], '유동인구': 198378136, '폐기물': 57097.45},
    '중원구': {'bounds': [[37.413, 127.120], [37.445, 127.190]], '유동인구': 62310816, '폐기물': 43988.86},
    '수정구1': {'bounds': [[37.413, 127.040], [37.468, 127.120]], '유동인구': 75331308, '폐기물': 63006.79},
    '수정구2': {'bounds': [[37.480, 127.120], [37.445, 127.190]], '유동인구': 75331308, '폐기물': 63006.79},
}

def assign_district_weight(row):
    lat, lon = row['위도'], row['경도']
    for name, info in district_info.items():
        (lat1, lon1), (lat2, lon2) = info['bounds']
        if min(lat1, lat2) <= lat <= max(lat1, lat2) and min(lon1, lon2) <= lon <= max(lon1, lon2):
            return pd.Series({
                '구역': name.replace("1", "").replace("2", ""),
                '구역_유동인구': info['유동인구'],
                '구역_폐기물': info['폐기물']
            })
    return pd.Series({'구역': None, '구역_유동인구': None, '구역_폐기물': None})

all_df[['구역', '구역_유동인구', '구역_폐기물']] = all_df.apply(assign_district_weight, axis=1)

In [None]:
delta = 0.05 

all_df['유동인구당_폐기물'] = all_df['구역_폐기물'] / all_df['구역_유동인구']

district_influence = all_df.groupby('구역')['유동인구당_폐기물'].mean().sort_values(ascending=False)

rank_adjustment = {
    district_influence.index[0]: +delta,
    district_influence.index[1]: 0,
    district_influence.index[2]: -delta,
}

all_df['조정비율'] = all_df['구역'].map(rank_adjustment)
all_df['가중치'] = all_df['가중치'] * (1 + all_df['조정비율'])


In [16]:
all_df

Unnamed: 0,위도,경도,가중치,이름,구역,구역_유동인구,구역_폐기물,유동인구당_폐기물,조정비율
0,37.411683,127.099350,0.508769,판교제2테크노밸리,분당구,198378136,57097.45,0.000288,-0.05
1,37.411333,127.099733,0.376227,판교제2테크노밸리,분당구,198378136,57097.45,0.000288,-0.05
2,37.422467,127.101233,0.101548,동산마을입구,수정구,75331308,63006.79,0.000836,0.05
3,37.422800,127.101400,0.066791,동산마을입구,수정구,75331308,63006.79,0.000836,0.05
4,37.428217,127.101650,0.397460,성남농협대왕지점.고등동우체국,수정구,75331308,63006.79,0.000836,0.05
...,...,...,...,...,...,...,...,...,...
1598,37.344885,127.118069,0.002955,하얀공원,분당구,198378136,57097.45,0.000288,-0.05
1599,37.344557,127.114146,0.004516,오리공원,분당구,198378136,57097.45,0.000288,-0.05
1600,37.337391,127.117303,0.006785,구미공원,분당구,198378136,57097.45,0.000288,-0.05
1601,37.415308,127.137469,0.001201,연꽃공원,중원구,62310816,43988.86,0.000706,0.00


In [38]:
coords = all_df[['위도', '경도']].values
weights = all_df['가중치'].values
total_weight = np.sum(weights)

candidate_coords = new_candidates_df[['위도', '경도']].values.round(6)
existing_coords = set(map(tuple, trash_bin_df[['위도', '경도']].values.round(6)))

demand_coords = coords  # alias

def get_fast_coverage(k, radius=68.8):
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=5).fit(coords)
    centroids = kmeans.cluster_centers_

    # 스냅핑
    snapped_bins = []
    for c in centroids:
        dists = np.array([haversine(tuple(c), tuple(cand), unit=Unit.METERS) for cand in candidate_coords])
        min_idx = np.argmin(dists)
        snapped = tuple(candidate_coords[min_idx])
        if snapped not in existing_coords:
            snapped_bins.append(snapped)

    # 수요 커버 여부 (벡터화 불가, 루프 최소화)
    covered_weight = 0
    for i, dp in enumerate(demand_coords):
        for sb in snapped_bins:
            if haversine(tuple(dp), sb, unit=Unit.METERS) <= radius:
                covered_weight += weights[i]
                break
    return covered_weight / total_weight, len(snapped_bins)


In [None]:
ks = range(5, 1000, 5)
coverage_ratios = []
used_bins = []
previous_ratio = 0


for k in ks:
    cov, used = get_fast_coverage(k)
    coverage_ratios.append(cov)
    used_bins.append(used)
    print(f"k={k}, coverage={cov:.4f}, bins_used={used}")
    previous_ratio = cov

# 그래프
plt.plot(ks[:len(coverage_ratios)], coverage_ratios, marker='o')
plt.title("Coverage-based Elbow (Fast Version)")
plt.xlabel("k")
plt.ylabel("Coverage Ratio")
plt.grid(True)
plt.show()


In [9]:
coords = all_df[['위도', '경도']].copy()
k = 920

kmeans = KMeans(n_clusters=k, random_state=42, n_init='auto')
kmeans.fit(coords)

centroids = kmeans.cluster_centers_




In [10]:
# 기존 쓰레기통 위치 중복 제거용
existing_bins = set(zip(trash_bin_df['위도'].round(6), trash_bin_df['경도'].round(6)))

# 근처 구역으로 스냅핑
snapped_centroids_named = []

for c_lat, c_lon in centroids:
    closest_point = None
    closest_name = None
    min_dist = float('inf')

    for _, row in new_candidates_df.iterrows():
        cand_coord = (row['위도'], row['경도'])
        dist = geodesic((c_lat, c_lon), cand_coord).meters

        if dist < min_dist:
            min_dist = dist
            closest_point = cand_coord
            closest_name = row['이름']  # 후보지 이름 (버스정류장명 or 공원명)

    if (round(closest_point[0], 6), round(closest_point[1], 6)) not in existing_bins:
        snapped_centroids_named.append((closest_point[0], closest_point[1], closest_name))

print(f"최종 설치 위치 수: {len(snapped_centroids_named)} / {len(centroids)}")


최종 설치 위치 수: 920 / 920


In [None]:
center_lat = coords['위도'].mean()
center_lon = coords['경도'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=13)

# 수요지 (주황색 원)
for _, row in coords.iterrows():
    folium.CircleMarker(
        location=[row['위도'], row['경도']],
        radius=2,
        color='gray',
        fill=True,
        fill_color='orange',
        fill_opacity=0.5
    ).add_to(m)

# 설치 위치
for lat, lon, name in snapped_centroids_named:
    folium.Marker(
        location=[lat, lon],
        icon=folium.Icon(color='blue', icon='trash', prefix='fa'),
        tooltip=f"설치: {name}"
    ).add_to(m)

m


In [12]:
# 1. 스냅핑된 설치 후보지 → DataFrame으로 변환
installed_bins_df = pd.DataFrame(snapped_centroids_named, columns=['위도', '경도', '이름'])

# 2. 설치 여부 열 추가
installed_bins_df['설치여부'] = 1  # 설치된 것이므로 모두 1

# 3. 저장
installed_bins_df.to_csv("k-means_installed_bins.csv", index=False, encoding="CP949")
print("CSV 저장 완료: k-means_installed_bins.csv")

CSV 저장 완료: k-means_installed_bins.csv
