In [1]:
import pandas as pd
from pulp import LpProblem, LpVariable, LpBinary, LpMinimize, lpSum
from sklearn.preprocessing import MinMaxScaler
from geopy.distance import geodesic
import folium
import numpy as np
from haversine import haversine, Unit
import matplotlib.pyplot as plt

In [2]:
bus_df = pd.read_csv("노선_요약.csv")
subway_df = pd.read_csv("지하철_요약.csv")
cafe_df = pd.read_csv("일반음식점(카페)현황.csv", encoding="CP949")
park_df = pd.read_csv("도시공원정보현황(제공표준).csv", encoding="CP949")
trash_bin_df = pd.read_csv("경기도 성남시_쓰레기통_설치현황_20250325.csv")

In [3]:
cafe_df = cafe_df[(cafe_df['시군명'] == '성남시') & (cafe_df['영업상태명'] == '영업')]
cafe_df = cafe_df.rename(columns={'WGS84위도': '위도', 'WGS84경도': '경도', '사업장명': '이름'})

park_df = park_df[park_df['소재지지번주소'].str.startswith('경기도 성남시')].dropna(subset=['위도', '경도'])
park_df = park_df.rename(columns={'공원명': '이름'})

subway_df = subway_df.rename(columns={'역사명': '이름'})
bus_df = bus_df.dropna().rename(columns={'정류장명': '이름'})

scaler = MinMaxScaler()
cafe_df['가중치'] = scaler.fit_transform(cafe_df[['총시설규모(㎡)']])
park_df['가중치'] = scaler.fit_transform(park_df[['공원면적(㎡)']])
bus_df['혼잡도'] = bus_df['환승시간(분)'] * bus_df['노선개수']
bus_df['가중치'] = scaler.fit_transform(bus_df[['혼잡도']])
subway_df['일평균승하차인원'] = subway_df['승차총승객수'] + subway_df['하차총승객수']
subway_df['가중치'] = scaler.fit_transform(subway_df[['일평균승하차인원']])

In [4]:
bus_coords = bus_df[['위도', '경도', '가중치', '이름']].copy()
subway_coords = subway_df[['위도', '경도', '가중치', '이름']].copy()
cafe_coords = cafe_df[['위도', '경도', '가중치', '이름']].copy()
park_coords = park_df[['위도', '경도', '가중치', '이름']].copy()

all_df = pd.concat([bus_coords, subway_coords, cafe_coords, park_coords], ignore_index=True)
demand_df = all_df.copy()

In [5]:
existing_bins = set(zip(trash_bin_df['위도'].round(6), trash_bin_df['경도'].round(6)))

candidate_df = pd.concat([bus_df, park_df], ignore_index=True)
all_candidates_df = pd.concat([
    park_df[['위도', '경도', '이름']],
    bus_df[['위도', '경도', '이름']]
], ignore_index=True)

all_candidates_df['좌표'] = list(zip(all_candidates_df['위도'].round(6), all_candidates_df['경도'].round(6)))
new_candidates_df = all_candidates_df[~all_candidates_df['좌표'].isin(existing_bins)]

In [6]:
district_info = {
    '분당구': {'bounds': [[37.300, 127.000], [37.413, 127.170]], '유동인구': 198378136, '폐기물': 57097.45},
    '중원구': {'bounds': [[37.413, 127.120], [37.445, 127.190]], '유동인구': 62310816, '폐기물': 43988.86},
    '수정구1': {'bounds': [[37.413, 127.040], [37.468, 127.120]], '유동인구': 75331308, '폐기물': 63006.79},
    '수정구2': {'bounds': [[37.480, 127.120], [37.445, 127.190]], '유동인구': 75331308, '폐기물': 63006.79},
}

def assign_district_weight(row):
    lat, lon = row['위도'], row['경도']
    for name, info in district_info.items():
        (lat1, lon1), (lat2, lon2) = info['bounds']
        if min(lat1, lat2) <= lat <= max(lat1, lat2) and min(lon1, lon2) <= lon <= max(lon1, lon2):
            return pd.Series({
                '구역': name.replace("1", "").replace("2", ""),
                '구역_유동인구': info['유동인구'],
                '구역_폐기물': info['폐기물']
            })
    return pd.Series({'구역': None, '구역_유동인구': None, '구역_폐기물': None})

all_df[['구역', '구역_유동인구', '구역_폐기물']] = all_df.apply(assign_district_weight, axis=1)

In [7]:
delta = 0.05 

all_df['유동인구당_폐기물'] = all_df['구역_폐기물'] / all_df['구역_유동인구']

district_influence = all_df.groupby('구역')['유동인구당_폐기물'].mean().sort_values(ascending=False)

rank_adjustment = {
    district_influence.index[0]: +delta,
    district_influence.index[1]: 0,
    district_influence.index[2]: -delta,
}

all_df['조정비율'] = all_df['구역'].map(rank_adjustment)
all_df['가중치'] = all_df['가중치'] * (1 + all_df['조정비율'])

In [8]:
all_df

Unnamed: 0,위도,경도,가중치,이름,구역,구역_유동인구,구역_폐기물,유동인구당_폐기물,조정비율
0,37.411683,127.099350,0.508769,판교제2테크노밸리,분당구,198378136,57097.45,0.000288,-0.05
1,37.411333,127.099733,0.376227,판교제2테크노밸리,분당구,198378136,57097.45,0.000288,-0.05
2,37.422467,127.101233,0.101548,동산마을입구,수정구,75331308,63006.79,0.000836,0.05
3,37.422800,127.101400,0.066791,동산마을입구,수정구,75331308,63006.79,0.000836,0.05
4,37.428217,127.101650,0.397460,성남농협대왕지점.고등동우체국,수정구,75331308,63006.79,0.000836,0.05
...,...,...,...,...,...,...,...,...,...
1598,37.344885,127.118069,0.002955,하얀공원,분당구,198378136,57097.45,0.000288,-0.05
1599,37.344557,127.114146,0.004516,오리공원,분당구,198378136,57097.45,0.000288,-0.05
1600,37.337391,127.117303,0.006785,구미공원,분당구,198378136,57097.45,0.000288,-0.05
1601,37.415308,127.137469,0.001201,연꽃공원,중원구,62310816,43988.86,0.000706,0.00


In [None]:
coords = all_df[['위도', '경도']].values
weights = all_df['가중치'].values
total_weight = np.sum(weights)

candidate_coords = new_candidates_df[['위도', '경도']].values.round(6)
existing_coords = set(map(tuple, trash_bin_df[['위도', '경도']].values.round(6)))

demand_coords = coords  # alias

# Haversine 거리 계산 (단위: 미터)
def fast_haversine(lat1, lon1, lat2, lon2):
    R = 6371000
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2
    return 2 * R * np.arcsin(np.sqrt(a))

# 파라미터 및 데이터 정의
coords = all_df[['위도', '경도']].values
weights = all_df['가중치'].values
total_weight = np.sum(weights)

candidate_coords = new_candidates_df[['위도', '경도']].values.round(6)
candidate_names = new_candidates_df['이름'].values
existing_coords = set(map(tuple, trash_bin_df[['위도', '경도']].values.round(6)))
demand_coords = coords

radius = 68.8
p_max = 1500

coverage_ratios = []
p_values = []
selected_indices = []
selected_names = []

num_demand = len(demand_coords)
num_cand = len(candidate_coords)

# 거리 행렬 계산
distance_matrix = np.zeros((num_demand, num_cand))
for i in range(num_demand):
    for j in range(num_cand):
        distance_matrix[i, j] = fast_haversine(
            demand_coords[i][0], demand_coords[i][1],
            candidate_coords[j][0], candidate_coords[j][1]
        )

covered = np.zeros(num_demand, dtype=bool)
remaining = set(range(num_cand))

# Greedy 설치 위치 선택
for p in range(5, 1500):
    best_j = None
    best_gain = 0
    for j in remaining:
        gain = np.sum([
            weights[i] for i in range(num_demand)
            if not covered[i] and distance_matrix[i, j] <= radius
        ])
        if gain > best_gain:
            best_gain = gain
            best_j = j
    if best_j is None:
        break
    selected_indices.append(best_j)
    selected_names.append(candidate_names[best_j])
    for i in range(num_demand):
        if distance_matrix[i, best_j] <= radius:
            covered[i] = True
    remaining.remove(best_j)
    coverage = np.sum(weights[covered]) / total_weight
    coverage_ratios.append(coverage)
    p_values.append(p)

# 그래프 출력
plt.plot(p_values, coverage_ratios, marker='o')
plt.title("Greedy MCLP - Coverage Elbow Curve (68.8m)")
plt.xlabel("(p)")
plt.ylabel("cover_ratio")
plt.grid(True)
plt.tight_layout()
plt.show()


In [20]:
# 수요지점 및 후보지점
demand_lats = all_df['위도'].values
demand_lons = all_df['경도'].values
weights = all_df['가중치'].values
names = all_df['이름'].values
total_weight = np.sum(weights)

candidate_lats = candidate_df['위도'].values
candidate_lons = candidate_df['경도'].values
candidate_names = candidate_df['이름'].values

# 거리 행렬 (벡터화된 Haversine 또는 geodesic)
from haversine import haversine_vector, Unit

demand_coords = list(zip(demand_lats, demand_lons))
candidate_coords = list(zip(candidate_lats, candidate_lons))

distance_matrix = np.array([
    haversine_vector([dp] * len(candidate_coords), candidate_coords, unit=Unit.METERS)
    for dp in demand_coords
])

# 초기화
num_demand = len(demand_coords)
num_cand = len(candidate_coords)
coverage_radius = 68.8
p = 800

covered = np.zeros(num_demand, dtype=bool)
remaining = set(range(num_cand))
selected_indices = []
selected_names = []

# Greedy 선택 (벡터 연산으로 gain 계산)
for _ in range(p):
    best_j = None
    best_gain = 0
    best_cover = None

    for j in remaining:
        newly_covered = (distance_matrix[:, j] <= coverage_radius) & (~covered)
        gain = np.sum(weights[newly_covered])
        if gain > best_gain:
            best_gain = gain
            best_j = j
            best_cover = newly_covered

    if best_j is None or best_gain == 0:
        break

    selected_indices.append(best_j)
    selected_names.append(candidate_names[best_j])
    covered = covered | best_cover  # 벡터 논리합
    remaining.remove(best_j)

# 결과 정리
selected_coords = [(candidate_lats[j], candidate_lons[j]) for j in selected_indices]
selected_df = pd.DataFrame(selected_coords, columns=['위도', '경도'])
selected_df['이름'] = selected_names

# 커버 비율 계산
covered_ratio = np.sum(weights[covered]) / total_weight
print(f"최종 커버율: {covered_ratio:.4f} ({np.sum(covered)} / {len(covered)} 수요지 커버됨)")



최종 커버율: 0.9417 (1336 / 1603 수요지 커버됨)


In [24]:
# 지도 중심 계산
center_lat = np.mean([dp[0] for dp in demand_points])
center_lon = np.mean([dp[1] for dp in demand_points])

# 지도 초기화
m = folium.Map(location=[center_lat, center_lon], zoom_start=14)

# 수요지점 시각화
for i, (lat, lon, weight, name) in enumerate(demand_points):
    is_covered = covered[i]
    folium.CircleMarker(
        location=[lat, lon],
        radius=4 + weight * 10,  # 가중치 기반 크기
        color='black' if is_covered else 'lightgray',  # 커버 여부로 테두리 색
        fill=True,
        fill_color='orange' if is_covered else 'white',
        fill_opacity=0.8,
        tooltip=name,
        popup=f"가중치: {round(weight, 3)}"
    ).add_to(m)

# 설치된 쓰레기통 위치 표시
for idx in selected_indices:
    lat, lon, name = candidate_bins[idx]
    folium.Marker(
        location=[lat, lon],
        icon=folium.Icon(color='green', icon='trash', prefix='fa'),
        tooltip=name,
        popup=f"설치 위치: {name}"
    ).add_to(m)

# 지도 출력
m


In [25]:
# 설치된 후보지 정보 추출
selected_coords = [candidate_bins[j][:2] for j in selected_indices]
selected_names = [candidate_bins[j][2] for j in selected_indices]

# 결과 DataFrame 생성
installed_bins = pd.DataFrame(selected_coords, columns=['위도', '경도'])
installed_bins['이름'] = selected_names
installed_bins['설치여부'] = 1  # 모두 설치된 위치

# CSV로 저장 (한글 깨짐 방지: CP949)
installed_bins.to_csv("greedy_mclp_installed_bins.csv", index=False, encoding="CP949")
