In [10]:
import pandas as pd
from pulp import LpProblem, LpVariable, LpBinary, LpMinimize, lpSum
from sklearn.preprocessing import MinMaxScaler
from geopy.distance import geodesic
import folium
import numpy as np
from haversine import haversine, Unit

In [2]:
bus_df = pd.read_csv("데이터\입지후보데이터\노선_요약.csv")
subway_df = pd.read_csv("데이터\입지후보데이터\지하철_요약.csv")
cafe_df = pd.read_csv("데이터\입지후보데이터\일반음식점(카페)현황.csv", encoding="CP949")
park_df = pd.read_csv("데이터\입지후보데이터\도시공원정보현황(제공표준).csv", encoding="CP949")
trash_bin_df = pd.read_csv("데이터\입지후보데이터\경기도 성남시_쓰레기통_설치현황_20250325.csv")

In [3]:
cafe_df = cafe_df[(cafe_df['시군명'] == '성남시') & (cafe_df['영업상태명'] == '영업')]
cafe_df = cafe_df.rename(columns={'WGS84위도': '위도', 'WGS84경도': '경도', '사업장명': '이름'})

park_df = park_df[park_df['소재지지번주소'].str.startswith('경기도 성남시')].dropna(subset=['위도', '경도'])
park_df = park_df.rename(columns={'공원명': '이름'})

subway_df = subway_df.rename(columns={'역사명': '이름'})
bus_df = bus_df.dropna().rename(columns={'정류장명': '이름'})

scaler = MinMaxScaler()
cafe_df['가중치'] = scaler.fit_transform(cafe_df[['총시설규모(㎡)']])
park_df['가중치'] = scaler.fit_transform(park_df[['공원면적(㎡)']])
bus_df['혼잡도'] = bus_df['환승시간(분)'] * bus_df['노선개수']
bus_df['가중치'] = scaler.fit_transform(bus_df[['혼잡도']])
subway_df['일평균승하차인원'] = subway_df['승차총승객수'] + subway_df['하차총승객수']
subway_df['가중치'] = scaler.fit_transform(subway_df[['일평균승하차인원']])

In [4]:
bus_coords = bus_df[['위도', '경도', '가중치', '이름']].copy()
subway_coords = subway_df[['위도', '경도', '가중치', '이름']].copy()
cafe_coords = cafe_df[['위도', '경도', '가중치', '이름']].copy()
park_coords = park_df[['위도', '경도', '가중치', '이름']].copy()

all_df = pd.concat([bus_coords, subway_coords, cafe_coords, park_coords], ignore_index=True)
demand_df = all_df.copy()

In [5]:
existing_bins = set(zip(trash_bin_df['위도'].round(6), trash_bin_df['경도'].round(6)))


all_candidates_df = pd.concat([
    park_df[['위도', '경도', '이름']],
    bus_df[['위도', '경도', '이름']]
], ignore_index=True)

all_candidates_df['좌표'] = list(zip(all_candidates_df['위도'].round(6), all_candidates_df['경도'].round(6)))
new_candidates_df = all_candidates_df[~all_candidates_df['좌표'].isin(existing_bins)]

In [6]:
district_info = {
    '분당구': {'bounds': [[37.300, 127.000], [37.413, 127.170]], '유동인구': 198378136, '폐기물': 57097.45},
    '중원구': {'bounds': [[37.413, 127.120], [37.445, 127.190]], '유동인구': 62310816, '폐기물': 43988.86},
    '수정구1': {'bounds': [[37.413, 127.040], [37.468, 127.120]], '유동인구': 75331308, '폐기물': 63006.79},
    '수정구2': {'bounds': [[37.480, 127.120], [37.445, 127.190]], '유동인구': 75331308, '폐기물': 63006.79},
}

def assign_district_weight(row):
    lat, lon = row['위도'], row['경도']
    for name, info in district_info.items():
        (lat1, lon1), (lat2, lon2) = info['bounds']
        if min(lat1, lat2) <= lat <= max(lat1, lat2) and min(lon1, lon2) <= lon <= max(lon1, lon2):
            return pd.Series({
                '구역': name.replace("1", "").replace("2", ""),
                '구역_유동인구': info['유동인구'],
                '구역_폐기물': info['폐기물']
            })
    return pd.Series({'구역': None, '구역_유동인구': None, '구역_폐기물': None})

all_df[['구역', '구역_유동인구', '구역_폐기물']] = all_df.apply(assign_district_weight, axis=1)

In [7]:
delta = 0.05 

all_df['유동인구당_폐기물'] = all_df['구역_폐기물'] / all_df['구역_유동인구']

district_influence = all_df.groupby('구역')['유동인구당_폐기물'].mean().sort_values(ascending=False)

rank_adjustment = {
    district_influence.index[0]: +delta,
    district_influence.index[1]: 0,
    district_influence.index[2]: -delta,
}

all_df['조정비율'] = all_df['구역'].map(rank_adjustment)
all_df['가중치'] = all_df['가중치'] * (1 + all_df['조정비율'])

In [8]:
all_df

Unnamed: 0,위도,경도,가중치,이름,구역,구역_유동인구,구역_폐기물,유동인구당_폐기물,조정비율
0,37.411683,127.099350,0.508769,판교제2테크노밸리,분당구,198378136,57097.45,0.000288,-0.05
1,37.411333,127.099733,0.376227,판교제2테크노밸리,분당구,198378136,57097.45,0.000288,-0.05
2,37.422467,127.101233,0.101548,동산마을입구,수정구,75331308,63006.79,0.000836,0.05
3,37.422800,127.101400,0.066791,동산마을입구,수정구,75331308,63006.79,0.000836,0.05
4,37.428217,127.101650,0.397460,성남농협대왕지점.고등동우체국,수정구,75331308,63006.79,0.000836,0.05
...,...,...,...,...,...,...,...,...,...
1598,37.344885,127.118069,0.002955,하얀공원,분당구,198378136,57097.45,0.000288,-0.05
1599,37.344557,127.114146,0.004516,오리공원,분당구,198378136,57097.45,0.000288,-0.05
1600,37.337391,127.117303,0.006785,구미공원,분당구,198378136,57097.45,0.000288,-0.05
1601,37.415308,127.137469,0.001201,연꽃공원,중원구,62310816,43988.86,0.000706,0.00


In [16]:
from pulp import LpProblem, LpMinimize, LpVariable

prob = LpProblem("test", LpMinimize)
x = LpVariable("x", lowBound=0)
y = LpVariable("y", lowBound=0)
prob += x + y
prob += x + 2*y >= 4
status = prob.solve()
print("정상 실행 여부:", status)


정상 실행 여부: 1


In [11]:
coords = all_df[['위도', '경도']].values
weights = all_df['가중치'].values
total_weight = np.sum(weights)

candidate_coords = new_candidates_df[['위도', '경도']].values.round(6)
existing_coords = set(map(tuple, trash_bin_df[['위도', '경도']].values.round(6)))

demand_coords = coords  # alias

def find_p_for_target_coverage(target_coverage=0.8890, radius=68.8, p_max=1000):
    num_demand = len(demand_coords)
    num_cand = len(candidate_coords)

    # 거리 행렬
    distance_matrix = [
        [haversine(tuple(demand_coords[i]), tuple(candidate_coords[j]), unit=Unit.METERS)
         for j in range(num_cand)]
        for i in range(num_demand)
    ]

    for p in range(580, p_max + 1, 10):
        # 변수
        x = [LpVariable(f"x_{j}", cat=LpBinary) for j in range(num_cand)]
        z = [[LpVariable(f"z_{i}_{j}", cat=LpBinary) for j in range(num_cand)] for i in range(num_demand)]

        # 모델
        model = LpProblem("P-Median", LpMinimize)
        model += lpSum([weights[i] * distance_matrix[i][j] * z[i][j]
                        for i in range(num_demand) for j in range(num_cand)])

        for i in range(num_demand):
            model += lpSum(z[i]) == 1
            for j in range(num_cand):
                model += z[i][j] <= x[j]
        model += lpSum(x) <= p

        model.solve()

        # 커버된 수요 계산
        covered_weight = 0
        for i in range(num_demand):
            for j in range(num_cand):
                if z[i][j].varValue == 1 and distance_matrix[i][j] <= radius:
                    covered_weight += weights[i]
                    break

        coverage_ratio = covered_weight / total_weight
        print(f"p={p}, coverage={coverage_ratio:.4f}")

        if coverage_ratio >= target_coverage:
            print(f"✅ 목표 커버율 {target_coverage} 이상 달성! → 최소 p = {p}")
            return p, coverage_ratio

    print("❌ 목표 커버율을 달성할 수 없습니다.")
    return None, None



In [12]:
target_p, achieved_cov = find_p_for_target_coverage(target_coverage=0.8890)

PulpSolverError: Pulp: Error while trying to execute, use msg=True for more detailsC:\ProgramData\anaconda3\Lib\site-packages\pulp\apis\../solverdir/cbc/win/i64/cbc.exe

In [14]:
distance_matrix = [
    [geodesic(dp[:2], cb[:2]).meters for cb in new_candidates_df]
    for dp in demand_df
]

num_demands = len(demand_df)
num_candidates = len(new_candidates_df)
p = 20  # 개수


ValueError: Failed to create Point instance from string: unknown format.

In [6]:
x = [LpVariable(f"x_{j}", cat=LpBinary) for j in range(num_candidates)]
y = [[LpVariable(f"y_{i}_{j}", cat=LpBinary) for j in range(num_candidates)] for i in range(num_demands)]

model = LpProblem("P_Median", LpMinimize)
model += lpSum(distance_matrix[i][j] * y[i][j] for i in range(num_demands) for j in range(num_candidates))


In [7]:
for i in range(num_demands):
    model += lpSum(y[i][j] for j in range(num_candidates)) == 1

for i in range(num_demands):
    for j in range(num_candidates):
        model += y[i][j] <= x[j]

model += lpSum(x[j] for j in range(num_candidates)) <= p


In [8]:
model.solve()

selected_locations = []
for j in range(num_candidates):
    if x[j].varValue > 0.5:
        lat, lon, name = candidate_sample[j]
        selected_locations.append((lat, lon, name))

print(f"\n설치 위치 {len(selected_locations)}개")
for loc in selected_locations[:10]:
    print(f"설치 위치: {loc[2]} @ ({loc[0]:.5f}, {loc[1]:.5f})")



설치 위치 20개
설치 위치: 금상소공원 @ (37.44178, 127.16350)
설치 위치: 중앙시장 @ (37.44163, 127.13160)
설치 위치: 황새울공원 @ (37.38326, 127.11814)
설치 위치: 판교풍경채5단지 @ (37.36500, 127.06910)
설치 위치: SK지식산업센터 @ (37.43517, 127.17407)
설치 위치: 벌말공원 @ (37.34765, 127.11086)
설치 위치: 단대지구1호(하늘빛)공원 @ (37.45196, 127.15819)
설치 위치: I_SQUARE.창업주택 @ (37.41208, 127.09580)
설치 위치: 삼환아파트 @ (37.37335, 127.13523)
설치 위치: 한림아파트 @ (37.38850, 127.09608)


In [9]:
center_lat = sum([pt[0] for pt in demand_sample]) / len(demand_sample)
center_lon = sum([pt[1] for pt in demand_sample]) / len(demand_sample)
m = folium.Map(location=[center_lat, center_lon], zoom_start=13)

for lat, lon, _, name in demand_sample:
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        color='gray',
        fill=True,
        fill_color='orange',
        fill_opacity=0.5,
        tooltip=name
    ).add_to(m)

for lat, lon, name in selected_locations:
    folium.Marker(
        location=[lat, lon],
        icon=folium.Icon(color='blue', icon='trash', prefix='fa'),
        tooltip=f"설치 위치: {name}"
    ).add_to(m)

m


In [10]:
selected_locations = []
selected_indices = []
for j in range(num_candidates):
    if x[j].varValue > 0.5:
        lat, lon, name = candidate_sample[j]
        selected_locations.append((lat, lon, name))
        selected_indices.append(j)

before_distances = []
after_distances = []

for i, dp in enumerate(demand_sample):
    demand_coord = dp[:2]
    
    dist_to_existing = [geodesic(demand_coord, bin_loc).meters for bin_loc in zip(trash_bin_df['위도'], trash_bin_df['경도'])]
    before_distances.append(min(dist_to_existing))
    
    dist_to_selected = [geodesic(demand_coord, candidate_sample[j][:2]).meters for j in selected_indices]
    after_distances.append(min(dist_to_selected))

import numpy as np
print(f"평균 거리 (기존): {np.mean(before_distances):.2f}m")
print(f"평균 거리 (p-Median): {np.mean(after_distances):.2f}m")
print(f"최대 거리 (기존): {np.max(before_distances):.2f}m")
print(f"최대 거리 (p-Median): {np.max(after_distances):.2f}m")


평균 거리 (기존): 1116.79m
평균 거리 (p-Median): 722.38m
최대 거리 (기존): 6436.12m
최대 거리 (p-Median): 2506.21m
