In [13]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point


raw = pd.read_csv(r"2018_2024_지오코딩안되어있는결측치제거.csv", encoding='utf-8') 
art = pd.read_csv(r"art_data.csv", encoding='utf-8')
school = pd.read_csv(r"school_data.csv", encoding='utf-8')
subway = pd.read_csv(r"subway.csv", encoding='utf-8')
park = pd.read_csv(r"park_data.csv", encoding='utf-8')
bus_stop = pd.read_csv(r"bus_stop_data.csv", encoding='utf-8')
hospital = pd.read_csv(r"hospital_data.csv", encoding='euc-kr')
big_market = pd.read_csv(r"big_market.csv", encoding='euc-kr')

gdf_real = gpd.GeoDataFrame(
    raw,
    geometry=gpd.points_from_xy(raw.lng, raw.lat),
    crs="EPSG:4326"
)

gdf_art = gpd.GeoDataFrame(
    art,
    geometry=gpd.points_from_xy(art.lng, art.lat),
    crs="EPSG:4326"
)

gdf_school = gpd.GeoDataFrame(
    school,
    geometry=gpd.points_from_xy(school.lng, school.lat),
    crs="EPSG:4326"
)

gdf_subway = gpd.GeoDataFrame(
    subway,
    geometry=gpd.points_from_xy(subway.lng, subway.lat),
    crs="EPSG:4326"
)

gdf_park = gpd.GeoDataFrame(
    park,
    geometry=gpd.points_from_xy(park.lng, park.lat),
    crs="EPSG:4326"
)

gdf_bus_stop = gpd.GeoDataFrame(
    bus_stop,
    geometry=gpd.points_from_xy(bus_stop.lng, bus_stop.lat),
    crs="EPSG:4326"
)

gdf_hospital = gpd.GeoDataFrame(
    hospital,
    geometry=gpd.points_from_xy(hospital.lng, hospital.lat),
    crs="EPSG:4326"
)

gdf_big_market = gpd.GeoDataFrame(
    big_market,
    geometry=gpd.points_from_xy(big_market.lng, big_market.lat),
    crs="EPSG:5174"
)

gdf_real = gdf_real.to_crs(epsg=5186)
gdf_art = gdf_art.to_crs(epsg=5186)
gdf_school = gdf_school.to_crs(epsg=5186)
gdf_subway = gdf_subway.to_crs(epsg=5186)
gdf_park = gdf_park.to_crs(epsg=5186)
gdf_bus_stop = gdf_bus_stop.to_crs(epsg=5186)
gdf_hospital = gdf_hospital.to_crs(epsg=5186)
gdf_big_market = gdf_big_market.to_crs(epsg=5186)

gdf_art_buffer = gdf_art.copy()
gdf_art_buffer['geometry'] = gdf_art.geometry.buffer(3000)  # 문화시설 3km

gdf_school_buffer = gdf_school.copy()
gdf_school_buffer['geometry'] = gdf_school.geometry.buffer(500)  # 학교 500m

gdf_subway_buffer = gdf_subway.copy()
gdf_subway_buffer['geometry'] = gdf_subway.geometry.buffer(500)  # 지하철 500m

gdf_park_buffer = gdf_park.copy()
gdf_park_buffer['geometry'] = gdf_park.geometry.buffer(500)  # 공원 500m

gdf_bus_stop_buffer = gdf_bus_stop.copy()
gdf_bus_stop_buffer['geometry'] = gdf_bus_stop.geometry.buffer(500)

gdf_hospital_buffer = gdf_hospital.copy()
gdf_hospital_buffer['geometry'] = gdf_hospital.geometry.buffer(500)

gdf_big_market_buffer = gdf_big_market.copy()
gdf_big_market_buffer['geometry'] = gdf_big_market.geometry.buffer(3000)


join_art = gpd.sjoin(gdf_real, gdf_art_buffer, how='left', predicate='within')
art_flag = join_art.groupby(join_art.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_art'] = art_flag

join_school = gpd.sjoin(gdf_real, gdf_school_buffer, how='left', predicate='within')
school_flag = join_school.groupby(join_school.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_school'] = school_flag

join_subway = gpd.sjoin(gdf_real, gdf_subway_buffer, how='left', predicate='within')
subway_flag = join_subway.groupby(join_subway.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_subway'] = subway_flag

join_park = gpd.sjoin(gdf_real, gdf_park_buffer, how='left', predicate='within')
park_flag = join_park.groupby(join_park.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_park'] = park_flag

join_bus_stop = gpd.sjoin(gdf_real, gdf_bus_stop_buffer, how='left', predicate='within')
bus_stop_flag = join_bus_stop.groupby(join_bus_stop.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_bus_stop'] = bus_stop_flag

join_hospital = gpd.sjoin(gdf_real, gdf_hospital_buffer, how='left', predicate='within')
hospital_flag = join_hospital.groupby(join_hospital.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_hospital'] = hospital_flag

join_big_market = gpd.sjoin(gdf_real, gdf_big_market_buffer, how='left', predicate='within')
big_market_flag = join_big_market.groupby(join_big_market.index)['index_right'].apply(lambda x: x.notna().any())
gdf_real['near_big_market'] = big_market_flag

gdf_real['infra_count'] = gdf_real[['near_art', 'near_school', 'near_subway', 'near_park',
                                    'near_bus_stop', 'near_hospital', 'near_big_market']].sum(axis=1)


max_infra = gdf_real['infra_count'].max()

infra_max_filtered = gdf_real[
    (gdf_real['infra_count'] == max_infra)
]


final_result = infra_max_filtered.drop(columns='geometry')
final_result

Unnamed: 0,주소,건물명_x,lat,lng,near_art,near_school,near_subway,near_park,near_bus_stop,near_hospital,near_big_market,infra_count
10,서울특별시 종로구 숭인동 181-121,도시그린빌라,37.576533,127.020072,True,True,True,True,True,True,True,7
56,서울특별시 강서구 등촌동 632-2,태영,37.555792,126.864432,True,True,True,True,True,True,True,7
80,서울특별시 종로구 숭인동 766,종로청계힐스테이트,37.575785,127.020959,True,True,True,True,True,True,True,7
148,서울특별시 강동구 명일동 335-7,해밀,37.547998,127.144018,True,True,True,True,True,True,True,7
213,서울특별시 강동구 천호동 26-12,평대보민아파트,37.548913,127.142583,True,True,True,True,True,True,True,7
...,...,...,...,...,...,...,...,...,...,...,...,...
8361,서울특별시 은평구 불광동 486-13,연신내역트리플라102동,37.620342,126.920988,True,True,True,True,True,True,True,7
8483,서울특별시 성북구 보문동7가 139-1,DH팰리스타워,37.578168,127.022793,True,True,True,True,True,True,True,7
8496,서울특별시 강남구 도곡동 193-45,필로스(193-45),37.486983,127.044305,True,True,True,True,True,True,True,7
8538,서울특별시 강남구 도곡동 193-67,PLUMHILLS,37.487593,127.044125,True,True,True,True,True,True,True,7


In [None]:
import pandas as pd
import geopandas as gpd
import folium
import json
import matplotlib.pyplot as plt

df = final_result.copy()

gdf_real = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.lng, df.lat),
    crs="EPSG:4326"
)


center_lat = gdf_real['lat'].mean()
center_lng = gdf_real['lng'].mean()
m = folium.Map(location=[center_lat, center_lng], zoom_start=12)


with open("seoul_sggg.geojson", encoding="utf-8") as f:
    seoul_geo = json.load(f)

folium.GeoJson(
    seoul_geo,
    name="Seoul Districts",
    style_function=lambda feature: {
        'fillColor': 'gray',
        'color': 'white',
        'weight': 1,
        'fillOpacity': 0.4
    }


for _, row in gdf_real.iterrows():
    folium.CircleMarker(
        location=[row['lat'], row['lng']],
        radius=1,
        color="red",
        fill=True,
        fill_opacity=0.6
    ).add_to(m)

folium.LayerControl().add_to(m)

# 지도 표시
m

In [12]:
import pandas as pd
import geopandas as gpd

raw_unique = pd.read_csv(r"2018_2024_지오코딩안되어있는결측치제거.csv", encoding='utf-8')   # 중복 제거 버전
raw_full   = pd.read_csv(r"2018_2024_결측치 제거된 로우데이터.csv", encoding='utf-8')        # 중복 포함 전체 데이터

art = pd.read_csv(r"art_data.csv", encoding='utf-8')
school = pd.read_csv(r"school_data.csv", encoding='utf-8')
subway = pd.read_csv(r"subway.csv", encoding='utf-8')
park = pd.read_csv(r"park_data.csv", encoding='utf-8')
bus_stop = pd.read_csv(r"bus_stop_data.csv", encoding='utf-8')
hospital = pd.read_csv(r"hospital_data.csv", encoding='euc-kr')
big_market = pd.read_csv(r"big_market.csv", encoding='euc-kr')


gdf_real = gpd.GeoDataFrame(
    raw_unique,
    geometry=gpd.points_from_xy(raw_unique.lng, raw_unique.lat),
    crs="EPSG:4326"
)

gdf_art = gpd.GeoDataFrame(art, geometry=gpd.points_from_xy(art.lng, art.lat), crs="EPSG:4326")
gdf_school = gpd.GeoDataFrame(school, geometry=gpd.points_from_xy(school.lng, school.lat), crs="EPSG:4326")
gdf_subway = gpd.GeoDataFrame(subway, geometry=gpd.points_from_xy(subway.lng, subway.lat), crs="EPSG:4326")
gdf_park = gpd.GeoDataFrame(park, geometry=gpd.points_from_xy(park.lng, park.lat), crs="EPSG:4326")
gdf_bus_stop = gpd.GeoDataFrame(bus_stop, geometry=gpd.points_from_xy(bus_stop.lng, bus_stop.lat), crs="EPSG:4326")
gdf_hospital = gpd.GeoDataFrame(hospital, geometry=gpd.points_from_xy(hospital.lng, hospital.lat), crs="EPSG:4326")
gdf_big_market = gpd.GeoDataFrame(big_market, geometry=gpd.points_from_xy(big_market.lng, big_market.lat), crs="EPSG:5174")

# CRS 통일
target_crs = 5186
gdf_real = gdf_real.to_crs(epsg=target_crs)
gdf_art = gdf_art.to_crs(epsg=target_crs)
gdf_school = gdf_school.to_crs(epsg=target_crs)
gdf_subway = gdf_subway.to_crs(epsg=target_crs)
gdf_park = gdf_park.to_crs(epsg=target_crs)
gdf_bus_stop = gdf_bus_stop.to_crs(epsg=target_crs)
gdf_hospital = gdf_hospital.to_crs(epsg=target_crs)
gdf_big_market = gdf_big_market.to_crs(epsg=target_crs)


gdf_art_buffer = gdf_art.copy()
gdf_art_buffer["geometry"] = gdf_art.geometry.buffer(3000)

gdf_school_buffer = gdf_school.copy()
gdf_school_buffer["geometry"] = gdf_school.geometry.buffer(500)

gdf_subway_buffer = gdf_subway.copy()
gdf_subway_buffer["geometry"] = gdf_subway.geometry.buffer(500)

gdf_park_buffer = gdf_park.copy()
gdf_park_buffer["geometry"] = gdf_park.geometry.buffer(500)

gdf_bus_stop_buffer = gdf_bus_stop.copy()
gdf_bus_stop_buffer["geometry"] = gdf_bus_stop.geometry.buffer(500)

gdf_hospital_buffer = gdf_hospital.copy()
gdf_hospital_buffer["geometry"] = gdf_hospital.geometry.buffer(500)

gdf_big_market_buffer = gdf_big_market.copy()
gdf_big_market_buffer["geometry"] = gdf_big_market.geometry.buffer(3000)


def add_flag(real, buffer, col):
    join_df = gpd.sjoin(real, buffer, how="left", predicate="within")
    flag = join_df.groupby(join_df.index)["index_right"].apply(lambda x: x.notna().any())
    real[col] = flag

add_flag(gdf_real, gdf_art_buffer, "near_art")
add_flag(gdf_real, gdf_school_buffer, "near_school")
add_flag(gdf_real, gdf_subway_buffer, "near_subway")
add_flag(gdf_real, gdf_park_buffer, "near_park")
add_flag(gdf_real, gdf_bus_stop_buffer, "near_bus_stop")
add_flag(gdf_real, gdf_hospital_buffer, "near_hospital")
add_flag(gdf_real, gdf_big_market_buffer, "near_big_market")

gdf_real["infra_count"] = gdf_real[
    ["near_art","near_school","near_subway","near_park",
     "near_bus_stop","near_hospital","near_big_market"]
].sum(axis=1)


max_infra = gdf_real["infra_count"].max()

best_addresses = gdf_real.loc[
    gdf_real["infra_count"] == max_infra, "주소"
]

raw_full_filtered = raw_full[
    (raw_full["물건금액(만원)"] >= 50000) &
    (raw_full["건물면적(㎡)"] >= 70) &
]

final_result = raw_full[ raw_full["주소"].isin(best_addresses) ]
final_result

Unnamed: 0,계약 연도,시명,자치구코드,자치구명,법정동코드,법정동명,본번,부번,주소,건물명,물건금액(만원),건물면적(㎡),평단가(만원),평수(평),층,취소일,건축년도,건물용도,lat,lng
10,2018,서울특별시,11110,종로구,17500,숭인동,181,121.0,서울특별시 종로구 숭인동 181-121,도시그린빌라,24300,57.45,1429,17,3,,1997,아파트,37.576533,127.020072
56,2018,서울특별시,11500,강서구,10200,등촌동,632,2.0,서울특별시 강서구 등촌동 632-2,태영,57500,84.98,2212,26,7,,1998,아파트,37.555792,126.864432
81,2018,서울특별시,11110,종로구,17500,숭인동,766,,서울특별시 종로구 숭인동 766,종로청계힐스테이트,78700,84.95,3027,26,9,,2009,아파트,37.575785,127.020959
155,2018,서울특별시,11740,강동구,10100,명일동,335,7.0,서울특별시 강동구 명일동 335-7,해밀,37000,69.80,1762,21,2,,2007,아파트,37.547998,127.144018
244,2018,서울특별시,11740,강동구,10900,천호동,26,12.0,서울특별시 강동구 천호동 26-12,평대보민아파트,39500,72.00,1795,22,6,,2003,아파트,37.548913,127.142583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376417,2023,서울특별시,11740,강동구,10200,고덕동,693,,서울특별시 강동구 고덕동 693,고덕그라시움,157000,84.24,6038,26,21,,2019,아파트,37.556177,127.159177
376445,2023,서울특별시,11680,강남구,10400,청담동,65,,서울특별시 강남구 청담동 65,진흥아파트,333500,145.69,7580,44,9,,1984,아파트,37.520020,127.054217
376457,2023,서울특별시,11290,성북구,10300,돈암동,609,1.0,서울특별시 성북구 돈암동 609-1,한신,88500,132.96,2212,40,8,,1998,아파트,37.594473,127.012706
376501,2023,서울특별시,11590,동작구,10800,대방동,501,,서울특별시 동작구 대방동 501,대림아파트,162800,134.25,3971,41,9,,1993,아파트,37.508743,126.924518


In [14]:
print(raw[["lat","lng"]].head())
print(raw.dtypes)

         lat         lng
0  37.548263  126.840981
1  37.559454  126.946520
2  37.487613  126.970501
3  37.600482  126.918435
4  37.656838  127.027578
주소        object
건물명_x     object
lat      float64
lng      float64
dtype: object
