In [75]:
import folium
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import box
import numpy as np

In [76]:
import matplotlib.font_manager as fm

# Plot text setup: use the NanumGothic font so Korean labels can be drawn,
# and turn off the Unicode minus so negative tick labels do not render broken.
plt.rcParams.update({
    'font.family': 'NanumGothic',
    'axes.unicode_minus': False,
})

In [77]:
# Read the Seoul shapefile; attribute text is decoded as UTF-8.
df = gpd.read_file(
    "data/seoul_utf_8.shp",
    encoding='utf-8',
)

In [78]:
# Show the frame that was just loaded (the cell's last expression is rendered).
df

Unnamed: 0,pop,total_b,res_single,res_multi,b_over_20,EMD_CD,EMD_KOR_NM,geometry
0,364.0,13.0,3.0,4.0,,11560111,당산동1가,"POLYGON ((126.89910 37.51987, 126.89909 37.520..."
1,448.0,21.0,3.0,18.0,16.0,11590108,대방동,"POLYGON ((126.92866 37.50109, 126.92866 37.501..."
2,448.0,21.0,3.0,18.0,16.0,11590102,상도동,"POLYGON ((126.92866 37.50109, 126.92866 37.501..."
3,426.0,2.0,,2.0,,11710109,장지동,"POLYGON ((127.13469 37.47576, 127.13469 37.476..."
4,215.0,10.0,1.0,6.0,,11470103,신월동,"POLYGON ((126.84134 37.52497, 126.84133 37.525..."
...,...,...,...,...,...,...,...,...
73889,,,,,,11350102,월계동,"POLYGON ((127.06374 37.61794, 127.06374 37.618..."
73890,,,,,,11650109,내곡동,"POLYGON ((127.06813 37.44669, 127.06813 37.447..."
73891,,8.0,,,,11680104,청담동,"POLYGON ((127.04051 37.52681, 127.04050 37.527..."
73892,,8.0,,,,11680107,신사동,"POLYGON ((127.04051 37.52681, 127.04050 37.527..."


In [None]:
# Print the column summary (names, non-null counts, dtypes) of the loaded frame.
df.info()

In [None]:
# Count how many distinct values the 'geometry' column holds before de-duplication.
df['geometry'].nunique()

In [66]:
# Collect every row whose 'geometry' occurs more than once; keep=False flags
# all members of a duplicate group rather than only the repeats.
repeated_mask = df.duplicated(subset='geometry', keep=False)
duplicates = df.loc[repeated_mask]

# Sort on the 'geometry' column to make the duplicated rows easier to read.
# Pass a different column name to sort_values to order the rows some other way.
duplicates_sorted = duplicates.sort_values('geometry')

In [67]:
# Display the rows that share a 'geometry' value, in sorted order.
duplicates_sorted

Unnamed: 0,pop,total_b,res_single,res_multi,b_over_20,EMD_CD,EMD_KOR_NM,geometry
40190,,1.0,,,,11530109,궁동,"POLYGON ((126.82691 37.49334, 126.82690 37.494..."
40189,,1.0,,,,11530110,온수동,"POLYGON ((126.82691 37.49334, 126.82690 37.494..."
60218,,,,,,11530109,궁동,"POLYGON ((126.82692 37.49244, 126.82691 37.493..."
60216,,,,,,11530110,온수동,"POLYGON ((126.82692 37.49244, 126.82691 37.493..."
60217,,,,,,11530108,오류동,"POLYGON ((126.82692 37.49244, 126.82691 37.493..."
...,...,...,...,...,...,...,...,...
19469,11.0,3.0,2.0,,,11710113,거여동,"POLYGON ((127.15725 37.48935, 127.15725 37.490..."
778,122.0,13.0,5.0,4.0,,11710114,마천동,"POLYGON ((127.15612 37.49025, 127.15611 37.491..."
777,122.0,13.0,5.0,4.0,,11710113,거여동,"POLYGON ((127.15612 37.49025, 127.15611 37.491..."
13715,201.0,28.0,11.0,10.0,26.0,11710114,마천동,"POLYGON ((127.15725 37.49025, 127.15724 37.491..."


In [68]:
# Keep only the first row for each distinct 'geometry'; later repeats are dropped.
later_repeat = df.duplicated(subset='geometry', keep='first')
gdf = df.loc[~later_repeat]

In [69]:
# Display the frame after duplicate geometries were removed.
gdf

Unnamed: 0,pop,total_b,res_single,res_multi,b_over_20,EMD_CD,EMD_KOR_NM,geometry
0,364.0,13.0,3.0,4.0,,11560111,당산동1가,"POLYGON ((126.89910 37.51987, 126.89909 37.520..."
1,448.0,21.0,3.0,18.0,16.0,11590108,대방동,"POLYGON ((126.92866 37.50109, 126.92866 37.501..."
3,426.0,2.0,,2.0,,11710109,장지동,"POLYGON ((127.13469 37.47576, 127.13469 37.476..."
4,215.0,10.0,1.0,6.0,,11470103,신월동,"POLYGON ((126.84134 37.52497, 126.84133 37.525..."
5,52.0,17.0,3.0,,13.0,11680105,삼성동,"POLYGON ((127.04400 37.51060, 127.04400 37.511..."
...,...,...,...,...,...,...,...,...
73887,,,,,,11350105,상계동,"POLYGON ((127.08835 37.67661, 127.08834 37.677..."
73888,,,,,,11500109,방화동,"POLYGON ((126.81818 37.58252, 126.81817 37.583..."
73889,,,,,,11350102,월계동,"POLYGON ((127.06374 37.61794, 127.06374 37.618..."
73890,,,,,,11650109,내곡동,"POLYGON ((127.06813 37.44669, 127.06813 37.447..."


In [70]:
# Count the distinct 'geometry' values remaining in the de-duplicated frame.
gdf['geometry'].nunique()

61647

In [71]:
# Replace the gappy index left by the row removal with a fresh 0..n-1 index,
# discarding the old labels instead of keeping them as a column.
gdf = gdf.reset_index(drop=True)

In [72]:
# Add a 1-based 'id' as the first column, derived from the current index
# (assumes the index is the default 0-based one -- confirm it was reset first).
# DataFrame.insert raises ValueError when the column already exists, so the
# guard makes this cell safe to run more than once on the same frame.
if 'id' not in gdf.columns:
    gdf.insert(0, 'id', gdf.index + 1)

In [73]:
# Display the frame with the new leading 'id' column.
gdf

Unnamed: 0,id,pop,total_b,res_single,res_multi,b_over_20,EMD_CD,EMD_KOR_NM,geometry
0,1,364.0,13.0,3.0,4.0,,11560111,당산동1가,"POLYGON ((126.89910 37.51987, 126.89909 37.520..."
1,2,448.0,21.0,3.0,18.0,16.0,11590108,대방동,"POLYGON ((126.92866 37.50109, 126.92866 37.501..."
2,3,426.0,2.0,,2.0,,11710109,장지동,"POLYGON ((127.13469 37.47576, 127.13469 37.476..."
3,4,215.0,10.0,1.0,6.0,,11470103,신월동,"POLYGON ((126.84134 37.52497, 126.84133 37.525..."
4,5,52.0,17.0,3.0,,13.0,11680105,삼성동,"POLYGON ((127.04400 37.51060, 127.04400 37.511..."
...,...,...,...,...,...,...,...,...,...
61642,61643,,,,,,11350105,상계동,"POLYGON ((127.08835 37.67661, 127.08834 37.677..."
61643,61644,,,,,,11500109,방화동,"POLYGON ((126.81818 37.58252, 126.81817 37.583..."
61644,61645,,,,,,11350102,월계동,"POLYGON ((127.06374 37.61794, 127.06374 37.618..."
61645,61646,,,,,,11650109,내곡동,"POLYGON ((127.06813 37.44669, 127.06813 37.447..."


In [74]:
# Write gdf out as a shapefile, encoding attribute text as UTF-8.
gdf.to_file(
    "data/seoul_geo.shp",
    encoding='utf-8',
)

In [79]:
# Add up the 'pop' column of gdf (pandas' sum skips missing values by default)
# and report the total.
pop_values = gdf['pop']
total_pop = pop_values.sum()
print("전체 인구 수:", total_pop)


전체 인구 수: 9133449.0
