In [37]:
import folium
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import box
import numpy as np

In [38]:
import matplotlib.font_manager as fm

# 한국어 출력을 위한 폰트 설정
plt.rc('font', family='NanumGothic') 

# 마이너스 기호가 깨지는 것을 방지
plt.rcParams['axes.unicode_minus'] = False

In [39]:
population = gpd.read_file("data/총 인구 수/nlsp_021001001.shp", encoding='utf-8')
building_20_25 = gpd.read_file("data/시기별 건축물 수(20년 이상 25년 미만/nlsp_021002017.shp", encoding='utf-8')
building_25_30 = gpd.read_file("data/시기별 건축물 수(25년 이상 30년 미만)", encoding='utf-8')
building_30_35 = gpd.read_file("data/시기별 건축물 수(30년 이상 35년 미만/nlsp_021002019.shp", encoding='utf-8')
building_over_35 = gpd.read_file("data/시기별 건축물 수(35년 이상)/nlsp_021002020.shp", encoding='utf-8')
total_buildings = gpd.read_file("data/건축물 수 합계 통계/nlsp_021002021.shp", encoding='utf-8')
residential_single = gpd.read_file("data/주거용건물 용도별 건축물 수(단독)/nlsp_021002022.shp", encoding='utf-8')
residential_multi = gpd.read_file("data/주거용건물 용도별 건축물 수(공동)/nlsp_021002023.shp", encoding='utf-8')

In [40]:
population.drop(columns=['lbl'], inplace=True)
building_20_25.drop(columns=['lbl'], inplace=True)
building_25_30.drop(columns=['lbl'], inplace=True)
building_30_35.drop(columns=['lbl'], inplace=True)
building_over_35.drop(columns=['lbl'], inplace=True)
total_buildings.drop(columns=['lbl'], inplace=True)
residential_single.drop(columns=['lbl'], inplace=True)
residential_multi.drop(columns=['lbl'], inplace=True)

In [41]:
dataframes = [population, building_20_25, building_25_30, building_30_35, building_over_35, total_buildings, residential_single, residential_multi]
new_names = ['pop', 'b_20_25', 'b_25_30', 'b_30_35', 'b_over_35', 'total_b', 'res_single', 'res_multi']

for df, new_name in zip(dataframes, new_names):
    df.rename(columns={'val': new_name}, inplace=True)

In [42]:
from functools import reduce

# 데이터프레임 리스트 생성
dataframes = [population, building_20_25, building_25_30, building_30_35, building_over_35, total_buildings, residential_single, residential_multi]

# 모든 데이터프레임을 gid와 geometry 열을 기준으로 병합
merged_df = reduce(lambda left, right: pd.merge(left, right, on=['gid', 'geometry']), dataframes)

In [43]:
# 'b_over_20' 열을 생성하고 'b_20_25', 'b_25_30', 'b_30_35', 'b_over_35' 열의 값들을 합칩니다.
merged_df['b_over_20'] = merged_df['b_20_25'] + merged_df['b_25_30'] + merged_df['b_30_35'] + merged_df['b_over_35']

# 더 이상 필요 없는 열들을 삭제합니다.
merged_df.drop(['b_20_25', 'b_25_30', 'b_30_35', 'b_over_35'], axis=1, inplace=True)

In [None]:
merged_df.to_file("data/Seoul_merged_data.shp", encoding='utf-8')

In [None]:
merged_df.to_file

In [45]:
seoul = gpd.read_file("data/seoul_100_grid.shp", encoding='euc-kr')
seoul

Unnamed: 0,EMD_CD,EMD_KOR_NM,geometry
0,11110101,청운동,"POLYGON ((126.96239 37.58619, 126.96232 37.586..."
1,11110101,청운동,"MULTIPOLYGON (((126.96152 37.58656, 126.96143 ..."
2,11110101,청운동,"POLYGON ((126.96322 37.58596, 126.96278 37.586..."
3,11110101,청운동,"POLYGON ((126.96239 37.58703, 126.96239 37.587..."
4,11110101,청운동,"POLYGON ((126.96240 37.58752, 126.96260 37.587..."
...,...,...,...
73803,11740110,강일동,"POLYGON ((127.18201 37.56023, 127.18202 37.560..."
73804,11740110,강일동,"POLYGON ((127.18200 37.56099, 127.18197 37.560..."
73805,11740110,강일동,"POLYGON ((127.18153 37.56208, 127.18156 37.562..."
73806,11740110,강일동,"POLYGON ((127.18119 37.56302, 127.18128 37.562..."


In [51]:
# merged_df의 CRS를 seoul 데이터프레임의 CRS로 변환
merged_df = merged_df.to_crs(seoul.crs)

# 공간 조인을 수행, 이때 op 대신 predicate 매개변수 사용
result = gpd.sjoin(seoul, merged_df)

# 결과 확인
result

Unnamed: 0,EMD_CD,EMD_KOR_NM,geometry,index_right,gid,pop,total_b,res_single,res_multi,b_over_20
0,11110101,청운동,"POLYGON ((126.96239 37.58619, 126.96232 37.586...",37165,다사524542,,,,,
0,11110101,청운동,"POLYGON ((126.96239 37.58619, 126.96232 37.586...",32127,다사525542,,,,,
1,11110101,청운동,"MULTIPOLYGON (((126.96152 37.58656, 126.96143 ...",37165,다사524542,,,,,
1,11110101,청운동,"MULTIPOLYGON (((126.96152 37.58656, 126.96143 ...",32127,다사525542,,,,,
1,11110101,청운동,"MULTIPOLYGON (((126.96152 37.58656, 126.96143 ...",52705,다사525543,,,,,
...,...,...,...,...,...,...,...,...,...,...
73805,11740110,강일동,"POLYGON ((127.18153 37.56208, 127.18156 37.562...",39680,다사719513,,,,,
73805,11740110,강일동,"POLYGON ((127.18153 37.56208, 127.18156 37.562...",42518,다사718514,,,,,
73806,11740110,강일동,"POLYGON ((127.18119 37.56302, 127.18128 37.562...",42518,다사718514,,,,,
73806,11740110,강일동,"POLYGON ((127.18119 37.56302, 127.18128 37.562...",32149,다사718515,,,,,


In [49]:
result.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 268688 entries, 0 to 73807
Data columns (total 10 columns):
 #   Column       Non-Null Count   Dtype   
---  ------       --------------   -----   
 0   EMD_CD       268688 non-null  object  
 1   EMD_KOR_NM   268688 non-null  object  
 2   geometry     268688 non-null  geometry
 3   index_right  268688 non-null  int64   
 4   gid          268688 non-null  object  
 5   pop          136955 non-null  float64 
 6   total_b      149566 non-null  float64 
 7   res_single   87818 non-null   float64 
 8   res_multi    108634 non-null  float64 
 9   b_over_20    44214 non-null   float64 
dtypes: float64(5), geometry(1), int64(1), object(3)
memory usage: 22.5+ MB


In [50]:
# 각 그리드별로 가장 면적이 큰 행정 구역을 선택하기 위한 코드 (가정 예시)
result['area'] = result.geometry.area
result = result.sort_values('area', ascending=False).drop_duplicates(['gid'])

# 이후 필요하지 않은 'area' 열 제거
result.drop('area', inplace=True, axis=1)


  result['area'] = result.geometry.area


In [20]:
# index_right 열 삭제
result.drop(columns=['index_right'], inplace=True)

# id 열 생성 및 할당
result['id'] = range(1, len(result) + 1)

# 결과 확인
print(result.head())

     EMD_CD EMD_KOR_NM                                           geometry  \
0  11110101        청운동  POLYGON ((126.96239 37.58619, 126.96232 37.586...   
0  11110101        청운동  POLYGON ((126.96239 37.58619, 126.96232 37.586...   
1  11110101        청운동  MULTIPOLYGON (((126.96152 37.58656, 126.96143 ...   
1  11110101        청운동  MULTIPOLYGON (((126.96152 37.58656, 126.96143 ...   
1  11110101        청운동  MULTIPOLYGON (((126.96152 37.58656, 126.96143 ...   

        gid  pop  total_b  res_single  res_multi  b_over_20  id  
0  다사524542  NaN      NaN         NaN        NaN        NaN   1  
0  다사525542  NaN      NaN         NaN        NaN        NaN   2  
1  다사524542  NaN      NaN         NaN        NaN        NaN   3  
1  다사525542  NaN      NaN         NaN        NaN        NaN   4  
1  다사525543  NaN      NaN         NaN        NaN        NaN   5  
