In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# Data Load
forest = gpd.read_file("../data/processed/서울_임상도_전처리.gpkg")
sig = gpd.read_file("../data/processed/대한민국_시군구.gpkg")

In [3]:
forest.head(5)

Unnamed: 0,수종코드,수종,면적,geometry
0,0,기타,1847.918451,"MULTIPOLYGON (((940406.869 1943144.09, 940402...."
1,0,기타,12748.59262,"MULTIPOLYGON (((941247.331 1942388.402, 941247..."
2,1,침엽수,2053.461999,"MULTIPOLYGON (((940653.223 1942760.494, 940653..."
3,1,침엽수,22703.551703,"MULTIPOLYGON (((941377.1 1942494.683, 941384.4..."
4,1,침엽수,2877.794364,"MULTIPOLYGON (((941220.202 1942849.027, 941220..."


In [4]:
sig.head(5)

Unnamed: 0,시군구코드,시군구명,면적,시도코드,시도명,geometry
0,11110,종로구,23971610.0,11,서울특별시,"MULTIPOLYGON (((956615.453 1953567.199, 956621..."
1,11140,중구,9962768.0,11,서울특별시,"MULTIPOLYGON (((957890.386 1952616.746, 957909..."
2,11170,용산구,21897560.0,11,서울특별시,"MULTIPOLYGON (((953115.761 1950834.084, 953114..."
3,11200,성동구,16800780.0,11,서울특별시,"MULTIPOLYGON (((959681.109 1952649.605, 959842..."
4,11215,광진구,17028810.0,11,서울특별시,"MULTIPOLYGON (((964825.058 1952633.25, 964875...."


In [5]:
# 좌표계 통일
forest = forest.to_crs(sig.crs)

In [6]:
# 공간 조인
joined = gpd.sjoin(forest, sig, how="inner", predicate="intersects")
joined["면적"] = joined.geometry.area

In [7]:
# 구별 수종 비율 계산 후 저장
summary = joined.groupby(["시군구명", "수종"])["면적"].sum().unstack(fill_value=0)
summary["총면적"] = summary.sum(axis=1)
for kind in ["침엽수", "활엽수", "혼효림", "기타"]:
    if kind not in summary.columns:
        summary[kind] = 0
    summary[f"{kind}_비율"] = (summary[kind] / summary["총면적"]) * 100
result = summary[[f"{k}_비율" for k in ["침엽수", "활엽수", "혼효림", "기타"]]].round(2).reset_index()

In [10]:
result.head(5)

수종,시군구명,침엽수_비율,활엽수_비율,혼효림_비율,기타_비율
0,강남구,14.22,74.73,8.28,2.77
1,강동구,10.75,78.63,9.84,0.78
2,강북구,10.66,59.5,24.71,5.13
3,강서구,6.68,77.57,15.19,0.55
4,계양구,0.0,100.0,0.0,0.0


In [11]:
# 혼효림 반반 나누기
result["침엽수_tmp"] = result["침엽수_비율"] + result["혼효림_비율"] * 0.5
result["활엽수_tmp"] = result["활엽수_비율"] + result["혼효림_비율"] * 0.5

# 전체 합계 (기타는 제외)
total = result["침엽수_tmp"] + result["활엽수_tmp"]

# 정규화: 100% 기준 재비율화
result["침엽수_비율"] = (result["침엽수_tmp"] / total) * 100
result["활엽수_비율"] = (result["활엽수_tmp"] / total) * 100

# 반올림
result[["침엽수_비율", "활엽수_비율"]] = result[["침엽수_비율", "활엽수_비율"]].round(2)

# 불필요한 컬럼 제거
result = result.drop(columns=["혼효림_비율", "기타_비율", "침엽수_tmp", "활엽수_tmp"], errors='ignore')

In [12]:
result.head(5)

수종,시군구명,침엽수_비율,활엽수_비율
0,강남구,18.88,81.12
1,강동구,15.79,84.21
2,강북구,24.26,75.74
3,강서구,14.36,85.64
4,계양구,0.0,100.0


In [16]:
# 결과 저장
result.to_csv("../data/processed/시군구별_수종비율.csv", index=False, encoding="utf-8-sig")