In [18]:
import geopandas as gpd
import pandas as pd

In [19]:
# Data Load
forest = gpd.read_file("../data/processed/전국_임상도_전처리.gpkg")
sig = gpd.read_file("../data/processed/대한민국_시군구.gpkg")

In [23]:
forest.head(5)

Unnamed: 0,수종코드,수종,면적,시도코드,geometry
0,2,활엽수,17429.849446,41,"MULTIPOLYGON (((900993.565 1901996.113, 900992..."
1,2,활엽수,17324.991319,41,"MULTIPOLYGON (((900847.734 1901935.302, 900843..."
2,2,활엽수,1328.873274,41,"MULTIPOLYGON (((901668.99 1901621.141, 901663...."
3,2,활엽수,4137.8719,41,"MULTIPOLYGON (((901759.336 1901561.155, 901756..."
4,2,활엽수,47846.806902,41,"MULTIPOLYGON (((901375.832 1901817.612, 901378..."


In [21]:
sig.head(5)

Unnamed: 0,시군구코드,시군구명,면적,시도코드,시도명,geometry
0,11110,종로구,23971610.0,11,서울특별시,"MULTIPOLYGON (((956615.453 1953567.199, 956621..."
1,11140,중구,9962768.0,11,서울특별시,"MULTIPOLYGON (((957890.386 1952616.746, 957909..."
2,11170,용산구,21897560.0,11,서울특별시,"MULTIPOLYGON (((953115.761 1950834.084, 953114..."
3,11200,성동구,16800780.0,11,서울특별시,"MULTIPOLYGON (((959681.109 1952649.605, 959842..."
4,11215,광진구,17028810.0,11,서울특별시,"MULTIPOLYGON (((964825.058 1952633.25, 964875...."


In [24]:
# 좌표계 통일
forest = forest.to_crs(sig.crs)

In [25]:
# 공간 조인
joined = gpd.sjoin(forest, sig, how="inner", predicate="intersects")
joined["면적"] = joined.geometry.area

In [26]:
# 구별 수종 비율 계산 후 저장
summary = joined.groupby(["시군구명", "수종"])["면적"].sum().unstack(fill_value=0)
summary["총면적"] = summary.sum(axis=1)
for kind in ["침엽수", "활엽수", "혼효림", "기타"]:
    if kind not in summary.columns:
        summary[kind] = 0
    summary[f"{kind}_비율"] = (summary[kind] / summary["총면적"]) * 100
result = summary[[f"{k}_비율" for k in ["침엽수", "활엽수", "혼효림", "기타"]]].round(2).reset_index()

total_area = joined.groupby("시군구명")["면적"].sum().reset_index(name="총산림면적")
result = pd.merge(result, total_area, on="시군구명", how="left")

In [27]:
result.head(5)

Unnamed: 0,시군구명,침엽수_비율,활엽수_비율,혼효림_비율,기타_비율,총산림면적
0,가평군,28.1,61.48,7.79,2.63,702880300.0
1,강남구,0.13,96.23,1.45,2.18,165263.0
2,강동구,0.29,96.8,2.91,0.0,220237.1
3,강북구,7.29,86.81,4.79,1.12,808239.7
4,강서구,1.86,98.14,0.0,0.0,105594.9


In [28]:
# 혼효림 반반 나누기
result["침엽수_tmp"] = result["침엽수_비율"] + result["혼효림_비율"] * 0.5
result["활엽수_tmp"] = result["활엽수_비율"] + result["혼효림_비율"] * 0.5

# 전체 합계 (기타는 제외)
total = result["침엽수_tmp"] + result["활엽수_tmp"]

# 정규화: 100% 기준 재비율화
result["침엽수_비율"] = (result["침엽수_tmp"] / total) * 100
result["활엽수_비율"] = (result["활엽수_tmp"] / total) * 100

# 반올림
result[["침엽수_비율", "활엽수_비율"]] = result[["침엽수_비율", "활엽수_비율"]].round(2)

# 불필요한 컬럼 제거
result = result.drop(columns=["혼효림_비율", "기타_비율", "침엽수_tmp", "활엽수_tmp"], errors='ignore')

code_map = sig[["시군구명", "시군구코드"]].drop_duplicates()
result = pd.merge(result, code_map, on="시군구명", how="left")

In [31]:
result.head(5)

Unnamed: 0,시군구명,침엽수_비율,활엽수_비율,총산림면적,시군구코드
0,가평군,32.86,67.14,702880300.0,41820
1,강남구,0.87,99.13,165263.0,11680
2,강동구,1.74,98.26,220237.1,11740
3,강북구,9.79,90.21,808239.7,11305
4,강서구,1.86,98.14,105594.9,11500


In [32]:
# 결과 저장
result.to_csv("../data/processed/시군구별_수종비율.csv", index=False, encoding="utf-8-sig")

In [3]:
import geopandas as gpd
import pandas as pd

# Data Load
forest = gpd.read_file("../data/processed/전국_임상도_전처리.gpkg")
li = gpd.read_file("../data/processed/대한민국_리.gpkg")

# 좌표계 통일
forest = forest.to_crs(li.crs)

# 공간 조인
joined = gpd.sjoin(forest, li, how="inner", predicate="intersects")
joined["면적"] = joined.geometry.area

# 리 단위 수종 비율 계산
summary = joined.groupby(["리명", "수종"])["면적"].sum().unstack(fill_value=0)
summary["총면적"] = summary.sum(axis=1)

for kind in ["침엽수", "활엽수", "혼효림", "기타"]:
    if kind not in summary.columns:
        summary[kind] = 0
    summary[f"{kind}_비율"] = (summary[kind] / summary["총면적"]) * 100

result = summary[[f"{k}_비율" for k in ["침엽수", "활엽수", "혼효림", "기타"]]].round(2).reset_index()

# 전체 면적도 추가
total_area = joined.groupby("리명")["면적"].sum().reset_index(name="총산림면적")
result = pd.merge(result, total_area, on="리명", how="left")

# 혼효림 비율 분배 (침엽수/활엽수 각각 50%씩)
result["침엽수_tmp"] = result["침엽수_비율"] + result["혼효림_비율"] * 0.5
result["활엽수_tmp"] = result["활엽수_비율"] + result["혼효림_비율"] * 0.5

# 재정규화
total = result["침엽수_tmp"] + result["활엽수_tmp"]
result["침엽수_비율"] = (result["침엽수_tmp"] / total) * 100
result["활엽수_비율"] = (result["활엽수_tmp"] / total) * 100

# 반올림
result[["침엽수_비율", "활엽수_비율"]] = result[["침엽수_비율", "활엽수_비율"]].round(2)

# 불필요한 컬럼 제거
result = result.drop(columns=["혼효림_비율", "기타_비율", "침엽수_tmp", "활엽수_tmp"], errors='ignore')

# 리 코드 붙이기
code_map = li[["리명", "리코드"]].drop_duplicates()
result = pd.merge(result, code_map, on="리명", how="left")

# 결과 저장
result.to_csv("../data/processed/리별_수종비율.csv", index=False, encoding="utf-8-sig")

### 화재가 발생한 곳의 수종 분포 확인

In [1]:
import geopandas as gpd

In [2]:
# 1. 데이터 불러오기
forest = gpd.read_file("../data/processed/전국_임상도_전처리.gpkg")
fires = gpd.read_file("../data/processed/산불_전처리.gpkg")

In [3]:
# 2. 좌표계 통일 (EPSG:5179 기준)
forest = forest.to_crs(epsg=5179)
fires = fires.to_crs(epsg=5179)

In [4]:
# 3. 산불 geometry가 포인트인지 확인 (필요 시 centroid 처리)
if fires.geometry.geom_type.isin(["Polygon", "MultiPolygon"]).any():
    fires["geometry"] = fires.geometry.centroid

In [5]:
# 4. 공간 조인 (산불 Point가 포함된 수종 polygon 찾기)
fires_with_forest = gpd.sjoin(fires, forest[["수종", "geometry"]], how="left", predicate="within")

In [6]:
# 5. 결과 확인
print(fires_with_forest[["발생일시", "피해면적_ha", "수종"]].head())

                 발생일시  피해면적_ha   수종
0 1991-03-26 14:20:00      2.0  침엽수
1 1991-11-26 17:30:00     10.0  활엽수
2 1991-12-06 13:40:00      4.0  활엽수
3 1991-12-06 16:45:00      1.5  NaN
4 1991-04-02 15:10:00      7.0  침엽수


In [7]:
# 저장 경로 설정
gpkg_path = "../data/processed/산불_수종_결합.gpkg"
csv_path = "../data/processed/산불_수종_결합.csv"

# 디렉토리 없으면 생성
import os
os.makedirs(os.path.dirname(gpkg_path), exist_ok=True)

# 1. GeoPackage로 저장 (공간 정보 포함)
fires_with_forest.to_file(gpkg_path, driver="GPKG")
print(f"✅ GPKG 저장 완료: {gpkg_path}")

# 2. CSV로 저장 (geometry 제거)
fires_with_forest.drop(columns="geometry").to_csv(csv_path, index=False)
print(f"✅ CSV 저장 완료: {csv_path}")

✅ GPKG 저장 완료: ../data/processed/산불_수종_결합.gpkg
✅ CSV 저장 완료: ../data/processed/산불_수종_결합.csv


In [2]:
import geopandas as gpd
import pandas as pd
import os

# 1. 데이터 불러오기
forest = gpd.read_file("../data/processed/전국_임상도_전처리.gpkg")
fires = gpd.read_file("../data/processed/산불_전처리.gpkg")

# 2. 좌표계 통일
forest = forest.to_crs(epsg=5179)
fires = fires.to_crs(epsg=5179)

# 3. Polygon → 중심점
if fires.geometry.geom_type.isin(["Polygon", "MultiPolygon"]).any():
    fires["geometry"] = fires.geometry.centroid

# 4. 산림 면적 계산
forest["산림면적_ha"] = forest.geometry.area / 10_000  # m² → ha

# 5. 공간 조인 (산불 포인트 → 산림)
fires_with_forest = gpd.sjoin(
    fires,
    forest[["수종", "산림면적_ha", "geometry"]],
    how="left",
    predicate="within"
)

# 6. 계절 파생 변수
def get_season(date):
    month = pd.to_datetime(date).month
    if month in [3, 4, 5]:
        return "봄"
    elif month in [6, 7, 8]:
        return "여름"
    elif month in [9, 10, 11]:
        return "가을"
    else:
        return "겨울"

fires_with_forest["계절"] = fires_with_forest["발생일시"].apply(get_season)

# 7. 피해비율 계산
fires_with_forest["피해비율"] = fires_with_forest["피해면적_ha"] / fires_with_forest["산림면적_ha"]

# 8. 진화 시간 및 확산 속도 계산
fires_with_forest["발생일시"] = pd.to_datetime(fires_with_forest["발생일시"], errors="coerce")
fires_with_forest["진화일시"] = pd.to_datetime(fires_with_forest["진화일시"], errors="coerce")
fires_with_forest["진화시간_시"] = (fires_with_forest["진화일시"] - fires_with_forest["발생일시"]).dt.total_seconds() / 3600
fires_with_forest["확산속도_ha_per_hr"] = fires_with_forest["피해면적_ha"] / fires_with_forest["진화시간_시"]

# 9. 주요 분석 변수 누락 제거
fires_clean = fires_with_forest.dropna(
    subset=["피해면적_ha", "산림면적_ha", "수종", "발생일시", "진화일시"]
).copy()

# 10. 저장
gpkg_path = "../data/processed/산불_수종_결합_정규화.gpkg"
csv_path = "../data/processed/산불_수종_결합_정규화.csv"
os.makedirs(os.path.dirname(gpkg_path), exist_ok=True)

fires_clean.to_file(gpkg_path, driver="GPKG")
fires_clean.drop(columns="geometry").to_csv(csv_path, index=False)

print(f"✅ GPKG 저장 완료: {gpkg_path}")
print(f"✅ CSV 저장 완료: {csv_path}")

✅ GPKG 저장 완료: ../data/processed/산불_수종_결합_정규화.gpkg
✅ CSV 저장 완료: ../data/processed/산불_수종_결합_정규화.csv
