In [2]:
import pandas as pd
from pathlib import Path

In [4]:
# Years the merged table is reindexed onto: 2005 through 2023 inclusive.
YEARS = [*range(2005, 2024)]
# Destination CSV for the merged table.
OUT_PATH = "data/merged_data/merged_final.csv"

# Input CSVs, keyed by the short dataset name used when loading them.
FILES = dict(
    weather="data/first_processing_data/weather_metrics_no_fog.csv",
    fire="data/first_processing_data/fire_count.csv",
    pop="data/first_processing_data/Resident_population_preprocessing_year_sum.csv",
    usage="data/first_processing_data/Traffic_accidents_vehicle_tidy.csv",
    accident="data/first_processing_data/Traffic_accident_type_preprocessing.csv",
    acc_metrics="data/first_processing_data/Traffic_accidents_filtered_metrics.csv",
)

# Accident-type table in wide form: one row per (district, year) with one
# "<accident type>_<metric>" column per accident type, for both the incident
# count (발생건수) and the casualty count (사상자수 = deaths + injuries).
try:
    acc = pd.read_csv(FILES["accident"])
    # Coerce every numeric input the same way, so a stray non-numeric cell
    # becomes 0 instead of making the pivot fail and discarding the dataset.
    for _col in ('발생건수', '부상자수', '사망자수'):
        acc[_col] = pd.to_numeric(acc[_col], errors='coerce').fillna(0)
    acc['사상자수'] = acc['사망자수'] + acc['부상자수']

    # NOTE(review): pivot_table aggregates with its default (mean). If one
    # (district, year, accident type) can appear on several rows, confirm
    # that a sum is not what is wanted here.
    # add_suffix tags every accident-type column with its metric, so a type
    # outside the expected four cannot end up as a duplicate column name
    # once the two pivots are concatenated.
    cnt = acc.pivot_table(
        index=['자치구', '연도'], columns='사고유형', values='발생건수', fill_value=0
    ).add_suffix('_발생건수')
    inj = acc.pivot_table(
        index=['자치구', '연도'], columns='사고유형', values='사상자수', fill_value=0
    ).add_suffix('_사상자수')
    acc_wide = pd.concat([cnt, inj], axis=1).reset_index().rename(columns={'자치구': '구'})
except Exception as exc:
    # Best-effort load: keep going without this dataset, but say so instead
    # of failing silently.
    print(f"[warn] accident-type data not loaded, continuing without it: {exc!r}")
    acc_wide = pd.DataFrame(columns=["구", "연도"])

# Vehicle-usage table in wide form: one row per (district, year) with one
# "<usage type>_<metric>" column per usage type and metric.
try:
    usage = pd.read_csv(FILES["usage"])
    u = usage.pivot_table(
        index=["구", "연도"],
        columns="용도",
        values=["발생건수", "사상자수"],
        fill_value=0,
    )
    # The pivot yields (metric, usage type) column pairs; flatten them to
    # single "<usage type>_<metric>" labels.
    u.columns = [f"{usage_type}_{metric}" for metric, usage_type in u.columns]
    u = u.reset_index()
except Exception as exc:
    # Best-effort load: keep going without this dataset, but say so instead
    # of failing silently.
    print(f"[warn] vehicle-usage data not loaded, continuing without it: {exc!r}")
    u = pd.DataFrame(columns=["구", "연도"])

# Weather table in wide form: one row per (district, year) with one
# "<weather>_<metric>" column per weather condition and metric.
try:
    weather = pd.read_csv(FILES["weather"])
    w = weather.pivot_table(
        index=['district', 'year'],
        columns='weather',
        values=['발생건수', '사상자수'],
        fill_value=0,
    )
    # The pivot yields (metric, weather) column pairs; flatten them to
    # single "<weather>_<metric>" labels.
    w.columns = [f"{wtype}_{metric}" for metric, wtype in w.columns]
    # Rename the index levels to the merge keys shared by the other tables.
    w = w.reset_index().rename(columns={'district': '구', 'year': '연도'})
except Exception as exc:
    # Best-effort load: keep going without this dataset, but say so instead
    # of failing silently.
    print(f"[warn] weather data not loaded, continuing without it: {exc!r}")
    w = pd.DataFrame(columns=["구", "연도"])

# Fire counts, with the district/year headers renamed to the merge keys and
# the subtotal column (소계) renamed to 화재_소계.
try:
    fire = pd.read_csv(FILES["fire"]).rename(
        columns={'district': '구', 'year': '연도', '소계': '화재_소계'}
    )
except Exception as exc:
    # Best-effort load: keep going without this dataset, but say so instead
    # of failing silently.
    print(f"[warn] fire data not loaded, continuing without it: {exc!r}")
    fire = pd.DataFrame(columns=["구", "연도", "화재_소계"])
# Resident population, with 동별 renamed to the district key (구) and the
# total column (합계) renamed to 거주인구. The 연도 column already carries
# the year key name, so it needs no rename.
try:
    pop = pd.read_csv(FILES["pop"]).rename(columns={'동별': '구', '합계': '거주인구'})
except Exception as exc:
    # Best-effort load: keep going without this dataset, but say so instead
    # of failing silently.
    print(f"[warn] population data not loaded, continuing without it: {exc!r}")
    pop = pd.DataFrame(columns=["구", "연도", "거주인구"])

# Outer-join every per-dataset table on (district, year), starting from the
# weather table; only the population column is taken from `pop`.
_keys = ['구', '연도']
_others = (fire, pop[['구', '연도', '거주인구']], u, acc_wide)
merged = w
for _frame in _others:
    merged = merged.merge(_frame, on=_keys, how='outer')

# Expand to the full district x YEARS grid; combinations that are absent
# from the data are added as rows filled with 0.
districts = merged['구'].dropna().unique()
full_index = pd.MultiIndex.from_product([districts, YEARS], names=_keys)
merged = merged.set_index(_keys)
merged = merged.reindex(full_index, fill_value=0)
merged = merged.reset_index().sort_values(_keys)

# Zero-fill the remaining gaps, then discard the "합계" (total) accident-type
# columns when they are present.
merged = merged.fillna(0)
merged = merged.drop(columns=['합계_사상자수', '합계_발생건수'], errors='ignore')

# Attach the long-format accident metrics (one row per district/year/metric)
# as extra columns, one per distinct value of `metric`.
try:
    acc2 = pd.read_csv(FILES["acc_metrics"])
    # Normalise whichever district/year header the file uses to the merge
    # keys; headers that are not present are simply ignored by rename.
    acc2 = acc2.rename(columns={'자치구': '구', '동별': '구', 'district': '구', 'year': '연도'})
    acc_pivot = acc2.pivot_table(
        index=['구', '연도'], columns='metric', values='value', fill_value=0
    ).reset_index()
    # Left join keeps the merged grid as-is; unmatched rows get 0 metrics.
    final_df = merged.merge(acc_pivot, on=['구', '연도'], how='left').fillna(0)
except Exception as exc:
    # Best-effort: fall back to the merged table without the extra metrics,
    # but say so instead of failing silently.
    print(f"[warn] accident metrics not merged, continuing without them: {exc!r}")
    final_df = merged

# ----- Drop columns -----
# Passenger-count columns are removed when present (absent ones are ignored).
_unwanted = ['버스승객수', '지하철_승객_수', '승객수']

# ----- Rename columns -----
_renames = {
    '화물_발생건수': '화물계_발생건수',
    '화물_사상자수': '화물계_사상자수',
}
final_df = final_df.drop(columns=_unwanted, errors='ignore').rename(columns=_renames)

# ----- Save -----
# Create the output directory if needed, then write the CSV as UTF-8 with a
# BOM ("utf-8-sig") and without the index column.
_out_dir = Path(OUT_PATH).parent
_out_dir.mkdir(parents=True, exist_ok=True)
final_df.to_csv(OUT_PATH, index=False, encoding="utf-8-sig")

# Report where the file went and what it contains.
print("✅ merged_final.csv 형식으로 저장 완료:", OUT_PATH)
print("Shape:", final_df.shape)
print("Columns:", final_df.columns.tolist())


✅ merged_final.csv 형식으로 저장 완료: data/merged_data/merged_final.csv
Shape: (494, 27)
Columns: ['구', '연도', '눈_발생건수', '맑음_발생건수', '비_발생건수', '흐림_발생건수', '눈_사상자수', '맑음_사상자수', '비_사상자수', '흐림_사상자수', '화재_소계', '거주인구', '버스계_발생건수', '승용계_발생건수', '화물계_발생건수', '버스계_사상자수', '승용계_사상자수', '화물계_사상자수', '차대사람_발생건수', '차대차_발생건수', '차량단독_발생건수', '차대사람_사상자수', '차대차_사상자수', '차량단독_사상자수', '인구 10만명당 부상자수 (명)', '인구 10만명당 사망자수 (명)', '자동차 1만대당 발생건수 (건)']
