In [1]:
#버스 전체 데이터 (2005~2025)

#교통사고 + 버스승객수 데이터 병합
import pandas as pd


# Inputs: bus ridership and traffic-accident counts, both keyed by district and month.
bus_df = pd.read_csv(
    "data/first_processing_data/Seoul_bus_preprocessing.csv",
    encoding="utf-8-sig",
)
accident_df = pd.read_csv(
    "data/first_processing_data/Traffic_accident_month_preprocessing.csv",
    encoding="utf-8-sig",
)

# Give both frames the same key column names ("자치구", "년월").
# The bus rename only guards against a file that still uses "자치구_명칭".
bus_df = bus_df.rename(columns={"자치구_명칭": "자치구"})
accident_df = accident_df.rename(columns={"구": "자치구", "연월": "년월"})


def _pad_month(year_month):
    """Join the first two "-"-separated pieces, zero-padding the second to width 2."""
    pieces = year_month.split('-')
    return f"{pieces[0]}-{pieces[1].zfill(2)}"


accident_df["년월"] = accident_df["년월"].astype(str).apply(_pad_month)

# The join keys are compared as plain strings in both frames.
join_keys = ["자치구", "년월"]
for frame in (bus_df, accident_df):
    for key in join_keys:
        frame[key] = frame[key].astype(str)

# Full grid: every district found in the accident file x every month label
# from 2005-01 through 2025-04 (244 labels).
gu_list = accident_df["자치구"].unique()
last_year, last_month = 2025, 4
full_months = [
    f"{year}-{month:02d}"
    for year in range(2005, last_year + 1)
    for month in range(1, 13)
    if (year, month) <= (last_year, last_month)
]
full_index = pd.MultiIndex.from_product([gu_list, full_months], names=["자치구", "년월"])
base_df = pd.DataFrame(index=full_index).reset_index()
base_df = base_df.astype({"자치구": str, "년월": str})

# Attach accident counts; grid cells without a matching record become 0.
accident_columns = ["발생건수", "사망자수", "부상자수"]
base_df = base_df.merge(accident_df, on=join_keys, how="left")
base_df[accident_columns] = base_df[accident_columns].fillna(0)

# Attach bus ridership the same way, then order rows by district and month.
final_df = base_df.merge(bus_df, on=join_keys, how="left")
final_df["버스승객수"] = final_df["버스승객수"].fillna(0)
final_df = final_df.sort_values(by=join_keys).reset_index(drop=True)

final_df.to_csv(
    "data/merged_data/Bus_accident_total_data.csv",
    index=False,
    encoding="utf-8-sig",
)

print("저장완료")

저장완료


In [3]:
import pandas as pd

# Build the 2017-2025 district x month table of bus ridership and accident
# counts, then append one "소계" (subtotal) row per month.

COUNT_COLUMNS = ["버스승객수", "발생건수", "사망자수", "부상자수"]
OUTPUT_COLUMNS = ["자치구", "년월"] + COUNT_COLUMNS


def _to_dotted_year_month(values):
    """Format year-month values as text of the form "'YYYY.MM".

    Args:
        values: pandas Series of year-month values (converted with ``astype(str)``).

    Returns:
        pandas Series of strings: a leading apostrophe followed by the first
        seven characters of the value with "-" replaced by ".". Values that
        start with "YYYY-M" (one-digit month) get the month zero-padded first,
        so "2017-1" and "2017-01" both become "'2017.01" and can be used as
        the same merge key. Values of any other shape are formatted exactly as
        before (replace, then truncate).

    NOTE(review): the leading apostrophe presumably keeps spreadsheet tools
    from re-interpreting the value as a number/date - confirm with the consumer.
    """

    def _pad(match):
        return f"{match.group(1)}-{match.group(2).zfill(2)}"

    text = values.astype(str)
    # Only touches values that begin with a 4-digit year, a dash and a 1-2
    # digit month that is followed by another dash or the end of the string.
    padded = text.str.replace(r"^(\d{4})-(\d{1,2})(?=-|$)", _pad, regex=True)
    return "'" + padded.str.replace("-", ".", regex=False).str[:7]


bus_df = pd.read_csv("data/first_processing_data/Seoul_bus_preprocessing.csv", encoding="utf-8-sig")
accident_df = pd.read_csv("data/first_processing_data/Traffic_accident_month_preprocessing.csv", encoding="utf-8-sig")

bus_df = bus_df.rename(columns={"자치구": "구"})

# Drop pre-aggregated rows ("소계" / "합계"); the subtotal is recomputed below.
# .copy() detaches each filtered frame so the column assignments that follow
# do not trigger SettingWithCopyWarning.
bus_df = bus_df[~bus_df["구"].astype(str).str.contains("소계|합계", na=False)].copy()
accident_df = accident_df[~accident_df["구"].astype(str).str.contains("소계|합계", na=False)].copy()

bus_df["년월"] = _to_dotted_year_month(bus_df["년월"])
accident_df["년월"] = _to_dotted_year_month(accident_df["연월"])

# Outer merge keeps district/month pairs present in only one file; the
# missing side is filled with 0 before the counts are cast to integers.
merged = pd.merge(bus_df, accident_df, on=["년월", "구"], how="outer").fillna(0)
merged[COUNT_COLUMNS] = merged[COUNT_COLUMNS].astype(int)
merged = merged.rename(columns={"구": "자치구"})[OUTPUT_COLUMNS]

# Keep 2017..2025 only; the year is the first four-digit run in "년월".
years = merged["년월"].str.extract(r"(\d{4})", expand=False).astype(int)
merged = merged[(years >= 2017) & (years <= 2025)].copy()

gu_order = [
    "강남구", "강동구", "강북구", "강서구", "관악구", "광진구", "구로구", "금천구",
    "노원구", "도봉구", "동대문구", "동작구", "마포구", "서대문구", "서초구", "성동구",
    "성북구", "송파구", "양천구", "영등포구", "용산구", "은평구", "종로구", "중구", "중랑구"
]

# Values outside gu_order (including blanks) become NaN in the categorical
# and are dropped, so only the listed districts remain, in list order.
merged["자치구"] = pd.Categorical(merged["자치구"], categories=gu_order, ordered=True)
merged = (
    merged.dropna(subset=["자치구"])
    .sort_values(["자치구", "년월"])
    .reset_index(drop=True)
)

# Per-month totals over the remaining districts. `merged` is already limited
# to 2017..2025, so the subtotal needs no separate year filter.
subtotal = merged.groupby("년월")[COUNT_COLUMNS].sum().reset_index()
subtotal.insert(0, "자치구", "소계")

# Append the subtotal rows and sort them after the last district.
merged_with_subtotal = pd.concat([merged, subtotal], ignore_index=True)
merged_with_subtotal["자치구"] = pd.Categorical(
    merged_with_subtotal["자치구"],
    categories=gu_order + ["소계"],
    ordered=True
)
merged_with_subtotal = merged_with_subtotal.sort_values(["자치구", "년월"]).reset_index(drop=True)
merged_with_subtotal.to_csv("data/merged_data/Bus_Accident_total_data_2017_2025.csv", index=False, encoding="utf-8-sig")

print("저장완료")


저장완료


In [5]:
#감전사고 + 재난사고
import pandas as pd

# Electric-shock accidents: loaded and renamed in one step so the month column
# is called "년월" and the casualty column "감전사고_사상자수".
shock_df = pd.read_csv(
    "data/first_processing_data/Electric_shock_accident_preprocessing.csv",
    encoding="utf-8-sig",
).rename(columns={"시점": "년월", "사상자수": "감전사고_사상자수"})

# Disaster accidents: used with the column names found in the file.
disaster_df = pd.read_csv(
    "data/first_processing_data/Disaster_Accident_preprocessing.csv",
    encoding="utf-8-sig",
)

def normalize_ym(ym):
    """Normalize a year-month value to the form "YYYY-MM".

    Args:
        ym: Any value; it is converted with ``str()`` and stripped first.

    Returns:
        str: The first four-digit run is taken as the year and the one or two
        digits that follow it (after optional non-digit separators) as the
        month, zero-padded to two digits; anything after the month (such as a
        day) is ignored. When no such year/month pair is found, the previous
        behaviour is kept: all digits are concatenated, left-padded with zeros
        to six characters and split after the fourth character, so text
        without digits yields "0000-00".
    """
    import re  # local import keeps the helper self-contained within this cell

    text = str(ym).strip()
    found = re.search(r"(\d{4})\D*(\d{1,2})", text)
    if found:
        # Pad the month only. Left-padding the whole digit string would push
        # a zero into the year for one-digit months ("2020.1" -> "0202-01").
        return f"{found.group(1)}-{found.group(2).zfill(2)}"

    digits = ''.join(filter(str.isdigit, text)).zfill(6)
    return f"{digits[:4]}-{digits[4:]}"

# Bring the month key of both frames into the format produced by normalize_ym.
shock_df["년월"] = shock_df["년월"].apply(normalize_ym)
disaster_df["년월"] = disaster_df["년월"].apply(normalize_ym)

# Outer merge keeps months present in only one file; gaps are filled with 0,
# the shock-casualty column is cast to int, and rows are ordered by month.
merged_df = (
    disaster_df.merge(shock_df, on="년월", how="outer")
    .fillna(0)
    .astype({"감전사고_사상자수": int})
    .sort_values(by="년월")
    .reset_index(drop=True)
)

merged_df.to_csv(
    "data/merged_data/Disaster_Electric_total_data.csv",
    index=False,
    encoding="utf-8-sig",
)

print("저장 완료")

저장 완료
