# In [6]:
import pandas as pd
import os

# 1. Pedestrian accidents -> pedestrain_processing.csv
# The raw sheet is read without a header because its first three rows together
# form the column header. They are flattened below into single
# "<year>_<accident type>_<metric>" labels, which is the layout the extraction
# regex further down expects.
df = pd.read_csv("data/raw_data/pedestrain.csv", header=None)
header1 = df.iloc[0].astype(str)
header2 = df.iloc[1].astype(str)
header3 = df.iloc[2].astype(str)

# Only cells containing a 4-digit year yield a value; forward-fill carries each
# year across the following columns until the next year appears.
years = header1.str.extract(r"(\d{4})")[0].ffill().astype(str)
columns = years + "_" + header2 + "_" + header3
# Collapse runs of underscores and trim them from both ends of every label.
columns = columns.str.replace(r"_+", "_", regex=True).str.strip("_")

# Data rows start after the three header rows. The first column is always
# labelled "자치구" (district), whatever its flattened header text was.
df_data = df.iloc[3:].copy()
df_data.columns = ["자치구"] + columns[1:].tolist()
# Drop every column whose label contains "소계" (subtotal).
df_data = df_data[[col for col in df_data.columns if "소계" not in col]]

# Wide -> long: one row per (district, flattened column label).
df_melted = df_data.melt(id_vars="자치구", var_name="구분", value_name="값")
# Split each label back into year / accident type / metric. Labels whose metric
# is neither "사망자수" (deaths) nor "부상자수" (injuries) do not match and get
# NaN in all three columns.
# NOTE(review): rows with NaN keys are not expected to survive the pivot below
# (pandas grouping excludes NaN keys by default) - confirm that dropping any
# other metric is intended.
df_melted[['연도', '사고유형', '지표']] = df_melted["구분"].str.extract(r"(?P<연도>\d{4})_(?P<사고유형>.+)_(?P<지표>사망자수|부상자수)")
# "-" placeholders and missing cells are counted as 0.
df_melted["값"] = df_melted["값"].replace("-", 0).fillna(0).astype(float)

# Long -> wide: one row per (year, district), one column per
# (accident type, metric) pair, then flatten the two-level column index into
# "<accident type>_<metric>" names.
df_final = df_melted.pivot_table(index=["연도", "자치구"], columns=["사고유형", "지표"], values="값", aggfunc="sum").reset_index()
df_final.columns = ['연도', '자치구'] + [f"{a}_{b}" for a, b in df_final.columns[2:]]
# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
os.makedirs("data/first_processing_data", exist_ok=True)
df_final.to_csv("data/first_processing_data/pedestrain_processing.csv", index=False, encoding="utf-8-sig")

# 2. Hit-and-run accidents -> hit_and_run_processing.csv
# (output column names keep the "뺑소니_" prefix)
# The raw sheet is read without a header because its first two rows together
# form the column header. They are flattened below into "<year>_<metric>"
# labels.
df = pd.read_csv("data/raw_data/hit_and_run.csv", header=None)
header1 = df.iloc[0].astype(str)
header2 = df.iloc[1].astype(str)

# Only cells containing a 4-digit year yield a value; forward-fill carries each
# year across the following columns until the next year appears.
years = header1.str.extract(r"(\d{4})")[0].ffill().astype(str)
columns = years + "_" + header2
# Collapse runs of underscores and trim them from both ends of every label.
columns = columns.str.replace(r"_+", "_", regex=True).str.strip("_")

# Data rows start after the two header rows. The first column is always
# labelled "자치구" (district), whatever its flattened header text was.
df_data = df.iloc[2:].copy()
df_data.columns = ["자치구"] + columns[1:].tolist()

# Wide -> long: one row per (district, flattened column label), then split the
# label into its year and metric parts.
df_melted = df_data.melt(id_vars="자치구", var_name="구분", value_name="값")
df_melted[['연도', '지표']] = df_melted["구분"].str.extract(r"(?P<연도>\d{4})_(?P<지표>.+)")
# "-" placeholders and missing cells are counted as 0.
df_melted["값"] = df_melted["값"].replace("-", 0).fillna(0).astype(float)

# Long -> wide: one row per (year, district), one column per metric.
df_final = df_melted.pivot_table(index=["연도", "자치구"], columns="지표", values="값", aggfunc="sum").reset_index()
# Prefix every metric column with "뺑소니_" and strip the " (건)" / " (명)"
# suffixes from the names.
df_final.columns = ['연도', '자치구'] + [f"뺑소니_{col}".replace(" (건)", "").replace(" (명)", "") for col in df_final.columns[2:]]
# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
os.makedirs("data/first_processing_data", exist_ok=True)
df_final.to_csv("data/first_processing_data/hit_and_run_processing.csv", index=False, encoding="utf-8-sig")

# 3. Bicycle accidents -> bike_processing.csv
# The file is read without a header so every cell arrives as raw text; the
# first row is then promoted to column names and removed from the data.
df = pd.read_csv("data/raw_data/bike.csv", header=None)
df.columns = df.iloc[0]
df = df[1:]
# Drop the rows whose district ("자치구") is "소계" (subtotal).
df = df[df["자치구"] != "소계"]

# "-" placeholders and missing cells are counted as 0.
df = df.replace("-", 0).fillna(0)
# Every column from the third onward is converted to numbers. Thousands
# separators are stripped first: without that, a value such as "1,234" would
# fail to parse and be silently turned into 0 by errors="coerce".
for col in df.columns[2:]:
    cleaned = df[col].astype(str).str.replace(",", "", regex=False)
    df[col] = pd.to_numeric(cleaned, errors="coerce").fillna(0)

# Combine the "가해" and "피해" figures into one total per metric:
# incidents (발생건수), deaths (사망자수) and injuries (부상자수).
df["자전거_발생건수"] = df["가해_발생건수"] + df["피해_발생건수"]
df["자전거_사망자수"] = df["가해_사망자수"] + df["피해_사망자수"]
df["자전거_부상자수"] = df["가해_부상자수"] + df["피해_부상자수"]

# Keep only the keys and the three combined totals.
df_final = df[["연도", "자치구", "자전거_발생건수", "자전거_사망자수", "자전거_부상자수"]]
# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
os.makedirs("data/first_processing_data", exist_ok=True)
df_final.to_csv("data/first_processing_data/bike_processing.csv", index=False, encoding="utf-8-sig")

# 4. Public transportation -> transportation_processing.csv
# The file is read without a header, its three columns are given fixed names,
# and the first row is discarded.
# NOTE(review): the first row is presumably the file's own header line -
# confirm against the raw data.
df = pd.read_csv("data/raw_data/transportation.csv", header=None)
df.columns = ["연도", "자치구", "대중교통_승객수"]
# Take an explicit copy of the slice so the column assignments below modify an
# independent frame instead of raising SettingWithCopyWarning.
df = df[1:].copy()

# Clean the passenger count: "-" placeholders and missing cells become 0,
# thousands separators are removed, and anything still unparsable falls back
# to 0 before the final cast to int.
df["대중교통_승객수"] = df["대중교통_승객수"].replace("-", 0).fillna(0)
df["대중교통_승객수"] = df["대중교통_승객수"].astype(str).str.replace(",", "")
df["대중교통_승객수"] = pd.to_numeric(df["대중교통_승객수"], errors="coerce").fillna(0).astype(int)

# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
os.makedirs("data/first_processing_data", exist_ok=True)
df.to_csv("data/first_processing_data/transportation_processing.csv", index=False, encoding="utf-8-sig")
