In [4]:
import pandas as pd
from pathlib import Path

# Load the raw export (path is relative to the current working directory).
raw_path = Path("../data/RoadSafety.csv")
df_raw = pd.read_csv(raw_path)

# The first data row holds the real headers: promote it to column names and
# drop it from the data. Header text is stripped of surrounding whitespace so
# the keys of rename_map below match exactly.
new_cols = list(df_raw.iloc[0])
df = df_raw.iloc[1:].copy()
df.columns = [str(c).strip() for c in new_cols]

# Rename columns to clean, DB-friendly names
rename_map = {
    "Country": "country",
    "Year": "year",
    "Drivers/passengers of 4-wheeled vehicles": "share_4w",
    "Drivers/passengers of motorized 2- or 3-wheelers": "share_2_3w",
    "Cyclists": "share_cyclists",
    "Pedestrians": "share_pedestrians",
    "Other/unspecified road users": "share_other_unspecified",
}
df = df.rename(columns=rename_map)

# Sanity check BEFORE any column is accessed. DataFrame.rename silently skips
# labels it cannot find, so a changed header in the source file would otherwise
# surface as an uninformative KeyError on the first column lookup below.
# An explicit raise is used instead of `assert` so the check survives `-O`.
missing = [col for col in rename_map.values() if col not in df.columns]
if missing:
    raise KeyError(
        f"Expected columns missing after rename: {missing}; "
        f"columns found: {list(df.columns)}"
    )

# Type fixes. errors="coerce" turns unparseable cells into missing values
# rather than raising; "Int64" is the nullable integer dtype, so years that
# failed to parse stay missing instead of forcing the column to float.
df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")
share_cols = [
    "share_4w",
    "share_2_3w",
    "share_cyclists",
    "share_pedestrians",
    "share_other_unspecified",
]
for c in share_cols:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# Save a clean CSV (intended for import into Postgres); the index is omitted
# so the file contains only the renamed data columns.
out_path = Path("../data/cleaned_road_safety.csv")
df.to_csv(out_path, index=False)
out_path

PosixPath('../data/cleaned_road_safety.csv')