# Road Accident Severity Prediction 
## Data Preprocessing

In [None]:
# Target column setup: reuse Severity_Class when the dataset already has it;
# otherwise derive it from the textual Accident_Severity labels.
target_col = "Severity_Class"

if target_col not in acc.columns:
    # assumes Accident_Severity holds the labels 'Slight', 'Serious', 'Fatal'
    mapping = {"Slight": 0, "Serious": 1, "Fatal": 2}
    severity_codes = acc["Accident_Severity"].map(mapping)

    # .map() turns any label missing from `mapping` (including NaN) into NaN,
    # which cannot be cast to int8. Fail early with the offending labels
    # instead of an opaque cast error.
    unmapped = acc.loc[severity_codes.isna(), "Accident_Severity"]
    if not unmapped.empty:
        raise ValueError(
            "Accident_Severity contains values outside "
            f"{list(mapping)}: {unmapped.unique()[:10].tolist()}"
        )

    acc[target_col] = severity_codes.astype("int8")

# Class balance of the target.
print(acc[target_col].value_counts())

## Time-Based Feature Engineering

In [None]:
# ============================================================
# 4. TIME-BASED FEATURE ENGINEERING
# ============================================================

def extract_hour(t):
    """Return the hour of day encoded at the start of a time value.

    The value is converted with ``str()`` and everything before the first
    ``":"`` is parsed as an integer, so ``"17:30"`` gives ``17``.

    Args:
        t: A time value; presumably an ``"HH:MM"`` string, but any object
            is accepted (missing values such as NaN/None are handled).

    Returns:
        The hour as an int in ``0..23``, or ``-1`` when the value cannot be
        parsed or the parsed number is not a valid hour.
    """
    hour_text = str(t).partition(":")[0].strip()
    try:
        hour = int(hour_text)
    except ValueError:
        # Non-numeric text such as "nan", "None" or an empty string.
        return -1
    # Reject impossible hours (e.g. "25:00") rather than feeding them to the
    # model as if they were real; -1 is the single "unknown" sentinel.
    return hour if 0 <= hour <= 23 else -1

# Calendar components. The .dt accessor requires acc["Date"] to already be a
# datetime column.
acc["Year"]  = acc["Date"].dt.year
acc["Month"] = acc["Date"].dt.month
acc["Day"]   = acc["Date"].dt.day
# Hour of day parsed from the Time column by extract_hour.
acc["Hour"]  = acc["Time"].apply(extract_hour)

# Weekend flag: 1 for Saturday/Sunday, 0 otherwise.
if pd.api.types.is_numeric_dtype(acc["Day_of_Week"]):
    # Numeric weekday codes are ambiguous: whether 1 means Sunday or Monday
    # changes which codes are the weekend (with 1=Sunday, codes 6 and 7 are
    # Friday and Saturday). Derive the flag from the calendar date instead;
    # Series.dt.dayofweek is Monday=0 ... Sunday=6. Rows with a missing Date
    # compare as False and get 0.
    acc["Is_Weekend"] = (acc["Date"].dt.dayofweek >= 5).astype(int)
else:
    # Day names such as 'Monday', 'Saturday'. Normalise case and surrounding
    # whitespace so 'saturday ' still counts; this branch also covers
    # category / string dtypes, not only plain object columns.
    day_names = acc["Day_of_Week"].astype(str).str.strip().str.lower()
    acc["Is_Weekend"] = day_names.isin(["saturday", "sunday"]).astype(int)

# Preview the engineered columns (notebook cell output).
acc[["Date", "Hour", "Is_Weekend"]].head()


## Vehicle-Level Feature Engineering

In [None]:
# ============================================================
# 5. VEHICLE-LEVEL FEATURE ENGINEERING  (AGGREGATED TO ACCIDENT)
#    (lightweight, safe for laptop but still useful)
# ============================================================

# Convert to numeric where possible; unparseable entries become NaN and are
# ignored by the mean below.
# NOTE(review): if the raw file encodes "unknown" as a number (e.g. -1), those
# values enter the means — confirm against the data dictionary.
veh["Age_of_Vehicle"] = pd.to_numeric(veh["Age_of_Vehicle"], errors="coerce")
veh["Engine_Capacity_.CC."] = pd.to_numeric(veh["Engine_Capacity_.CC."], errors="coerce")

# One row per accident: number of vehicle records (non-null Vehicle_Reference),
# mean vehicle age and mean engine capacity.
veh_agg = veh.groupby("Accident_Index").agg(
    veh_count=("Vehicle_Reference", "count"),
    veh_age_mean=("Age_of_Vehicle", "mean"),
    engine_mean=("Engine_Capacity_.CC.", "mean"),
)

# Drop aggregates left over from a previous run of this cell. Without this, a
# re-run makes merge() emit veh_count_x / veh_count_y and the fillna lines
# below fail with KeyError.
acc = acc.drop(columns=list(veh_agg.columns), errors="ignore")
acc = acc.merge(veh_agg, on="Accident_Index", how="left")

# Accidents with no matching rows in veh come out of the left join as NaN:
# use 0 vehicles, and -1 as the "unknown" sentinel for the means.
acc["veh_count"]    = acc["veh_count"].fillna(0)
acc["veh_age_mean"] = acc["veh_age_mean"].fillna(-1)
acc["engine_mean"]  = acc["engine_mean"].fillna(-1)

# Preview the merged vehicle features (notebook cell output).
acc[["Accident_Index", "veh_count", "veh_age_mean", "engine_mean"]].head()
