<a href="https://colab.research.google.com/github/kan0222/DATA-SCIENCE-PROJECTS/blob/main/ASSIGNMENT_TWO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import pickle


# Seed NumPy's global generator so the synthetic dataset is the same on every run.
np.random.seed(42)

n_samples = 500

# Draw each feature column separately. Every call consumes the shared global
# random stream, so the order of these draws determines the generated data.
speed_values = np.random.randint(20, 140, n_samples)
age_values = np.random.randint(18, 70, n_samples)
vehicle_counts = np.random.randint(1, 4, n_samples)
weather_values = np.random.choice(["Clear", "Rainy", "Foggy"], n_samples)
surface_values = np.random.choice(["Dry", "Wet", "Snow"], n_samples)
lighting_values = np.random.choice(["Daylight", "Night", "Dawn"], n_samples)

df = pd.DataFrame({
    "vehicle_speed": speed_values,
    "driver_age": age_values,
    "num_vehicles_involved": vehicle_counts,
    "weather": weather_values,
    "road_surface": surface_values,
    "lighting": lighting_values,
})

# Build the target as a linear combination of the features plus Gaussian noise.
# Each contribution is named so the final sum reads like the model equation.
speed_term = 0.03 * df["vehicle_speed"]
age_term = 0.01 * (70 - df["driver_age"])  # lower age -> larger contribution
vehicles_term = 0.5 * df["num_vehicles_involved"]
rainy_term = np.where(df["weather"] == "Rainy", 1.0, 0)
foggy_term = np.where(df["weather"] == "Foggy", 0.7, 0)
wet_term = np.where(df["road_surface"] == "Wet", 0.5, 0)
night_term = np.where(df["lighting"] == "Night", 0.8, 0)
noise_term = np.random.normal(0, 1, n_samples)  # last draw from the seeded stream

df["severity_score"] = (
    speed_term
    + age_term
    + vehicles_term
    + rainy_term
    + foggy_term
    + wet_term
    + night_term
    + noise_term
)

# Split the frame into the regression target and the predictor columns.
y = df["severity_score"]
X = df.drop("severity_score", axis=1)

# Column groups; each group is routed to its own preprocessing branch below.
categorical_features = ["weather", "road_surface", "lighting"]
numeric_features = ["vehicle_speed", "driver_age", "num_vehicles_involved"]

# Numeric columns are standardized. Categorical columns are one-hot encoded;
# handle_unknown="ignore" makes the encoder tolerate categories it did not
# see while fitting instead of raising at transform time.
numeric_transformer = Pipeline([("scaler", StandardScaler())])
categorical_transformer = Pipeline(
    [("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# End-to-end model: preprocessing followed by a linear regression.
model_steps = [
    ("preprocessor", preprocessor),
    ("regressor", LinearRegression()),
]
pipeline = Pipeline(model_steps)


# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out rows. The message labels this
# value as R² (the metric a regressor's score() reports).
r2 = pipeline.score(X_test, y_test)
print(f"✅ Model trained successfully! R² score on test data: {r2:.2f}")


# Bundle the fitted pipeline with the two feature-name lists in a single dict
# so they are stored (and later loaded) together.
saved_obj = {
    "pipeline": pipeline,
    "numeric_features": numeric_features,
    "categorical_features": categorical_features
}

# Name the output file once so the path written and the path reported agree.
model_path = "accident_severity_model.pkl"

# NOTE: pickle files can execute arbitrary code when loaded; only unpickle
# this artifact from a location you trust.
with open(model_path, "wb") as f:
    pickle.dump(saved_obj, f)

print(f"✅ Model saved as '{model_path}'!")


✅ Model trained successfully! R² score on test data: 0.75
✅ Model saved as 'accident_severity_model.pkl'!


In [3]:

# One hypothetical accident, expressed as a single-row frame whose columns
# match the features the pipeline was fitted on.
example = pd.DataFrame({
    "vehicle_speed": [110],
    "driver_age": [25],
    "num_vehicles_involved": [2],
    "weather": ["Rainy"],
    "road_surface": ["Wet"],
    "lighting": ["Night"],
})

# predict() yields one value per input row; take the only one.
example_predictions = pipeline.predict(example)
predicted_score = example_predictions[0]


def map_severity(score, minor_threshold=3, serious_threshold=5):
    """Translate a numeric severity score into a categorical label.

    Args:
        score: Numeric severity score (for example a regression prediction).
        minor_threshold: Scores strictly below this value are "Minor".
            Defaults to 3.
        serious_threshold: Scores at or above ``minor_threshold`` and strictly
            below this value are "Serious". Defaults to 5.

    Returns:
        "Minor", "Serious", or "Fatal" (any score at or above
        ``serious_threshold``).

    Raises:
        ValueError: If ``score`` is NaN, or if ``minor_threshold`` is greater
            than ``serious_threshold``.
    """
    if minor_threshold > serious_threshold:
        raise ValueError("minor_threshold must not exceed serious_threshold")

    # NaN compares False against every threshold, so without this guard it
    # would fall through both checks and be labelled "Fatal". NaN is the only
    # value that is not equal to itself.
    if score != score:
        raise ValueError("score must not be NaN")

    if score < minor_threshold:
        return "Minor"
    if score < serious_threshold:
        return "Serious"
    return "Fatal"

# Convert the numeric prediction into its categorical label, then report both.
severity_category = map_severity(predicted_score)

print(f"🚗 Predicted Severity Score: {predicted_score:.2f}")
print(f"🩸 Predicted Accident Severity Category: {severity_category}")


🚗 Predicted Severity Score: 7.05
🩸 Predicted Accident Severity Category: Fatal
