In [None]:
# STEP 1: IMPORT LIBRARIES
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's "whitegrid" style to the plots drawn in later cells.
sns.set(style="whitegrid")

In [None]:
# STEP 2: LOAD DATASET
# Opens the Colab upload dialog and loads the first uploaded file as a CSV.
from google.colab import files
uploaded = files.upload()
# `uploaded` is empty when the dialog is cancelled; fail with a clear message
# instead of the bare StopIteration that next(iter({})) would raise.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and choose a CSV file.")
# Only the first uploaded file is used; any additional uploads are ignored.
csv_name = next(iter(uploaded))
df = pd.read_csv(csv_name)

In [None]:
# STEP 3: DATA PREPROCESSING
df.fillna(method='ffill', inplace=True)
df.drop(columns=['Accident_ID', 'Report_Number'], errors='ignore', inplace=True)
categorical_cols = df.select_dtypes(include='object').columns
le = LabelEncoder()
for col in categorical_cols:
    df[col] = le.fit_transform(df[col])
scaler = StandardScaler()
num_cols = df.select_dtypes(include=np.number).columns.drop('Accident_Severity')
df[num_cols] = scaler.fit_transform(df[num_cols])

In [None]:
# STEP 4: FEATURE ENGINEERING
df['Hour'] = pd.to_datetime(df['Time'], errors='coerce').dt.hour.fillna(0).astype(int)
df['Day_of_Week'] = pd.to_datetime(df['Date'], errors='coerce').dt.dayofweek.fillna(0).astype(int)
df['Is_Weekend'] = df['Day_of_Week'].isin([5, 6]).astype(int)
df['Is_Night'] = df['Light_Conditions'].isin(['Darkness', 'Dark']).astype(int)
df['Is_Bad_Weather'] = df['Weather_Conditions'].isin(['Rain', 'Snow', 'Fog']).astype(int)

In [None]:
# STEP 5: TRAIN-TEST SPLIT
# Separate the features from the target column, then hold out 20% of the rows
# for evaluation using a fixed seed.
X = df.drop(columns='Accident_Severity')
y = df['Accident_Severity']
# Stratify on the target so every class keeps its proportion in both
# partitions. Stratification needs at least two rows per class, so fall back
# to a plain random split when some class has fewer.
stratify_on = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=stratify_on
)

In [None]:
# STEP 6: MODEL TRAINING
# Fit a 100-tree random forest with a fixed seed on the training rows, then
# predict labels for the held-out rows.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**forest_params).fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# STEP 7: EVALUATION
# Print overall accuracy and the per-class report, then draw the confusion
# matrix as an annotated heatmap.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Sorted set of classes seen in either the true or the predicted labels. Passing
# it explicitly keeps the matrix rows/columns and the tick labels aligned, so
# the axes show actual class values instead of positional indices 0..k-1.
class_labels = np.union1d(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()