In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
import joblib

# 1. Generate synthetic but more realistic data
np.random.seed(42)
n_samples = 5000

data = {
    'v': np.clip(np.random.normal(loc=60, scale=20, size=n_samples), 0, 150),
    'x': np.clip(np.random.exponential(scale=25, size=n_samples), 0, 100),
    'y': np.random.beta(2, 5, n_samples),  # more low intent cases
    'brake': np.clip(np.random.normal(loc=40, scale=20, size=n_samples), 0, 100),
    'steer': np.random.uniform(-45, 45, n_samples),
    'a': np.clip(np.random.normal(loc=0, scale=3, size=n_samples), -10, 10),
    'angle_to_ped': np.random.uniform(0, 180, n_samples),
    'μ': np.random.uniform(0.2, 1.0, n_samples),
    'vis': np.random.uniform(0.2, 1.0, n_samples),
    'bdist': np.clip(np.random.normal(loc=50, scale=20, size=n_samples), 0, 120),
}
df = pd.DataFrame(data)

In [2]:
df

Unnamed: 0,v,x,y,brake,steer,a,angle_to_ped,μ,vis,bdist
0,69.934283,4.605181,0.168286,61.979400,37.396980,-4.490590,174.564089,0.424493,0.796490,78.286737
1,57.234714,5.266012,0.126683,28.840462,-11.918788,1.447129,132.861602,0.337127,0.907841,58.803089
2,72.953771,15.437646,0.348115,44.650267,2.145172,3.668875,177.302129,0.406360,0.395427,42.871299
3,90.460597,8.431609,0.325335,23.697234,-30.417645,-1.268347,133.819330,0.260643,0.931681,77.329793
4,55.316933,7.107627,0.565656,36.998563,21.237588,8.370694,91.425397,0.473247,0.689553,37.274813
...,...,...,...,...,...,...,...,...,...,...
4995,59.020699,3.778859,0.277940,66.413989,-27.910370,0.864086,76.017193,0.792686,0.430055,83.143004
4996,74.228212,54.412937,0.074583,0.000000,29.469531,-6.433805,43.620988,0.728415,0.742171,32.358146
4997,122.258204,32.712290,0.232355,31.787149,-16.907642,5.993638,33.993678,0.266326,0.463922,59.779899
4998,76.160724,4.498218,0.331822,29.839825,3.216319,-3.921280,25.117509,0.935204,0.353211,59.778705


In [3]:
conditions = [
    # A3 (Emergency)
    ((df['x'] < 15) & (df['y'] > 0.75) & (df['v'] > 60) & (df['brake'] < 20)),
    ((df['x'] < 12) & (df['y'] > 0.7) & (df['a'] > 2) & (df['μ'] < 0.4)),
    ((df['x'] < 20) & (df['y'] > 0.8) & (df['angle_to_ped'] < 20)),

    # A2 (Alert)
    ((df['x'] < 30) & (df['y'] > 0.6) & (df['v'] > 40)),
    ((df['x'] < 40) & (df['y'] > 0.5) & (df['steer'].abs() > 12)),
    ((df['x'] < 50) & (df['a'] > 3) & (df['brake'] < 30)),
]

labels = [2, 2, 2, 1, 1, 1]
df['risk_level'] = np.select(conditions, labels, default=0)

In [5]:
# df.head()
df['ttc'] = df['x'] / (df['v'] + 0.1)  # Time to collision
df['risk_score'] = df['y'] * df['v'] / (df['x'] + 1)

# 4. Prepare training
features = df.drop('risk_level', axis=1)
labels = df['risk_level']

In [7]:
# X_train, X_test, y_train, y_test = train_test_split(
#     features, labels, test_size=0.2, stratify=labels, random_state=42
# )
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# 5. Train optimized model
model = XGBClassifier(
    objective='multi:softprob',
    num_class=3,
    eval_metric='mlogloss',
    use_label_encoder=False,
    max_depth=6,
    learning_rate=0.05,
    n_estimators=300,
    subsample=0.9,
    colsample_bytree=0.8,
    random_state=42
)

In [8]:
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [10]:
from sklearn.metrics import classification_report

print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1, 2],  # force all 3 labels
    target_names=["A1: Safe", "A2: Alert", "A3: Emergency"],
    zero_division=0  # avoids divide-by-zero warning
))


               precision    recall  f1-score   support

     A1: Safe       1.00      1.00      1.00       891
    A2: Alert       0.98      0.99      0.99       109
A3: Emergency       0.00      0.00      0.00         0

    micro avg       1.00      1.00      1.00      1000
    macro avg       0.66      0.66      0.66      1000
 weighted avg       1.00      1.00      1.00      1000



In [11]:
print("🧮 Confusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))

🧮 Confusion Matrix:

[[889   2]
 [  1 108]]


In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from xgboost import XGBClassifier
import joblib

# ----------------------------
# 1. Set Random Seed
# ----------------------------
np.random.seed(42)

# ----------------------------
# 2. Generate Synthetic Data
# ----------------------------
n_samples = 2000  # Increased to ensure better class distribution

data = {
    'v': np.random.uniform(0, 150, n_samples),            # Speed (km/h)
    'x': np.random.uniform(0, 100, n_samples),            # Distance to pedestrian (m)
    'y': np.random.uniform(0, 1, n_samples),              # Pedestrian intent
    'brake': np.random.uniform(0, 100, n_samples),        # Brake %
    'steer': np.random.uniform(-45, 45, n_samples),       # Steering angle (deg)
    'a': np.random.uniform(-10, 10, n_samples),           # Acceleration (m/s²)
    'angle_to_ped': np.random.uniform(0, 180, n_samples), # Angle to pedestrian (deg)
    'μ': np.random.uniform(0.1, 1.0, n_samples),          # Friction coefficient
    'vis': np.random.uniform(0, 1, n_samples),            # Visibility
    'bdist': np.random.uniform(0, 120, n_samples),        # Braking distance
}
df = pd.DataFrame(data)

In [14]:
# 3. Define Risk Labeling Logic
# ----------------------------
conditions = [
    # A3: Emergency
    ((df['x'] < 15) & (df['y'] > 0.8) & (df['v'] > 50) & (df['brake'] < 20)),
    ((df['x'] < 10) & (df['y'] > 0.6) & (df['a'] > 2) & (df['μ'] < 0.4)),
    ((df['x'] < 20) & (df['y'] > 0.7) & (df['angle_to_ped'] < 30)),

    # A2: Alert
    ((df['x'] < 30) & (df['y'] > 0.5) & (df['v'] > 40)),
    ((df['x'] < 35) & (df['y'] > 0.6) & (df['steer'].abs() > 10)),
    ((df['x'] < 40) & (df['a'] > 3) & (df['brake'] < 30)),
]

labels = [2, 2, 2, 1, 1, 1]  # 2 = A3, 1 = A2, 0 = A1 (default)
df['risk_level'] = np.select(conditions, labels, default=0)

# Ensure class balance for train/test split
label_counts = df['risk_level'].value_counts()
print("Label Distribution Before Split:\n", label_counts)


Label Distribution Before Split:
 0    1664
1     312
2      24
Name: risk_level, dtype: int64


In [15]:
# 4. Prepare Features and Labels
# ----------------------------
X = df.drop('risk_level', axis=1)
y = df['risk_level']

# ----------------------------
# 5. Train-Test Split
# ----------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [16]:
# 6. Initialize and Train XGBoost Model
# ----------------------------
model = XGBClassifier(
    objective='multi:softprob',
    num_class=3,
    eval_metric='mlogloss',
    use_label_encoder=False,
    max_depth=6,
    learning_rate=0.1,
    n_estimators=100,
    random_state=42
)

model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [17]:
# 7. Evaluate Model
# ----------------------------
y_pred = model.predict(X_test)

print("\n📊 Classification Report:\n")
print(classification_report(
    y_test,
    y_pred,
    labels=[0, 1, 2],
    target_names=["A1: Safe", "A2: Alert", "A3: Emergency"],
    zero_division=0
))


📊 Classification Report:

               precision    recall  f1-score   support

     A1: Safe       0.99      0.99      0.99       333
    A2: Alert       0.86      0.92      0.89        62
A3: Emergency       0.00      0.00      0.00         5

     accuracy                           0.96       400
    macro avg       0.62      0.64      0.63       400
 weighted avg       0.96      0.96      0.96       400



In [18]:
joblib.dump(model, 'xgb_risk_model2.pkl')
print("\n✅ Model saved as 'xgb_risk_model2.pkl'")


✅ Model saved as 'xgb_risk_model2.pkl'
