In [24]:
import argparse
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import resample
import numpy as np
import xgboost as xgb
from sklearn.svm import SVC

In [13]:
# Load the dataset
data = pd.read_csv('datasets_try/processed_features.csv')

# Check the distribution of the 'action' column
action_distribution = data['action'].value_counts()
print(action_distribution)

1.0    2475
4.0    2081
2.0     226
0.0     189
3.0      29
Name: action, dtype: int64


Normal


In [28]:
# Create a binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Split the data into features and binary target
X_binary = data.drop(columns=['action', 'binary_target'])
y_binary = data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier
binary_rf_model = RandomForestClassifier(random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")
# Calculate precision, recall, and F1-score
precision_test = precision_score(y_test_binary, y_pred_binary_test)
recall_test = recall_score(y_test_binary, y_pred_binary_test)
f1_test = f1_score(y_test_binary, y_pred_binary_test)

# Print metrics
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_test))

# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model.pkl')
#print(data[5:10])

Binary Classifier Accuracy on Test Data: 95.00%
Precision on Test Data: 96.19%
Recall on Test Data: 98.48%
F1-Score on Test Data: 97.33%

Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.53      0.62        38
           1       0.96      0.98      0.97       462

    accuracy                           0.95       500
   macro avg       0.85      0.76      0.79       500
weighted avg       0.95      0.95      0.95       500


Confusion Matrix:
[[ 20  18]
 [  7 455]]


['models_try/binary_rf_model.pkl']

downsampling

In [27]:

# Assuming 'data' is your DataFrame
# Create binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Separate majority and minority classes
majority_class = data[data['binary_target'] == 1]
minority_class = data[data['binary_target'] == 0]

# Downsample majority class
majority_downsampled = resample(majority_class, 
                                replace=True,    # Sample without replacement
                                n_samples=len(minority_class),  # Match minority class size
                                random_state=42)

# Combine minority class with downsampled majority class
balanced_data = pd.concat([majority_downsampled, minority_class])

# Split the balanced data into features and target
X_binary = balanced_data.drop(columns=['action', 'binary_target'])
y_binary = balanced_data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier
binary_rf_model = RandomForestClassifier(random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

balanced_data.to_csv('datasets_try/downsampled_data.csv', index=False)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")

print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_test))


# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model_down.pkl')
print(balanced_data['action'].value_counts())

Binary Classifier Accuracy on Test Data: 85.39%
Precision on Test Data: 67.74%
Recall on Test Data: 95.45%
F1-Score on Test Data: 79.25%

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.84      0.85        45
           1       0.84      0.86      0.85        44

    accuracy                           0.85        89
   macro avg       0.85      0.85      0.85        89
weighted avg       0.85      0.85      0.85        89


Confusion Matrix:
[[38  7]
 [ 6 38]]
1.0    227
2.0    226
4.0    217
0.0    189
3.0     29
Name: action, dtype: int64


xgb

In [23]:

# Initialize the XGBoost model
xgb_model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')

# Train the model
xgb_model.fit(X_train_binary, y_train_binary)

# Make predictions on the test set
y_pred_test = xgb_model.predict(X_test_binary)

# Evaluate the model
accuracy_test = accuracy_score(y_test_binary, y_pred_test)
precision_test = precision_score(y_test_binary, y_pred_test)
recall_test = recall_score(y_test_binary, y_pred_test)
f1_test = f1_score(y_test_binary, y_pred_test)

print(f"XGBoost Accuracy on Test Data: {accuracy_test * 100:.2f}%")
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_test))

# Save the trained XGBoost model
joblib.dump(xgb_model, 'models_try/xgb_model.pkl')

Parameters: { "use_label_encoder" } are not used.



XGBoost Accuracy on Test Data: 84.27%
Precision on Test Data: 82.61%
Recall on Test Data: 86.36%
F1-Score on Test Data: 84.44%

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.82      0.84        45
           1       0.83      0.86      0.84        44

    accuracy                           0.84        89
   macro avg       0.84      0.84      0.84        89
weighted avg       0.84      0.84      0.84        89


Confusion Matrix:
[[37  8]
 [ 6 38]]


['models_try/xgb_model.pkl']

svm

In [26]:
# Initialize the SVM classifier
svm_model = SVC(kernel='rbf', random_state=42)  # Using radial basis function kernel

# Train the model
svm_model.fit(X_train_binary, y_train_binary)

# Make predictions on the test set
y_pred_test = svm_model.predict(X_test_binary)

# Evaluate the model
accuracy_test = accuracy_score(y_test_binary, y_pred_test)
precision_test = precision_score(y_test_binary, y_pred_test)
recall_test = recall_score(y_test_binary, y_pred_test)
f1_test = f1_score(y_test_binary, y_pred_test)

print(f"svm Accuracy on Test Data: {accuracy_test * 100:.2f}%")
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_test))

# Save the trained XGBoost model
joblib.dump(xgb_model, 'models_try/xgb_model.pkl')

svm Accuracy on Test Data: 75.28%
Precision on Test Data: 67.74%
Recall on Test Data: 95.45%
F1-Score on Test Data: 79.25%

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.56      0.69        45
           1       0.68      0.95      0.79        44

    accuracy                           0.75        89
   macro avg       0.80      0.76      0.74        89
weighted avg       0.80      0.75      0.74        89


Confusion Matrix:
[[25 20]
 [ 2 42]]


['models_try/xgb_model.pkl']

class  weights

In [6]:
# Create binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Split the data into features and target
X_binary = data.drop(columns=['action', 'binary_target'])
y_binary = data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier with class weights
binary_rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_test))

# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model.pkl')

Binary Classifier Accuracy on Test Data: 94.60%
Precision on Test Data: 96.19%
Recall on Test Data: 98.48%
F1-Score on Test Data: 97.33%

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.39      0.53        38
           1       0.95      0.99      0.97       462

    accuracy                           0.95       500
   macro avg       0.87      0.69      0.75       500
weighted avg       0.94      0.95      0.94       500


Confusion Matrix:
[[ 15  23]
 [  4 458]]


['models_try/binary_rf_model.pkl']

minor action classifier

In [None]:
# Filter the data for classes 0, 2, and 3
filtered_data = data[data['binary_target'] == 0]
print(filtered_data[0:5])

# Split the dataset into features and target
X = filtered_data.drop(columns=['action', 'binary_target'])
y = filtered_data['action']  # Multi-class target

# Split into train, test, and eval sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_test, X_eval, y_test, y_eval = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a single Random Forest classifier
rf_model_minor = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_minor.fit(X_train, y_train)

# Make predictions
y_pred_test = rf_model_minor.predict(X_test)
y_pred_eval = rf_model_minor.predict(X_eval)

# Evaluate performance
print("Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Evaluation Accuracy:", accuracy_score(y_eval, y_pred_eval))
print("\nClassification Report on Test Data:\n", classification_report(y_test, y_pred_test))

# Save the binary classifier
joblib.dump(rf_model_minor, 'models_try/minor_rf_model.pkl')


    vehicles_in_ego_lane  vehicles_in_left_lane  vehicles_in_right_lane  \
24                   2.0                    1.0                     3.0   
30                   3.0                    3.0                     0.0   
36                   3.0                    2.0                     0.0   
41                   2.0                    0.0                     3.0   
51                   3.0                    0.0                     0.0   

    closest_in_ego_lane_dist  closest_left_lane_dist  closest_right_lane_dist  \
24                  18.81757                 9.06516                 40.23717   
30                   9.58334                29.39624                  0.00000   
36                   9.92166                19.45256                  0.00000   
41                  20.96758                 0.00000                 51.98688   
51                  19.54549             10000.00000                  0.00000   

    relative_velocity_ego_lane  relative_velocity_left_lane  \

['models_try/minor_rf_model.pkl']

major action classifier

In [None]:
# Filter the data for classes 1 and 4
filtered_data = data[data['binary_target'] == 1]
print(filtered_data[0:5])

# Split the dataset into features and target
X = filtered_data.drop(columns=['action', 'binary_target'])
y = filtered_data['action']  # Multi-class target

# Split into train, test, and eval sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_test, X_eval, y_test, y_eval = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a single Random Forest classifier
rf_model_major = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_major.fit(X_train, y_train)

# Make predictions
y_pred_test = rf_model_major.predict(X_test)
y_pred_eval = rf_model_major.predict(X_eval)

# Evaluate performance
print("Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Evaluation Accuracy:", accuracy_score(y_eval, y_pred_eval))
print("\nClassification Report on Test Data:\n", classification_report(y_test, y_pred_test))

# Save the binary classifier
joblib.dump(rf_model_major, 'models_try/major_rf_model.pkl')


   vehicles_in_ego_lane  vehicles_in_left_lane  vehicles_in_right_lane  \
0                   1.0                    3.0                     4.0   
1                   2.0                    1.0                     1.0   
2                   4.0                    0.0                     4.0   
3                   2.0                    1.0                     0.0   
4                   3.0                    0.0                     1.0   

   closest_in_ego_lane_dist  closest_left_lane_dist  closest_right_lane_dist  \
0                  61.76268                11.00473                 21.79465   
1                  31.40597                52.71649                 84.88040   
2                  10.71805                 0.00000                 21.06313   
3                  10.47482                31.08163                  0.00000   
4                  11.60712                 0.00000                 96.90985   

   relative_velocity_ego_lane  relative_velocity_left_lane  \
0           

['models_try/major_rf_model.pkl']