In [10]:
import argparse
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns

In [25]:
# Load the dataset
data = pd.read_csv('datasets_try/processed_features.csv')

# Check the distribution of the 'action' column
action_distribution = data['action'].value_counts()
print(action_distribution)

1.0    2475
4.0    2081
2.0     226
0.0     189
3.0      29
Name: action, dtype: int64


In [26]:
# Create a binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Split the data into features and binary target
X_binary = data.drop(columns=['action', 'binary_target'])
y_binary = data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.3, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier
binary_rf_model = RandomForestClassifier(random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")

# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model.pkl')
print(data[:5])

Binary Classifier Accuracy on Test Data: 92.80%
   vehicles_in_ego_lane  vehicles_in_left_lane  vehicles_in_right_lane  \
0                   1.0                    3.0                     4.0   
1                   2.0                    1.0                     1.0   
2                   4.0                    0.0                     4.0   
3                   2.0                    1.0                     0.0   
4                   3.0                    0.0                     1.0   

   closest_in_ego_lane_dist  closest_left_lane_dist  closest_right_lane_dist  \
0                  61.76268                11.00473                 21.79465   
1                  31.40597                52.71649                 84.88040   
2                  10.71805                 0.00000                 21.06313   
3                  10.47482                31.08163                  0.00000   
4                  11.60712                 0.00000                 96.90985   

   relative_velocity_ego_l

minor action classifier

In [23]:
# Filter the data for classes 0, 2, and 3
filtered_data = data[data['binary_target'] == 0]

# Split the dataset into features and target
X = filtered_data.drop(columns=['action', 'binary_target'])
y = filtered_data['action']  # Multi-class target

# Split into train, test, and eval sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_eval, y_test, y_eval = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a single Random Forest classifier
rf_model_minor = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_minor.fit(X_train, y_train)

# Make predictions
y_pred_test = rf_model_minor.predict(X_test)
y_pred_eval = rf_model_minor.predict(X_eval)

# Evaluate performance
print("Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Evaluation Accuracy:", accuracy_score(y_eval, y_pred_eval))
print("\nClassification Report on Test Data:\n", classification_report(y_test, y_pred_test))

# Save the binary classifier
joblib.dump(rf_model_minor, 'models_try/minor_rf_model.pkl')


Test Accuracy: 0.9850746268656716
Evaluation Accuracy: 1.0

Classification Report on Test Data:
               precision    recall  f1-score   support

         0.0       1.00      0.97      0.98        31
         2.0       0.97      1.00      0.98        32
         3.0       1.00      1.00      1.00         4

    accuracy                           0.99        67
   macro avg       0.99      0.99      0.99        67
weighted avg       0.99      0.99      0.99        67



['models_try/minor_rf_model.pkl']

major action classifier

In [22]:
# Filter the data for classes 1 and 4
filtered_data = data[data['binary_target'] == 1]

# Split the dataset into features and target
X = filtered_data.drop(columns=['action', 'binary_target'])
y = filtered_data['action']  # Multi-class target

# Split into train, test, and eval sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_test, X_eval, y_test, y_eval = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a single Random Forest classifier
rf_model_major = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_major.fit(X_train, y_train)

# Make predictions
y_pred_test = rf_model_major.predict(X_test)
y_pred_eval = rf_model_major.predict(X_eval)

# Evaluate performance
print("Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Evaluation Accuracy:", accuracy_score(y_eval, y_pred_eval))
print("\nClassification Report on Test Data:\n", classification_report(y_test, y_pred_test))

# Save the binary classifier
joblib.dump(rf_model_major, 'models_try/major_rf_model.pkl')


Test Accuracy: 0.8755490483162518
Evaluation Accuracy: 0.8581871345029239

Classification Report on Test Data:
               precision    recall  f1-score   support

         1.0       0.86      0.91      0.89       367
         4.0       0.89      0.83      0.86       316

    accuracy                           0.88       683
   macro avg       0.88      0.87      0.87       683
weighted avg       0.88      0.88      0.88       683



['models_try/major_rf_model.pkl']