In [29]:
import argparse
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, precision_score, recall_score, f1_score
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils import resample
import numpy as np
import xgboost as xgb
from sklearn.svm import SVC

In [45]:
def merge_and_sort_csv(file1, file2, output_file):
    # Load the CSV files into pandas DataFrames
    df1 = pd.read_csv(file1)
    df2 = pd.read_csv(file2)

    # Combine the data from both DataFrames
    combined_df = pd.concat([df1, df2], ignore_index=True)

    # Sort the combined data by the 'action' column (ascending order)
    sorted_df = combined_df.sort_values(by='action', ascending=True)

    # Save the sorted data to a new CSV file
    sorted_df.to_csv(output_file, index=False)

    print(f"Data merged and sorted successfully. Saved to {output_file}")

# Example usage
file1 = 'datasets_try/processed_features5.csv'  # Path to your first CSV file
file2 = 'datasets_try/processed_features4.csv'  # Path to your second CSV file
output_file = 'datasets_try/merged_sorted_data.csv'  # Output CSV file

merge_and_sort_csv(file1, file2, output_file)


Data merged and sorted successfully. Saved to datasets_try/merged_sorted_data.csv


In [46]:
# Load the dataset
data = pd.read_csv('datasets_try/merged_sorted_data.csv')

# Check the distribution of the 'action' column
action_distribution = data['action'].value_counts()
print(action_distribution)

1.0    3477
4.0    2575
3.0     414
2.0     307
0.0     229
Name: action, dtype: int64


Normal


In [52]:
# Create a binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Split the data into features and binary target
X_binary = data.drop(columns=['action', 'binary_target'])
y_binary = data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier
binary_rf_model = RandomForestClassifier(random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")
# Calculate precision, recall, and F1-score
precision_test = precision_score(y_test_binary, y_pred_binary_test)
recall_test = recall_score(y_test_binary, y_pred_binary_test)
f1_test = f1_score(y_test_binary, y_pred_binary_test)

# Print metrics
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_test))

# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model.pkl')
#print(data[5:10])

Binary Classifier Accuracy on Test Data: 89.29%
Precision on Test Data: 91.47%
Recall on Test Data: 96.72%
F1-Score on Test Data: 94.02%

Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.39      0.48        90
           1       0.91      0.97      0.94       610

    accuracy                           0.89       700
   macro avg       0.78      0.68      0.71       700
weighted avg       0.88      0.89      0.88       700


Confusion Matrix:
[[ 35  55]
 [ 20 590]]


['models_try/binary_rf_model.pkl']

downsampling

In [48]:

# Assuming 'data' is your DataFrame
# Create binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Separate majority and minority classes
majority_class = data[data['binary_target'] == 1]
minority_class = data[data['binary_target'] == 0]

# Downsample majority class
majority_downsampled = resample(majority_class, 
                                replace=True,    # Sample without replacement
                                n_samples=len(minority_class),  # Match minority class size
                                random_state=42)

# Combine minority class with downsampled majority class
balanced_data = pd.concat([majority_downsampled, minority_class])

# Split the balanced data into features and target
X_binary = balanced_data.drop(columns=['action', 'binary_target'])
y_binary = balanced_data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier
binary_rf_model = RandomForestClassifier(random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

balanced_data.to_csv('datasets_try/downsampled_data.csv', index=False)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")

print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_test))


# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model_down.pkl')
print(balanced_data['action'].value_counts())

Binary Classifier Accuracy on Test Data: 79.47%
Precision on Test Data: 91.47%
Recall on Test Data: 96.72%
F1-Score on Test Data: 94.02%

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.78      0.79        92
           1       0.80      0.81      0.80        98

    accuracy                           0.79       190
   macro avg       0.79      0.79      0.79       190
weighted avg       0.79      0.79      0.79       190


Confusion Matrix:
[[72 20]
 [19 79]]
1.0    535
4.0    415
3.0    414
2.0    307
0.0    229
Name: action, dtype: int64


xgb

In [23]:

# Initialize the XGBoost model
xgb_model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss')

# Train the model
xgb_model.fit(X_train_binary, y_train_binary)

# Make predictions on the test set
y_pred_test = xgb_model.predict(X_test_binary)

# Evaluate the model
accuracy_test = accuracy_score(y_test_binary, y_pred_test)
precision_test = precision_score(y_test_binary, y_pred_test)
recall_test = recall_score(y_test_binary, y_pred_test)
f1_test = f1_score(y_test_binary, y_pred_test)

print(f"XGBoost Accuracy on Test Data: {accuracy_test * 100:.2f}%")
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_test))

# Save the trained XGBoost model
joblib.dump(xgb_model, 'models_try/xgb_model.pkl')

Parameters: { "use_label_encoder" } are not used.



XGBoost Accuracy on Test Data: 84.27%
Precision on Test Data: 82.61%
Recall on Test Data: 86.36%
F1-Score on Test Data: 84.44%

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.82      0.84        45
           1       0.83      0.86      0.84        44

    accuracy                           0.84        89
   macro avg       0.84      0.84      0.84        89
weighted avg       0.84      0.84      0.84        89


Confusion Matrix:
[[37  8]
 [ 6 38]]


['models_try/xgb_model.pkl']

svm

In [26]:
# Initialize the SVM classifier
svm_model = SVC(kernel='rbf', random_state=42)  # Using radial basis function kernel

# Train the model
svm_model.fit(X_train_binary, y_train_binary)

# Make predictions on the test set
y_pred_test = svm_model.predict(X_test_binary)

# Evaluate the model
accuracy_test = accuracy_score(y_test_binary, y_pred_test)
precision_test = precision_score(y_test_binary, y_pred_test)
recall_test = recall_score(y_test_binary, y_pred_test)
f1_test = f1_score(y_test_binary, y_pred_test)

print(f"svm Accuracy on Test Data: {accuracy_test * 100:.2f}%")
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_test))

# Save the trained XGBoost model
joblib.dump(xgb_model, 'models_try/xgb_model.pkl')

svm Accuracy on Test Data: 75.28%
Precision on Test Data: 67.74%
Recall on Test Data: 95.45%
F1-Score on Test Data: 79.25%

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.56      0.69        45
           1       0.68      0.95      0.79        44

    accuracy                           0.75        89
   macro avg       0.80      0.76      0.74        89
weighted avg       0.80      0.75      0.74        89


Confusion Matrix:
[[25 20]
 [ 2 42]]


['models_try/xgb_model.pkl']

class  weights

In [49]:
# Create binary target variable
data['binary_target'] = data['action'].apply(lambda x: 1 if x in [1, 4] else 0)

# Split the data into features and target
X_binary = data.drop(columns=['action', 'binary_target'])
y_binary = data['binary_target']

# Split the dataset into train, test, and eval sets
X_train_binary, X_temp_binary, y_train_binary, y_temp_binary = train_test_split(X_binary, y_binary, test_size=0.2, random_state=42)
X_test_binary, X_eval_binary, y_test_binary, y_eval_binary = train_test_split(X_temp_binary, y_temp_binary, test_size=0.5, random_state=42)

# Train the binary classifier with class weights
binary_rf_model = RandomForestClassifier(class_weight='balanced', random_state=42)
binary_rf_model.fit(X_train_binary, y_train_binary)

# Evaluate the binary classifier
y_pred_binary_test = binary_rf_model.predict(X_test_binary)
accuracy_binary_test = accuracy_score(y_test_binary, y_pred_binary_test)
print(f"Binary Classifier Accuracy on Test Data: {accuracy_binary_test * 100:.2f}%")
print(f"Precision on Test Data: {precision_test * 100:.2f}%")
print(f"Recall on Test Data: {recall_test * 100:.2f}%")
print(f"F1-Score on Test Data: {f1_test * 100:.2f}%")

# Generate a detailed classification report
print("\nClassification Report:")
print(classification_report(y_test_binary, y_pred_binary_test))

# Generate a confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_binary, y_pred_binary_test))

# Save the binary classifier
joblib.dump(binary_rf_model, 'models_try/binary_rf_model.pkl')

Binary Classifier Accuracy on Test Data: 89.14%
Precision on Test Data: 91.47%
Recall on Test Data: 96.72%
F1-Score on Test Data: 94.02%

Classification Report:
              precision    recall  f1-score   support

           0       0.64      0.36      0.46        90
           1       0.91      0.97      0.94       610

    accuracy                           0.89       700
   macro avg       0.78      0.66      0.70       700
weighted avg       0.88      0.89      0.88       700


Confusion Matrix:
[[ 32  58]
 [ 18 592]]


['models_try/binary_rf_model.pkl']

minor action classifier

In [50]:
# Filter the data for classes 0, 2, and 3
filtered_data = data[data['binary_target'] == 0]
print(filtered_data[0:5])

# Split the dataset into features and target
X = filtered_data.drop(columns=['action', 'binary_target'])
y = filtered_data['action']  # Multi-class target

# Split into train, test, and eval sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_test, X_eval, y_test, y_eval = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a single Random Forest classifier
rf_model_minor = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_minor.fit(X_train, y_train)

# Make predictions
y_pred_test = rf_model_minor.predict(X_test)
y_pred_eval = rf_model_minor.predict(X_eval)

# Evaluate performance
print("Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Evaluation Accuracy:", accuracy_score(y_eval, y_pred_eval))
print("\nClassification Report on Test Data:\n", classification_report(y_test, y_pred_test))

# Save the binary classifier
joblib.dump(rf_model_minor, 'models_try/minor_rf_model.pkl')


   vehicles_in_ego_lane  vehicles_in_left_lane  vehicles_in_right_lane  \
0                   4.0                    1.0                     0.0   
1                   4.0                    2.0                     0.0   
2                   1.0                    3.0                     0.0   
3                   4.0                    0.0                     3.0   
4                   6.0                    0.0                     1.0   

   closest_in_ego_lane_dist  closest_left_lane_dist  closest_right_lane_dist  \
0                  10.01390                 82.7292                  0.00000   
1                  10.74086                 84.2901                  0.00000   
2                  50.06064                 22.3463                  0.00000   
3                  10.81774              10000.0000                 31.06534   
4                  10.85504              10000.0000                 53.04366   

   relative_velocity_ego_lane  relative_velocity_left_lane  \
0           

['models_try/minor_rf_model.pkl']

major action classifier

In [51]:
# Filter the data for classes 1 and 4
filtered_data = data[data['binary_target'] == 1]
print(filtered_data[0:5])

# Split the dataset into features and target
X = filtered_data.drop(columns=['action', 'binary_target'])
y = filtered_data['action']  # Multi-class target

# Split into train, test, and eval sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_test, X_eval, y_test, y_eval = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Train a single Random Forest classifier
rf_model_major = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model_major.fit(X_train, y_train)

# Make predictions
y_pred_test = rf_model_major.predict(X_test)
y_pred_eval = rf_model_major.predict(X_eval)

# Evaluate performance
print("Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Evaluation Accuracy:", accuracy_score(y_eval, y_pred_eval))
print("\nClassification Report on Test Data:\n", classification_report(y_test, y_pred_test))

# Save the binary classifier
joblib.dump(rf_model_major, 'models_try/major_rf_model.pkl')


     vehicles_in_ego_lane  vehicles_in_left_lane  vehicles_in_right_lane  \
229                   2.0                    0.0                     1.0   
230                   2.0                    0.0                     2.0   
231                   2.0                    2.0                     0.0   
232                   1.0                    1.0                     4.0   
233                   1.0                    4.0                     3.0   

     closest_in_ego_lane_dist  closest_left_lane_dist  \
229                  40.09975                 0.00000   
230                  42.02866                 0.00000   
231                  32.03241                10.65169   
232                  32.28675                85.29086   
233                  32.70313                22.67897   

     closest_right_lane_dist  relative_velocity_ego_lane  \
229                 20.06145                   -2.027105   
230                 21.69714                   -1.779930   
231                 

['models_try/major_rf_model.pkl']