In [1]:
import pandas as pd
import numpy as np
import os
import time
import joblib
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, matthews_corrcoef, accuracy_score, precision_score, recall_score, f1_score
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import ADASYN
from sklearn.model_selection import GridSearchCV, cross_val_score

In [2]:

# Directories
train_directories = [
    './Documents/Research/can-train-and-test/set_01/train_01/',
    './Documents/Research/can-train-and-test/set_02/train_01/',
    './Documents/Research/can-train-and-test/set_03/train_01/',
    './Documents/Research/can-train-and-test/set_04/train_01/'
]

# Test files for DoS attack
dos_files = [
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/DoS-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/DoS-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/DoS-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/DoS-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/DoS-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/DoS-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/DoS-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/DoS-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/DoS-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/DoS-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/DoS-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/DoS-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/DoS-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/DoS-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/DoS-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/DoS-2.csv'
]

# Test files for Force neutral attack
fn_files = [
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/force-neutral-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/force-neutral-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/force-neutral-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/force-neutral-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/force-neutral-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/force-neutral-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/force-neutral-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/force-neutral-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/force-neutral-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/force-neutral-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/force-neutral-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/force-neutral-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/force-neutral-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/force-neutral-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/force-neutral-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/force-neutral-2.csv'
]

# Test files for RPM attack
rpm_files = [
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/rpm-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/rpm-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/rpm-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/rpm-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/rpm-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/rpm-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/rpm-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/rpm-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/rpm-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/rpm-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/rpm-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/rpm-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/rpm-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/rpm-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/rpm-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/rpm-2.csv'
]

# Test files for standstill attack
standstill_files = [
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/standstill-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/standstill-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/standstill-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/standstill-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/standstill-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/standstill-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/standstill-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/standstill-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/standstill-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/standstill-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/standstill-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/standstill-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/standstill-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/standstill-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/standstill-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/standstill-2.csv'
]

In [3]:
# Function to Load Data
def load_data_from_directory(directory_path):
    data_frames = []
    for file_name in os.listdir(directory_path):
        if file_name.endswith('.csv'):
            file_path = os.path.join(directory_path, file_name)
            df = pd.read_csv(file_path)
            data_frames.append(df)
    return pd.concat(data_frames, ignore_index=True)

# Function to Preprocess Data
def hex_to_int(x):
    try:
        return int(str(x), 16)
    except ValueError:
        return 0  # Handle non-hexadecimal values
        
def preprocess_data(df):
    df['timestamp'] = pd.to_datetime(df['timestamp']).astype(np.int64) // 10**9
    df['arbitration_id'] = df['arbitration_id'].apply(hex_to_int)
    df['data_field'] = df['data_field'].apply(hex_to_int)
    return df

# Function to Extract Features and Labels
def extract_features_labels(df, label_col='attack'):
    X = df.drop(columns=label_col)
    y = df[label_col]
    return X, y

# Function to load and combine attack files
def load_combine_attack_files(files):
    data_frames = []
    for file in files:
        df = pd.read_csv(file)
        data_frames.append(df)
    combined_df = pd.concat(data_frames, ignore_index=True)
    return combined_df

In [4]:
# Function to Train Gradient Boosting Model
def train_gradient_boosting(X_train, y_train):
    # Apply Random Under-Sampling for Handling Class Imbalance
    rus = RandomUnderSampler(random_state=42)
    X_train_resampled, y_train_resampled = rus.fit_resample(X_train, y_train)

    # Standardize the Features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_resampled)

    # Hyperparameter tuning for Gradient Boosting
    param_grid = {
        'n_estimators': [100, 200],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 5, 7],
        'min_samples_split': [2, 5, 10]
    }
    grid_search = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid, cv=5, scoring='f1')
    
    # Record training start time
    start_time = time.time()
    
    grid_search.fit(X_train_scaled, y_train_resampled)

    # Calculate training time
    training_time = time.time() - start_time
    
    best_gb = grid_search.best_estimator_
    return best_gb, scaler, training_time



In [5]:
# Function to Test Gradient Boosting Model
def test_gradient_boosting(gb, scaler, X_test, y_test):
    # Standardize the Features
    X_test_scaled = scaler.transform(X_test)

    # Record testing start time
    start_time = time.time()
    
    # Predict on Test Data
    y_test_pred = gb.predict(X_test_scaled)

    # Calculate testing time
    testing_time = time.time() - start_time
    
    # Evaluate Test Performance
    conf_matrix_test = confusion_matrix(y_test, y_test_pred)
    mcc_test = matthews_corrcoef(y_test, y_test_pred)
    accuracy_test = accuracy_score(y_test, y_test_pred)
    precision_test = precision_score(y_test, y_test_pred, average='weighted', zero_division=0)
    recall_test = recall_score(y_test, y_test_pred, average='weighted', zero_division=0)
    f1_test = f1_score(y_test, y_test_pred, average='weighted', zero_division=0)
    informedness_test = recall_test - (1 - recall_test)
    markedness_test = precision_test - (1 - precision_test)

    return {
        'conf_matrix': conf_matrix_test,
        'mcc': mcc_test,
        'accuracy': accuracy_test,
        'precision': precision_test,
        'recall': recall_test,
        'f1_score': f1_test,
        'informedness': informedness_test,
        'markedness': markedness_test,
        'testing_time': testing_time
    }


In [6]:
# Load and preprocess training data
df_train = pd.concat([load_data_from_directory(train_dir) for train_dir in train_directories], ignore_index=True)
df_train = preprocess_data(df_train)
X_train, y_train = extract_features_labels(df_train)

# Train Gradient Boosting Model
gb, scaler, training_time = train_gradient_boosting(X_train, y_train)

In [7]:
# Save the trained model and scaler
joblib.dump(gb, 'gradient_boosting_model.pkl')
joblib.dump(scaler, 'gb_scaler.pkl')
joblib.dump(training_time,'gb_training_time.pkl')

['gb_training_time.pkl']

In [8]:
# Load and preprocess testing data - DOS attack
df_test = pd.concat([pd.read_csv(file) for file in dos_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (DOS Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (DOS Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 52.59 seconds

-----------------------------------------------------------------------
Accuracy: 98.63%
Precision: 98.61%
Recall: 98.63%
F1-Score: 98.62%
Matthews Correlation Coefficient: 0.8564
Informedness: 0.9727
Markedness: 0.9722


In [9]:
# Load and preprocess testing data - Force Neutral
df_test = pd.concat([pd.read_csv(file) for file in fn_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Force neutral Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Force neutral Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 97.88 seconds

-----------------------------------------------------------------------
Accuracy: 98.91%
Precision: 99.87%
Recall: 98.91%
F1-Score: 99.37%
Matthews Correlation Coefficient: 0.1378
Informedness: 0.9782
Markedness: 0.9974


In [10]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in rpm_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (RPM Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (RPM Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 92.69 seconds

-----------------------------------------------------------------------
Accuracy: 99.24%
Precision: 99.84%
Recall: 99.24%
F1-Score: 99.52%
Matthews Correlation Coefficient: 0.2031
Informedness: 0.9847
Markedness: 0.9967


In [11]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in standstill_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Standstill Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Standstill Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 66.44 seconds

-----------------------------------------------------------------------
Accuracy: 99.42%
Precision: 99.83%
Recall: 99.42%
F1-Score: 99.58%
Matthews Correlation Coefficient: 0.5315
Informedness: 0.9883
Markedness: 0.9967


In [20]:
# Test files for DoS attack
double_files = [
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/double-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/double-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/double-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/double-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/double-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/double-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/double-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/double-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/double-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/double-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/double-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/double-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/double-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/double-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/double-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/double-2.csv'
]

# Test files for Force neutral attack
triple_files = [
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/triple-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/triple-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/triple-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/triple-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/triple-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/triple-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/triple-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/triple-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/triple-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/triple-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/triple-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/triple-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/triple-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/triple-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/triple-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/triple-2.csv'
]

fuzzing_files = [
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/fuzzing-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/fuzzing-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/fuzzing-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/fuzzing-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/fuzzing-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/fuzzing-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/fuzzing-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/fuzzing-4.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/fuzzing-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/fuzzing-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/fuzzing-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/fuzzing-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/fuzzing-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/fuzzing-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/fuzzing-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/fuzzing-2.csv'
]

interval_files = [
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/interval-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/interval-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/interval-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/interval-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/interval-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/interval-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/interval-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/interval-4.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/interval-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/interval-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/interval-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/interval-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/interval-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/interval-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/interval-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/interval-2.csv'
]

speed_files = [
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/speed-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/speed-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/speed-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/speed-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/speed-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/speed-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/speed-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/speed-4.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/speed-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/speed-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/speed-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/speed-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/speed-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/speed-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/speed-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/speed-2.csv'
]

systematic_files = [
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/systematic-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/systematic-4.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/systematic-3.csv',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/systematic-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/systematic-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/systematic-4.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/systematic-3.csv',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/systematic-4.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/systematic-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/systematic-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/systematic-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/systematic-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/systematic-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/systematic-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/systematic-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/systematic-2.csv'
]

rpmaccessory_files = [
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/rpm-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/rpm-accessory-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/rpm-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/rpm-accessory-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/rpm-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/rpm-accessory-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/rpm-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/rpm-accessory-2.csv'
]

speedaccessory_files = [
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/speed-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/speed-accessory-2.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/speed-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/speed-accessory-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/speed-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/speed-accessory-2.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/speed-accessory-1.csv',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/speed-accessory-2.csv'
]

In [14]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in double_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Double Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Double Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 66.08 seconds

-----------------------------------------------------------------------
Accuracy: 99.25%
Precision: 99.56%
Recall: 99.25%
F1-Score: 99.39%
Matthews Correlation Coefficient: 0.3771
Informedness: 0.9851
Markedness: 0.9912


In [15]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in triple_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Triple Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Triple Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 63.46 seconds

-----------------------------------------------------------------------
Accuracy: 99.09%
Precision: 99.12%
Recall: 99.09%
F1-Score: 99.10%
Matthews Correlation Coefficient: 0.6300
Informedness: 0.9818
Markedness: 0.9824


In [16]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in fuzzing_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Fuzzing Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Fuzzing Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 50.94 seconds

-----------------------------------------------------------------------
Accuracy: 99.37%
Precision: 99.48%
Recall: 99.37%
F1-Score: 99.40%
Matthews Correlation Coefficient: 0.8802
Informedness: 0.9874
Markedness: 0.9895


In [17]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in interval_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Interval Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Interval Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 58.07 seconds

-----------------------------------------------------------------------
Accuracy: 99.40%
Precision: 99.51%
Recall: 99.40%
F1-Score: 99.43%
Matthews Correlation Coefficient: 0.8546
Informedness: 0.9880
Markedness: 0.9901


In [18]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in speed_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Speed Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Speed Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 91.25 seconds

-----------------------------------------------------------------------
Accuracy: 99.46%
Precision: 99.81%
Recall: 99.46%
F1-Score: 99.60%
Matthews Correlation Coefficient: 0.4184
Informedness: 0.9891
Markedness: 0.9961


In [21]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in systematic_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Systematic Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Systematic Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 40.73 seconds

-----------------------------------------------------------------------
Accuracy: 99.47%
Precision: 99.67%
Recall: 99.47%
F1-Score: 99.53%
Matthews Correlation Coefficient: 0.7649
Informedness: 0.9894
Markedness: 0.9933


In [22]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in rpmaccessory_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (RPM Accessory Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (RPM Accessory Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 6.80 seconds

-----------------------------------------------------------------------
Accuracy: 94.14%
Precision: 99.60%
Recall: 94.14%
F1-Score: 96.78%
Matthews Correlation Coefficient: 0.0176
Informedness: 0.8828
Markedness: 0.9921


In [23]:
# Load and preprocess testing data
df_test = pd.concat([pd.read_csv(file) for file in speedaccessory_files], ignore_index=True)
df_test = preprocess_data(df_test)
X_test, y_test = extract_features_labels(df_test)

# Benchmark testing the Gradient Boosting Model
test_metrics = test_gradient_boosting(gb, scaler, X_test, y_test)
y_pred = gb.predict(X_test)

# Print Combined Testing Metrics
print(f"\n===============================================================================")
print(f"\nTesting Metrics (Speed Accessory Attack Test Files):")
print(f"\n-----------------------------------------------------------------------")
print(f"\nTraining Time: {training_time:.2f} seconds")
print(f"Testing Time: {test_metrics['testing_time']:.2f} seconds")
print(f"\n-----------------------------------------------------------------------")
print(f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%")
print(f"Precision: {test_metrics['precision'] * 100:.2f}%")
print(f"Recall: {test_metrics['recall'] * 100:.2f}%")
print(f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%")
print(f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}")
print(f"Informedness: {test_metrics['informedness']:.4f}")
print(f"Markedness: {test_metrics['markedness']:.4f}")





Testing Metrics (Speed Accessory Attack Test Files):

-----------------------------------------------------------------------

Training Time: 38217.12 seconds
Testing Time: 8.01 seconds

-----------------------------------------------------------------------
Accuracy: 95.21%
Precision: 99.67%
Recall: 95.21%
F1-Score: 97.23%
Matthews Correlation Coefficient: 0.2573
Informedness: 0.9041
Markedness: 0.9933
