In [1]:
pip install tensorflow


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import os
import time
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, matthews_corrcoef, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.utils import to_categorical

In [3]:
# Function to Load Data
def load_data_from_directory(directory_path):
    """Load every CSV file in a directory into one combined DataFrame.

    Files are read in sorted file-name order so the row order of the result
    is reproducible; ``os.listdir`` itself returns entries in arbitrary order.

    Parameters
    ----------
    directory_path : str or path-like
        Directory to scan (non-recursively) for files whose name ends in
        ``.csv``.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all CSV files, re-indexed from 0.

    Raises
    ------
    ValueError
        If the directory contains no ``.csv`` files.
    """
    csv_names = sorted(
        name for name in os.listdir(directory_path) if name.endswith('.csv')
    )
    if not csv_names:
        # pd.concat([]) would raise ValueError as well, but without saying
        # which directory turned out to be empty.
        raise ValueError(f"No .csv files found in directory: {directory_path!r}")

    data_frames = [
        pd.read_csv(os.path.join(directory_path, name)) for name in csv_names
    ]
    return pd.concat(data_frames, ignore_index=True)

# Function to Preprocess Data
def hex_to_int(x):
    """Parse the text form of ``x`` as a base-16 integer.

    ``x`` is passed through ``str()`` before parsing, so non-string inputs
    are interpreted from their printed representation. Whatever
    ``int(..., 16)`` rejects is mapped to 0 instead of raising.
    """
    try:
        parsed = int(str(x), 16)
    except ValueError:
        # Text is not valid hexadecimal: fall back to 0.
        parsed = 0
    return parsed
        
def preprocess_data(df):
    """Return a numerically encoded copy of a raw message DataFrame.

    The input frame is left untouched. Converting in place would make a
    second call on the same frame re-parse the already-converted integers
    as hexadecimal digits (e.g. 26 -> "26" -> 38).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``timestamp``, ``arbitration_id`` and
        ``data_field``. Any other columns are carried over unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns in the same order, where
        ``timestamp`` is whole seconds derived from the parsed datetime and
        ``arbitration_id`` / ``data_field`` are integers parsed as base-16
        text (0 where the text is not valid hexadecimal).
    """
    processed = df.copy()

    # NOTE(review): pd.to_datetime treats plain numeric input as nanoseconds
    # since the epoch unless `unit` is given. If the CSV timestamps are
    # already epoch seconds, this collapses them to near-zero values --
    # confirm the column format against the data files.
    processed['timestamp'] = (
        pd.to_datetime(processed['timestamp']).astype(np.int64) // 10**9
    )

    for hex_column in ('arbitration_id', 'data_field'):
        processed[hex_column] = processed[hex_column].apply(hex_to_int)

    return processed

# Function to Extract Features and Labels
def extract_features_labels(df):
    """Separate the ``attack`` label column from the remaining columns.

    Returns a ``(features, labels)`` pair: ``features`` is ``df`` without the
    ``attack`` column and ``labels`` is the ``attack`` column itself. The
    input frame is not modified.
    """
    label_column = 'attack'
    features = df.drop(columns=label_column)
    labels = df[label_column]
    return features, labels



In [4]:
# Function to Build and Train MLP Model
def train_mlp(X_train, y_train, epochs=10, batch_size=64, verbose=1):
    """Fit a feature scaler and a small MLP classifier on the training data.

    Parameters
    ----------
    X_train : array-like
        Training features, one row per sample.
    y_train : array-like
        Training labels. They are one-hot encoded with ``to_categorical``,
        so they are presumably integer class ids starting at 0 -- verify
        against the dataset.
    epochs : int, default 10
        Number of passes over the training data.
    batch_size : int, default 64
        Mini-batch size passed to ``model.fit``.
    verbose : int, default 1
        Keras verbosity level passed to ``model.fit``.

    Returns
    -------
    model
        The trained Keras ``Sequential`` model.
    scaler : StandardScaler
        The scaler fitted on ``X_train``; reuse it to transform test data.
    training_time : float
        Seconds spent inside ``model.fit`` only (scaling and model
        construction are excluded).
    """
    # Standardize the features; the fitted scaler is returned for reuse
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # One-hot encode the labels for the softmax / categorical cross-entropy head
    y_train_categorical = to_categorical(y_train)

    # Input -> Dense(64, relu) -> Dense(32, relu) -> softmax over the classes
    model = Sequential()
    model.add(Input(shape=(X_train_scaled.shape[1],)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    # One output unit per one-hot column
    model.add(Dense(y_train_categorical.shape[1], activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # perf_counter is monotonic, so the measured duration is not distorted
    # by system clock adjustments during a long training run.
    start_time = time.perf_counter()
    model.fit(
        X_train_scaled,
        y_train_categorical,
        epochs=epochs,
        batch_size=batch_size,
        verbose=verbose,
    )
    training_time = time.perf_counter() - start_time

    return model, scaler, training_time

# Function to Test MLP Model
def test_mlp(model, scaler, X_test, y_test):
    """Evaluate a trained classifier on one test set and collect its metrics.

    Parameters
    ----------
    model : object with ``predict``
        Trained model; ``predict`` must return one row of per-class scores
        per sample. The predicted class is the argmax of each row.
    scaler : object with ``transform``
        Already-fitted feature scaler. It is applied to ``X_test`` as-is and
        is not re-fitted.
    X_test : array-like
        Test features, one row per sample.
    y_test : array-like
        True labels. They are compared directly with the argmax indices, so
        they are presumably integer class ids -- verify against the dataset.

    Returns
    -------
    dict
        ``conf_matrix``, ``mcc``, ``accuracy``, ``precision``, ``recall``,
        ``f1_score`` (precision/recall/F1 are support-weighted averages),
        ``informedness``, ``markedness`` and ``testing_time`` (seconds spent
        inside ``model.predict`` only).
    """
    # Standardize the features with the statistics learned during training
    X_test_scaled = scaler.transform(X_test)

    # Time only the prediction pass; perf_counter is monotonic
    start_time = time.perf_counter()
    y_test_pred_probs = model.predict(X_test_scaled)
    testing_time = time.perf_counter() - start_time

    y_test_pred = np.argmax(y_test_pred_probs, axis=1)

    # Evaluate test performance
    conf_matrix_test = confusion_matrix(y_test, y_test_pred)
    mcc_test = matthews_corrcoef(y_test, y_test_pred)
    accuracy_test = accuracy_score(y_test, y_test_pred)
    precision_test = precision_score(y_test, y_test_pred, average='weighted', zero_division=0)
    recall_test = recall_score(y_test, y_test_pred, average='weighted', zero_division=0)
    f1_test = f1_score(y_test, y_test_pred, average='weighted', zero_division=0)

    # Informedness is TPR + TNR - 1 and markedness is PPV + NPV - 1. Both
    # need the negative-class rates, so they are derived from the confusion
    # matrix rather than from recall / precision alone.
    informedness_test, markedness_test = _informedness_markedness(conf_matrix_test)

    return {
        'conf_matrix': conf_matrix_test,
        'mcc': mcc_test,
        'accuracy': accuracy_test,
        'precision': precision_test,
        'recall': recall_test,
        'f1_score': f1_test,
        'informedness': informedness_test,
        'markedness': markedness_test,
        'testing_time': testing_time
    }


def _informedness_markedness(conf_matrix):
    """Return support-weighted one-vs-rest (informedness, markedness).

    ``conf_matrix`` is a square matrix with true classes on the rows and
    predicted classes on the columns. Each class is scored one-vs-rest as
    TPR + TNR - 1 (informedness) and PPV + NPV - 1 (markedness); undefined
    ratios (zero denominator) count as 0, mirroring ``zero_division=0``.
    The per-class scores are averaged with weights proportional to class
    support, matching the ``average='weighted'`` convention of the other
    metrics. For two classes both classes yield the same score, so the
    weighting has no effect.
    """
    cm = np.asarray(conf_matrix, dtype=float)
    total = cm.sum()
    if total == 0:
        return 0.0, 0.0

    tp = np.diag(cm)
    fn = cm.sum(axis=1) - tp  # true class c, predicted as something else
    fp = cm.sum(axis=0) - tp  # predicted class c, truly something else
    tn = total - tp - fn - fp

    def _ratio(num, den):
        # Element-wise num / den with 0 wherever the denominator is 0
        return np.divide(num, den, out=np.zeros_like(den), where=den > 0)

    informedness = _ratio(tp, tp + fn) + _ratio(tn, tn + fp) - 1.0
    markedness = _ratio(tp, tp + fp) + _ratio(tn, tn + fn) - 1.0

    weights = (tp + fn) / total
    return float(np.sum(weights * informedness)), float(np.sum(weights * markedness))



In [5]:
# Dataset locations for set_01: one training folder and its four test folders
train_directories = ['./Documents/Research/can-train-and-test/set_01/train_01/']

test_directories = [
    './Documents/Research/can-train-and-test/set_01/test_01_known_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_01/test_03_known_vehicle_unknown_attack/',
    './Documents/Research/can-train-and-test/set_01/test_04_unknown_vehicle_unknown_attack/',
]

# Train one model per training folder, then score it on that folder's test sets
for i, train_dir in enumerate(train_directories):
    # Training data: load, preprocess, split into features and labels
    df_train = preprocess_data(load_data_from_directory(train_dir))
    X_train, y_train = extract_features_labels(df_train)

    mlp_model, scaler, training_time = train_mlp(X_train, y_train)

    # Every training folder owns a consecutive run of four test folders
    corresponding_tests = test_directories[4 * i:4 * i + 4]

    for test_dir in corresponding_tests:
        # Test data goes through the same preprocessing as the training data
        df_test = preprocess_data(load_data_from_directory(test_dir))
        X_test, y_test = extract_features_labels(df_test)

        test_metrics = test_mlp(mlp_model, scaler, X_test, y_test)

        # Report timings and metrics for this train/test pairing
        print(
            "\n===============================================================================",
            f"\nTraining Time: {training_time:.2f} seconds",
            f"Testing Time: {test_metrics['testing_time']:.2f} seconds",
            "\n-----------------------------------------------------------------------",
            f"\nTesting Metrics (Train: {train_dir} | Test: {test_dir}):",
            f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%",
            f"Precision: {test_metrics['precision'] * 100:.2f}%",
            f"Recall: {test_metrics['recall'] * 100:.2f}%",
            f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%",
            f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}",
            f"Informedness: {test_metrics['informedness']:.4f}",
            f"Markedness: {test_metrics['markedness']:.4f}",
            sep="\n",
        )


Epoch 1/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 2ms/step - accuracy: 0.9966 - loss: 0.0132
Epoch 2/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 2ms/step - accuracy: 0.9969 - loss: 0.0087
Epoch 3/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m346s[0m 2ms/step - accuracy: 0.9969 - loss: 0.0084
Epoch 4/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 2ms/step - accuracy: 0.9969 - loss: 0.0082
Epoch 5/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 2ms/step - accuracy: 0.9970 - loss: 0.0080
Epoch 6/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m292s[0m 2ms/step - accuracy: 0.9970 - loss: 0.0079
Epoch 7/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m324s[0m 2ms/step - accuracy: 0.9970 - loss: 0.0080
Epoch 8/10
[1m166456/166456[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 2ms/step - accuracy

In [6]:
# Dataset locations for set_02: one training folder and its four test folders
train_directories = ['./Documents/Research/can-train-and-test/set_02/train_01/']

test_directories = [
    './Documents/Research/can-train-and-test/set_02/test_01_known_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_02/test_02_unknown_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_02/test_03_known_vehicle_unknown_attack/',
    './Documents/Research/can-train-and-test/set_02/test_04_unknown_vehicle_unknown_attack/',
]

# Train one model per training folder, then score it on that folder's test sets
for i, train_dir in enumerate(train_directories):
    # Training data: load, preprocess, split into features and labels
    df_train = preprocess_data(load_data_from_directory(train_dir))
    X_train, y_train = extract_features_labels(df_train)

    mlp_model, scaler, training_time = train_mlp(X_train, y_train)

    # Every training folder owns a consecutive run of four test folders
    corresponding_tests = test_directories[4 * i:4 * i + 4]

    for test_dir in corresponding_tests:
        # Test data goes through the same preprocessing as the training data
        df_test = preprocess_data(load_data_from_directory(test_dir))
        X_test, y_test = extract_features_labels(df_test)

        test_metrics = test_mlp(mlp_model, scaler, X_test, y_test)

        # Report timings and metrics for this train/test pairing
        print(
            "\n===============================================================================",
            f"\nTraining Time: {training_time:.2f} seconds",
            f"Testing Time: {test_metrics['testing_time']:.2f} seconds",
            "\n-----------------------------------------------------------------------",
            f"\nTesting Metrics (Train: {train_dir} | Test: {test_dir}):",
            f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%",
            f"Precision: {test_metrics['precision'] * 100:.2f}%",
            f"Recall: {test_metrics['recall'] * 100:.2f}%",
            f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%",
            f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}",
            f"Informedness: {test_metrics['informedness']:.4f}",
            f"Markedness: {test_metrics['markedness']:.4f}",
            sep="\n",
        )

Epoch 1/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m497s[0m 2ms/step - accuracy: 0.9907 - loss: 0.0402
Epoch 2/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m487s[0m 2ms/step - accuracy: 0.9935 - loss: 0.0290
Epoch 3/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m475s[0m 2ms/step - accuracy: 0.9939 - loss: 0.0265
Epoch 4/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m479s[0m 2ms/step - accuracy: 0.9943 - loss: 0.0242
Epoch 5/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m473s[0m 2ms/step - accuracy: 0.9945 - loss: 0.0232
Epoch 6/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m496s[0m 2ms/step - accuracy: 0.9947 - loss: 0.0224
Epoch 7/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m459s[0m 2ms/step - accuracy: 0.9947 - loss: 0.0219
Epoch 8/10
[1m270951/270951[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m462s[0m 2ms/step - accuracy

In [7]:
# Dataset locations for set_03: one training folder and its four test folders
train_directories = ['./Documents/Research/can-train-and-test/set_03/train_01/']

test_directories = [
    './Documents/Research/can-train-and-test/set_03/test_01_known_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_03/test_02_unknown_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_03/test_03_known_vehicle_unknown_attack/',
    './Documents/Research/can-train-and-test/set_03/test_04_unknown_vehicle_unknown_attack/',
]

# Train one model per training folder, then score it on that folder's test sets
for i, train_dir in enumerate(train_directories):
    # Training data: load, preprocess, split into features and labels
    df_train = preprocess_data(load_data_from_directory(train_dir))
    X_train, y_train = extract_features_labels(df_train)

    mlp_model, scaler, training_time = train_mlp(X_train, y_train)

    # Every training folder owns a consecutive run of four test folders
    corresponding_tests = test_directories[4 * i:4 * i + 4]

    for test_dir in corresponding_tests:
        # Test data goes through the same preprocessing as the training data
        df_test = preprocess_data(load_data_from_directory(test_dir))
        X_test, y_test = extract_features_labels(df_test)

        test_metrics = test_mlp(mlp_model, scaler, X_test, y_test)

        # Report timings and metrics for this train/test pairing
        print(
            "\n===============================================================================",
            f"\nTraining Time: {training_time:.2f} seconds",
            f"Testing Time: {test_metrics['testing_time']:.2f} seconds",
            "\n-----------------------------------------------------------------------",
            f"\nTesting Metrics (Train: {train_dir} | Test: {test_dir}):",
            f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%",
            f"Precision: {test_metrics['precision'] * 100:.2f}%",
            f"Recall: {test_metrics['recall'] * 100:.2f}%",
            f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%",
            f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}",
            f"Informedness: {test_metrics['informedness']:.4f}",
            f"Markedness: {test_metrics['markedness']:.4f}",
            sep="\n",
        )

Epoch 1/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 2ms/step - accuracy: 0.9955 - loss: 0.0154
Epoch 2/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m374s[0m 2ms/step - accuracy: 0.9986 - loss: 0.0069
Epoch 3/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 2ms/step - accuracy: 0.9986 - loss: 0.0067
Epoch 4/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 2ms/step - accuracy: 0.9989 - loss: 0.0058
Epoch 5/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m372s[0m 2ms/step - accuracy: 0.9989 - loss: 0.0054
Epoch 6/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 2ms/step - accuracy: 0.9991 - loss: 0.0049
Epoch 7/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 2ms/step - accuracy: 0.9991 - loss: 0.0048
Epoch 8/10
[1m187903/187903[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 2ms/step - accuracy

In [5]:
# Dataset locations for set_04: one training folder and its four test folders
train_directories = ['./Documents/Research/can-train-and-test/set_04/train_01/']

test_directories = [
    './Documents/Research/can-train-and-test/set_04/test_01_known_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_04/test_02_unknown_vehicle_known_attack/',
    './Documents/Research/can-train-and-test/set_04/test_03_known_vehicle_unknown_attack/',
    './Documents/Research/can-train-and-test/set_04/test_04_unknown_vehicle_unknown_attack/',
]

# Train one model per training folder, then score it on that folder's test sets
for i, train_dir in enumerate(train_directories):
    # Training data: load, preprocess, split into features and labels
    df_train = preprocess_data(load_data_from_directory(train_dir))
    X_train, y_train = extract_features_labels(df_train)

    mlp_model, scaler, training_time = train_mlp(X_train, y_train)

    # Every training folder owns a consecutive run of four test folders
    corresponding_tests = test_directories[4 * i:4 * i + 4]

    for test_dir in corresponding_tests:
        # Test data goes through the same preprocessing as the training data
        df_test = preprocess_data(load_data_from_directory(test_dir))
        X_test, y_test = extract_features_labels(df_test)

        test_metrics = test_mlp(mlp_model, scaler, X_test, y_test)

        # Report timings and metrics for this train/test pairing
        print(
            "\n===============================================================================",
            f"\nTraining Time: {training_time:.2f} seconds",
            f"Testing Time: {test_metrics['testing_time']:.2f} seconds",
            "\n-----------------------------------------------------------------------",
            f"\nTesting Metrics (Train: {train_dir} | Test: {test_dir}):",
            f"Accuracy: {test_metrics['accuracy'] * 100:.2f}%",
            f"Precision: {test_metrics['precision'] * 100:.2f}%",
            f"Recall: {test_metrics['recall'] * 100:.2f}%",
            f"F1-Score: {test_metrics['f1_score'] * 100:.2f}%",
            f"Matthews Correlation Coefficient: {test_metrics['mcc']:.4f}",
            f"Informedness: {test_metrics['informedness']:.4f}",
            f"Markedness: {test_metrics['markedness']:.4f}",
            sep="\n",
        )

Epoch 1/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 2ms/step - accuracy: 0.9973 - loss: 0.0165
Epoch 2/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 2ms/step - accuracy: 0.9976 - loss: 0.0116
Epoch 3/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 2ms/step - accuracy: 0.9977 - loss: 0.0111
Epoch 4/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 2ms/step - accuracy: 0.9978 - loss: 0.0110
Epoch 5/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 2ms/step - accuracy: 0.9978 - loss: 0.0107
Epoch 6/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 2ms/step - accuracy: 0.9979 - loss: 0.0105
Epoch 7/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m325s[0m 2ms/step - accuracy: 0.9979 - loss: 0.0101
Epoch 8/10
[1m148326/148326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m327s[0m 2ms/step - accuracy