In [19]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score, precision_score
from joblib import load
from tensorflow.keras.models import load_model
from sklearn.manifold import TSNE
from matplotlib.colors import ListedColormap

data_path = "fixeddelta020take2.log"

def hex_to_bin(hex_num):
    
    binary_value = bin(int(str(hex_num), 16))[2:]
    
    return binary_value

def int_to_bin(int_num):
    
    binary_value = bin(int_num)[2:]
    
    return binary_value

hex_to_dec = lambda x: int(x, 16)
dec_to_hex = lambda x : hex(int(x))[2:]

In [2]:
columns = ['Timestamp', 'ID', 'DLC', 'Payload']

# Read the file into a list of lines
with open(data_path, 'r') as file:
    lines = file.readlines()

# Parse each line and extract the relevant information
data = []
for line in lines:
    parts = line.strip().split()
    timestamp = float(parts[0][1:-1])  # Remove parentheses
    bus_name = parts[1]
    ID = parts[2]
    DLC = int(parts[3][1:-1])  # Remove brackets and convert to integer
    payload = ''.join(parts[4:])  # Concatenate payload
    data.append([timestamp, ID, DLC, payload])

In [3]:
# Create a DataFrame from the parsed data
df = pd.DataFrame(data, columns=columns)

df['ID'] = df['ID'].apply(hex_to_dec)
df['Payload'] = df['Payload'].apply(hex_to_dec)
df = df.assign(IAT=df['Timestamp'].diff().fillna(0))

df.drop(columns=['Timestamp'], inplace=True)

In [4]:
X_train = df

# scaler = StandardScaler()

scaler = load('scaler.joblib')
X_train = scaler.transform(X_train)

In [5]:
model = load_model('ae.h5')

2024-03-12 17:16:39.997690: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-03-12 17:16:40.000141: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2024-03-12 17:16:40.083468: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:02:00.0 name: Tesla K40m computeCapability: 3.5
coreClock: 0.745GHz coreCount: 15 deviceMemorySize: 11.17GiB deviceMemoryBandwidth: 268.58GiB/s
2024-03-12 17:16:40.084304: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:84:00.0 name: Tesla K40m computeCapability: 3.5
coreClock: 0.745GHz coreCount: 15 deviceMemorySize: 11.17GiB deviceMemoryBandwidth: 268.58GiB/s
2024-03-12 17:16:40.084350: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-03-12 17:16:40.088

In [7]:
training_preds = model.predict(X_train)

training_error = mean_squared_error(X_train, training_preds)

2024-03-12 17:18:46.365883: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2024-03-12 17:18:46.366545: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2496840000 Hz
2024-03-12 17:18:46.461404: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10


In [8]:
print(f"Training data error: {training_error}")

Training data error: 0.09380349194758085


In [9]:
def squared_difference_error(X, X_recon):
    # Calculate squared difference error between each sample of X and X_recon
    squared_diff = np.square(X - X_recon)

    return squared_diff

training_losses = squared_difference_error(X_train, training_preds)

In [10]:
car_hacking_df = pd.read_csv('../Smart Attack Algorithm/data/Car Hacking Dataset/preprocessed_car_hacking.csv')

In [11]:
car_hacking_df.head()

Unnamed: 0,Timestamp,ID,DLC,Payload,IAT,label
0,1478198000.0,0316,8,052168092121006f,0.0,0
1,1478198000.0,018f,8,fe5b0000003c0000,0.000209,0
2,1478198000.0,0260,8,19212230088e6d3a,0.000228,0
3,1478198000.0,02a0,8,64009a1d9702bd00,0.000232,0
4,1478198000.0,0329,8,40bb7f1411200014,0.000237,0


In [12]:
car_hacking_df.drop(columns = ['Timestamp'], inplace = True)

car_hacking_df['ID'] = car_hacking_df['ID'].apply(hex_to_dec)
car_hacking_df['Payload'] = car_hacking_df['Payload'].apply(hex_to_dec)

In [13]:
car_hacking_df.columns

Index(['ID', 'DLC', 'Payload', 'IAT', 'label'], dtype='object')

In [14]:
X = car_hacking_df.drop(columns = ['label'])
y = car_hacking_df['label']

In [15]:
X = scaler.transform(X)

In [16]:
X_val, X_test, y_val, y_test = train_test_split(X, y, test_size = 0.5)

In [17]:
def assign_labels(X, X_recon, threshold):
    # Calculate squared error between each row of X and X_recon
    squared_errors = np.mean(np.square(X - X_recon), axis=1)

    # Assign labels based on threshold
    labels = [0 if error < threshold else 1 for error in squared_errors]

    return labels

In [20]:
def vary_thresholds(training_errors = training_losses, start = 0.05, end = 1, step = 0.05):

    quantiles = np.arange(start, end, step)

    thresholds = [np.quantile(training_errors, q) for q in quantiles]

    X_val_recon = model.predict(X_val)

    for threshold in thresholds:

        print("=====================================")
        print(f"Threshold: {threshold}")
        
        y_val_pred = assign_labels(X_val, X_val_recon, threshold)

        print(f"Accuracy: {accuracy_score(y_val, y_val_pred)}")

        print(f"F1 Score: {f1_score(y_val, y_val_pred)}")

        print(f"Recall: {recall_score(y_val, y_val_pred)}")

        print(f"Precision: {precision_score(y_val, y_val_pred)}")

        


vary_thresholds()

Threshold: 1.544788817338674e-69
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 1.8175739750314305e-69
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 2.028098703700278e-69
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 3.236236417435508e-69
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 2.710463345013221e-14
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 5.473267894110023e-06
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 4.724493376003556e-05
Accuracy: 0.16037667393207974
F1 Score: 0.27642174741176684
Recall: 1.0
Precision: 0.16037667393207974
Threshold: 0.00016458720353165643
Accuracy: 0.1

In [22]:
X_test_pred = model.predict(X_test)

In [23]:
y_test_pred = assign_labels(X_test, X_test_pred, 0.09380)

In [24]:
print(f"Accuracy: {accuracy_score(y_test, y_test_pred)}")

Accuracy: 0.16016762635537615


In [25]:
print("Classification Report: ", classification_report(y_test, y_test_pred, zero_division=0))

Classification Report:                precision    recall  f1-score   support

           0       0.00      0.00      0.00   1539317
           1       0.16      1.00      0.28    293569

    accuracy                           0.16   1832886
   macro avg       0.08      0.50      0.14   1832886
weighted avg       0.03      0.16      0.04   1832886

