In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report, accuracy_score, recall_score, f1_score, precision_score
from joblib import load
from tensorflow.keras.models import load_model
from sklearn.manifold import TSNE
from matplotlib.colors import ListedColormap
import tensorflow as tf

data_path = '../Smart Attack Algorithm/data/Car Hacking Dataset/benign_data.csv'

def hex_to_bin(hex_num):
    
    binary_value = bin(int(str(hex_num), 16))[2:]
    
    return binary_value

def int_to_bin(int_num):
    
    binary_value = bin(int_num)[2:]
    
    return binary_value

hex_to_dec = lambda x: int(x, 16)
dec_to_hex = lambda x : hex(int(x))[2:]

2024-03-12 20:56:09.881454: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def read_data(data_path):
    
    columns = ['Timestamp','ID', 'DLC', 'Payload', 'label']
    
    data = pd.read_csv(data_path)
    
    ##Replacing all NaNs with '00' 
    data = data.replace(np.NaN, '00')

    data['ID'] = data['ID'].apply(hex_to_dec)
    
    data['Payload'] = data['Payload'].str.replace(' ', '')
    data['Payload'] = data['Payload'].apply(hex_to_dec)
    
    data = data.assign(IAT=data['Timestamp'].diff().fillna(0))
    data = data.drop(columns = ['Timestamp'], axis = 1)
    
    return data

In [3]:
normal_data = read_data(data_path)
normal_data.drop(columns = ['label'], inplace = True)
normal_data.head()

Unnamed: 0,ID,DLC,Payload,IAT
0,848,8,371692544708313250,0.0
1,704,8,1441151880758558720,0.000221
2,1072,8,0,0.000554
3,1201,8,0,0.000238
4,497,8,0,0.000248


In [4]:
# X_train, X_test = train_test_split(normal_data, test_size=0.2, random_state=42)

# scaler = StandardScaler()

scaler = load('ch_scaler.joblib')
X_train = scaler.fit_transform(normal_data)
# X_test = scaler.transform(X_test)

In [5]:
model = load_model('ae_ch.h5')
X_train_pred = model.predict(X_train)

print("Training Loss: ", mean_squared_error(X_train, X_train_pred))

2024-03-12 20:56:43.912391: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2024-03-12 20:56:43.991674: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2024-03-12 20:56:44.254297: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:02:00.0 name: Tesla K40m computeCapability: 3.5
coreClock: 0.745GHz coreCount: 15 deviceMemorySize: 11.17GiB deviceMemoryBandwidth: 268.58GiB/s
2024-03-12 20:56:44.255287: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 1 with properties: 
pciBusID: 0000:84:00.0 name: Tesla K40m computeCapability: 3.5
coreClock: 0.745GHz coreCount: 15 deviceMemorySize: 11.17GiB deviceMemoryBandwidth: 268.58GiB/s
2024-03-12 20:56:44.255342: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2024-03-12 20:56:48.367

Training Loss:  1.0000033831603325


In [6]:
def squared_difference_error(X, X_recon):
    # Calculate squared difference error between each sample of X and X_recon
    squared_diff = np.square(X - X_recon)

    return squared_diff

training_losses = squared_difference_error(X_train, X_train_pred)

In [7]:
# strat = MirroredStrategy()

# EPOCHS = 1000
# BATCH_SIZE = 32 * strat.num_replicas_in_sync
# LOSS = 'mse'

# # Define early stopping callback

# reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
#                               patience=20)

# early_stopper = EarlyStopping(monitor='val_loss', patience=60, restore_best_weights=True)

# input_dim = X_train.shape[1]


# with strat.scope():
#     model = Sequential()

#     ##Encoder
#     model.add(Dense(input_dim, input_shape=(input_dim, ), activation='relu'))
#     model.add(Dense(3, activation='relu'))
#     model.add(Dense(2, activation='relu'))

#     ##Bottleneck
#     model.add(Dense(1, activation='relu'))

#     ##Decoder
#     model.add(Dense(2, activation='relu'))
#     model.add(Dense(3, activation='relu'))
#     model.add(Dense(input_dim))
    
 

#     model.compile(optimizer='adam', loss=LOSS)

# history = model.fit(X_train, X_train, epochs=EPOCHS, batch_size=BATCH_SIZE, 
#                     validation_data=(X_test, X_test), callbacks=[reduce_lr, early_stopper])

In [8]:
def assign_labels(X, X_recon, threshold):
    # Calculate squared error between each row of X and X_recon
    squared_errors = np.mean(np.square(X - X_recon), axis=1)

    # Assign labels based on threshold
    labels = [0 if error < threshold else 1 for error in squared_errors]

    return labels

In [9]:
car_hacking_df = pd.read_csv('../Smart Attack Algorithm/data/Car Hacking Dataset/preprocessed_car_hacking.csv')

In [10]:
car_hacking_df.head()

Unnamed: 0,Timestamp,ID,DLC,Payload,IAT,label
0,1478198000.0,0316,8,052168092121006f,0.0,0
1,1478198000.0,018f,8,fe5b0000003c0000,0.000209,0
2,1478198000.0,0260,8,19212230088e6d3a,0.000228,0
3,1478198000.0,02a0,8,64009a1d9702bd00,0.000232,0
4,1478198000.0,0329,8,40bb7f1411200014,0.000237,0


In [11]:
car_hacking_df.drop(columns = ['Timestamp'], inplace = True)

car_hacking_df['ID'] = car_hacking_df['ID'].apply(hex_to_dec)
car_hacking_df['Payload'] = car_hacking_df['Payload'].apply(hex_to_dec)

In [12]:
car_hacking_df.columns

Index(['ID', 'DLC', 'Payload', 'IAT', 'label'], dtype='object')

In [13]:
X = car_hacking_df.drop(columns = ['label'])
y = car_hacking_df['label']

In [14]:
X_val, X_test, y_val, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

In [15]:
def vary_thresholds(training_errors = training_losses, start = 0.5, end = 1, step = 0.05):

    quantiles = np.arange(start, end, step)

    thresholds = [np.quantile(training_errors, q) for q in quantiles]

    X_val_recon = model.predict(X_val)

    for threshold in thresholds:

        print("=====================================")
        print(f"Threshold: {threshold}")
        
        y_val_pred = assign_labels(X_val, X_val_recon, threshold)

        print(f"Accuracy: {accuracy_score(y_val, y_val_pred)}")

        print(f"F1 Score: {f1_score(y_val, y_val_pred)}")

        print(f"Recall: {recall_score(y_val, y_val_pred)}")

        print(f"Precision: {precision_score(y_val, y_val_pred)}")

        


vary_thresholds()

Threshold: 0.15450967958937473
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.17016189676719515
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.2583842678424978
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.431867581066057
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.49040146269082996
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.4904016875993273
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 1.0843909334026547
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 1.1768218178432377
Accuracy: 0.16046124006688908
F1 Score: 0.276547349

In [16]:
vary_thresholds(start = 0.05, end = 0.5, step = 0.05)

Threshold: 0.0072129878875475464
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.057863305817831966
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.05786330581783196
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.05786330581783197
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.057863305817831966
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.057863305817831966
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.09459040187117893
Accuracy: 0.16046124006688908
F1 Score: 0.276547349496378
Recall: 1.0
Precision: 0.16046124006688908
Threshold: 0.14036451321365034
Accuracy: 0.16046124006688908
F1 Score: 

In [17]:
X_test_pred = model.predict(X_test)

In [18]:
y_test_pred = assign_labels(X_test, X_test_pred, 1.0000050705461037)

In [19]:
print(f"Accuracy: {accuracy_score(y_test, y_test_pred)}")

Accuracy: 0.16008306026670507


In [20]:
print("Classification Report: ", classification_report(y_test, y_test_pred, zero_division=0))

Classification Report:                precision    recall  f1-score   support

           0       0.00      0.00      0.00   1539472
           1       0.16      1.00      0.28    293414

    accuracy                           0.16   1832886
   macro avg       0.08      0.50      0.14   1832886
weighted avg       0.03      0.16      0.04   1832886

