In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.losses import SparseCategoricalCrossentropy, BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier, export_text, export_graphviz
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.utils import class_weight
import joblib

2024-10-15 10:25:02.767303: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
data_folder = 'Car-Hacking/'
print(os.listdir(data_folder))
smart_attack_path = 'attack_10_10.csv'

['Fuzzy_dataset.csv', 'normal_run_data.7z', 'normal_run_data', 'DoS_dataset.csv', 'RPM_dataset.csv', 'gear_dataset.csv']


In [4]:
def hex_to_bin(hex_num):
    
    binary_value = bin(int(str(hex_num), 16))[2:]
    
    return binary_value

def int_to_bin(int_num):
    
    binary_value = bin(int_num)[2:]
    
    return binary_value

def pad(value, length):
    
    curr_length = len(str(value))
    
    zeros = '0' * (length - curr_length)
    
    return zeros + value

hex_to_dec = lambda x: int(x, 16)

def transform_data(data):

    data['ID'] = data['ID'].apply(hex_to_dec)
    data['Payload'] = data['Payload'].apply(hex_to_dec)

    return data

In [5]:
smart_attack = pd.read_csv(smart_attack_path)

In [6]:
smart_attack.drop(columns = ['Timestamp'], inplace = True)

In [8]:
X_smart = smart_attack.drop(['label'], axis = 1).values
y_smart = smart_attack['label']
y_smart = y_smart.replace(2,1)

In [9]:
X_train_smart, X_test_smart, y_train_smart, y_test_smart = train_test_split(X_smart, y_smart, test_size=0.3, random_state = 42, shuffle = False)

In [10]:
scaler = StandardScaler()

X_train_smart = scaler.fit_transform(X_train_smart)
X_test_smart = scaler.transform(X_test_smart)

In [11]:
oversample = SMOTE()
X_train_smote, y_train_smote = oversample.fit_resample(X_train_smart, y_train_smart) 

In [12]:
##MLP

print("-----MLP-------")

mlp = Sequential()
mlp.add(Input(shape = (4)))
mlp.add(Dense(128, activation = 'relu'))
mlp.add(Dense(64, activation = 'relu'))
mlp.add(Dense(1, activation = 'sigmoid'))

mlp.compile(optimizer='adam',
                loss=BinaryCrossentropy(from_logits=False),
                metrics=['accuracy'])

es = EarlyStopping(monitor = 'val_loss', patience = 5, restore_best_weights = True)

mlp_hist = mlp.fit(X_train_smote, y_train_smote, epochs=100, callbacks = [es], validation_split=0.2, batch_size = 32)

-----MLP-------
Epoch 1/100


2024-10-15 10:29:56.509524: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-10-15 10:29:56.514364: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100


In [13]:
##MLP
print("-----MLP-------")
threshold = 0.5
mlp_preds = mlp.predict(X_test_smart, batch_size = 32)
mlp_preds = (mlp_preds >= threshold).astype(int)

print("--------Smart--------")
print("ACCURACY: ", accuracy_score(y_test_smart, mlp_preds))
print("CLASSIFICATION REPORT:\n", classification_report(y_test_smart, mlp_preds))


-----MLP-------
--------Smart--------
ACCURACY:  0.8089920658242727
CLASSIFICATION REPORT:
               precision    recall  f1-score   support

         0.0       0.87      0.84      0.86     29873
         1.0       0.69      0.74      0.71     14366

    accuracy                           0.81     44239
   macro avg       0.78      0.79      0.79     44239
weighted avg       0.81      0.81      0.81     44239



In [14]:
smart_attack = pd.read_csv(smart_attack_path)

smart_attack.drop(columns = ['Timestamp'], inplace = True)
X_smart = smart_attack.drop(['label'], axis = 1).values
y_smart = smart_attack['label']

X_train_smart, X_test_smart, y_train_smart, y_test_smart = train_test_split(X_smart, y_smart, test_size=0.3, random_state = 42)

In [15]:
test_scaled = scaler.transform(X_smart)
preds = mlp.predict(test_scaled)
preds = (preds >= threshold).astype(int)



In [22]:
X_test_results = pd.DataFrame(data = X_smart, columns = ['ID', 'DLC', 'Payload', "IAT"])

In [23]:
X_test_results['Packet Type'] = np.array(y_smart)
X_test_results['Prediction'] = np.array(preds)

In [29]:
X_test_results['Correctly Predicted'] = np.where(
    ((X_test_results['Packet Type'] == 0) & (X_test_results['Prediction'] == 0)) | 
    ((X_test_results['Packet Type'].isin([1, 2])) & (X_test_results['Prediction'] == 1)),
    1,  # Set to 1 when the conditions are met
    0   # Set to 0 in any other case
)

In [27]:
# X_test_results.to_csv('results.csv', index = False)

In [34]:
X_test_results['IAT'] = X_test_results['IAT'] * 1e6

In [36]:
X_test_results.head()

Unnamed: 0,ID,DLC,Payload,IAT,Packet Type,Prediction,Correctly Predicted
0,848.0,8.0,3.716925e+17,0.0,0.0,0,1
1,704.0,8.0,1.441152e+18,221.014023,0.0,1,0
2,1072.0,8.0,0.0,553.846359,0.0,1,0
3,1201.0,8.0,0.0,237.941742,0.0,0,1
4,497.0,8.0,0.0,248.193741,0.0,0,1


In [50]:
# Group by ID
grouped_by_id = X_test_results.groupby('ID')

for id_value, group in grouped_by_id:
    # Overall statistics for each ID
    overall_mean_iat = group['IAT'].mean()
    overall_median_iat = group['IAT'].median()

    print(f"ID: {id_value}")
    print(f"Overall Mean IAT: {overall_mean_iat}")
    print(f"Overall Median IAT: {overall_median_iat}")

    print()

    group = group[group['Packet Type'].isin([1, 2])]

    # Statistics for 'Correctly Predicted' = 1
    correctly_predicted_1 = group[group['Correctly Predicted'] == 1]
    mean_iat_correct_1 = correctly_predicted_1['IAT'].mean()
    median_iat_correct_1 = correctly_predicted_1['IAT'].median()

    # Statistics for 'Correctly Predicted' = 0
    correctly_predicted_0 = group[group['Correctly Predicted'] == 0]
    mean_iat_correct_0 = correctly_predicted_0['IAT'].mean()
    median_iat_correct_0 = correctly_predicted_0['IAT'].median()

    print(f"Mean IAT for Detected Packets: {mean_iat_correct_1}")
    print(f"Mean IAT for evaded packets: {mean_iat_correct_0}")
    
    print(f"Median IAT for Detected Packets: {median_iat_correct_1}")
    print(f"Median IAT for evaded packets: {median_iat_correct_0}")

    print("----------------------------")


ID: 2.0
Overall Mean IAT: 317.95913703774886
Overall Median IAT: 241.994857788

Mean IAT for Detected Packets: 422.98318035600875
Mean IAT for evaded packets: 252.58353922892636
Median IAT for Detected Packets: 236.0343933105
Median IAT for evaded packets: 246.04797363279997
----------------------------
ID: 160.0
Overall Mean IAT: 439.4342400144029
Overall Median IAT: 240.0875091552

Mean IAT for Detected Packets: 502.5030101785419
Mean IAT for evaded packets: 252.38175141180128
Median IAT for Detected Packets: 246.04797363279997
Median IAT for evaded packets: 230.073928833
----------------------------
ID: 161.0
Overall Mean IAT: 235.30015520224566
Overall Median IAT: 236.0343933105

Mean IAT for Detected Packets: 236.4392938284586
Mean IAT for evaded packets: nan
Median IAT for Detected Packets: 236.0343933105
Median IAT for evaded packets: nan
----------------------------
ID: 304.0
Overall Mean IAT: 262.00338972469257
Overall Median IAT: 237.94174194329997

Mean IAT for Detected Pack

In [43]:
group

Unnamed: 0,ID,DLC,Payload,IAT,Packet Type,Prediction,Correctly Predicted
119,1680.0,8.0,2.686583e+09,240.087509,0.0,0,1
314,1680.0,8.0,2.686583e+09,245.809555,0.0,0,1
509,1680.0,8.0,2.686583e+09,248.193741,0.0,0,1
704,1680.0,8.0,2.686583e+09,244.855881,0.0,0,1
899,1680.0,8.0,2.686583e+09,255.107880,0.0,0,1
...,...,...,...,...,...,...,...
146273,1680.0,8.0,2.686517e+09,242.948532,0.0,0,1
146532,1680.0,8.0,2.686517e+09,244.140625,0.0,0,1
146796,1680.0,8.0,2.686517e+09,241.994858,0.0,0,1
147064,1680.0,8.0,2.686517e+09,247.955322,0.0,0,1


In [49]:
X_test_results.to_csv('results.csv', index = False)

In [48]:
X_test_results

Unnamed: 0,ID,DLC,Payload,IAT,Packet Type,Prediction,Correctly Predicted
0,848.0,8.0,3.716925e+17,0.000000,0.0,0,1
1,704.0,8.0,1.441152e+18,221.014023,0.0,1,0
2,1072.0,8.0,0.000000e+00,553.846359,0.0,1,0
3,1201.0,8.0,0.000000e+00,237.941742,0.0,0,1
4,497.0,8.0,0.000000e+00,248.193741,0.0,0,1
...,...,...,...,...,...,...,...
147457,608.0,8.0,3.202177e+18,237.941742,0.0,1,0
147458,672.0,8.0,1.177723e+14,236.034393,0.0,1,0
147459,809.0,8.0,9.779705e+18,230.073929,0.0,0,1
147460,880.0,8.0,9.851624e+15,247.001648,0.0,0,1
