In [10]:
import os
import numpy as np
import pandas as pd
import argparse
from tensorflow.keras.models import load_model
from xgboost import XGBClassifier
from joblib import load
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Directory holding both the evaluation CSV and the serialized scalers/models
# that later cells load via data_dir.
data_dir = '../data/Car Hacking Dataset/'
file_name = 'smart_output.csv'

# Build the full path once, then read the whole CSV into memory.
csv_path = os.path.join(data_dir, file_name)
smart_data = pd.read_csv(csv_path)

In [3]:
# Replace the absolute Timestamp column with IAT: the difference between each
# row's Timestamp and the previous row's. The first row has no predecessor, so
# its NaN difference is filled with 0.
smart_data = (
    smart_data
    .assign(IAT=lambda frame: frame['Timestamp'].diff().fillna(0))
    .drop(columns=['Timestamp'])
)
smart_data.head()

Unnamed: 0,ID,DLC,Payload,label,IAT
0,848.0,8.0,3.716925e+17,0.0,0.0
1,704.0,8.0,1.441152e+18,0.0,0.000221
2,1072.0,8.0,0.0,0.0,0.000554
3,1201.0,8.0,0.0,0.0,0.000238
4,497.0,8.0,0.0,0.0,0.000248


In [4]:
# Every column except 'label' becomes a feature; 'label' is the target.
# NOTE(review): the head() output above shows an 'Unnamed: 0' column, which
# therefore stays in X as a feature -- confirm the saved scaler and models
# were fitted with that column present.
feature_frame = smart_data.drop(columns=['label'])
X = feature_frame.to_numpy()
y = smart_data['label'].to_numpy()

In [5]:
def sequencify_data(X, y, seq_size=10):
    """Cut X and y into consecutive, non-overlapping windows of seq_size rows.

    Rows at the end that do not fill a complete window are dropped. Each
    window gets a single label: 1 if the matching slice of y contains a 1,
    otherwise 0.

    Args:
        X: Sliceable sequence of feature rows.
        y: Sliceable sequence of per-row labels, aligned with X.
        seq_size: Number of rows per window.

    Returns:
        Tuple (X_seq, y_seq) of NumPy arrays built from the list of windows
        and the list of per-window labels.
    """
    # Number of leading rows that fit into whole windows.
    usable_rows = len(X) - len(X) % seq_size
    window_starts = range(0, usable_rows, seq_size)

    windows = [X[start:start + seq_size] for start in window_starts]
    window_labels = [
        1 if 1 in y[start:start + seq_size] else 0
        for start in window_starts
    ]

    return np.array(windows), np.array(window_labels)

In [6]:
# Load the two previously saved scalers: one for the flat per-row features,
# one for the flattened sequence windows.
# NOTE(review): joblib's load unpickles the file, which can execute arbitrary
# code -- only load artefacts from a trusted source.
scaler_path = os.path.join(data_dir, 'scaler.joblib')
seq_scaler_path = os.path.join(data_dir, 'seq_scaler.joblib')

scaler = load(scaler_path)
seq_scaler = load(seq_scaler_path)

In [7]:
# Paths of the serialized models, all stored alongside the data.
mlp_path = os.path.join(data_dir, 'mlp.h5')
xgb_path = os.path.join(data_dir, 'xgb.json')
dt_path = os.path.join(data_dir, 'dt.pkl')
rf_path = os.path.join(data_dir, 'rf.pkl')

mlp = load_model(mlp_path)
# The LSTM is currently not loaded; its evaluation is disabled further down.
# lstm = load_model(os.path.join(data_dir, 'lstm.h5'))

# XGBoost restores its state into a fresh classifier instance.
xgb = XGBClassifier()
xgb.load_model(xgb_path)

# NOTE(review): these .pkl files are unpickled by joblib's load, which can
# execute arbitrary code -- only load artefacts from a trusted source.
dt = load(dt_path)
rf = load(rf_path)

2023-10-29 22:18:24.784989: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-10-29 22:18:24.785745: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M2

systemMemory: 8.00 GB
maxCacheSize: 2.67 GB



In [8]:
# Group the rows into fixed-length windows (default window size) before any
# scaling is applied to X.
X_seq, y_seq = sequencify_data(X=X, y=y)

In [9]:
# Scale the flat per-row features with the scaler loaded from disk.
# NOTE: this overwrites X, so running this cell a second time would scale the
# already-scaled data; re-run the cell that builds X first.
X = scaler.transform(X)

# Flatten each window into a single row for scaling, then restore the
# (samples, seq_length, features) layout afterwards.
num_samples, seq_length, num_features = X_seq.shape
X_seq_reshaped = X_seq.reshape(num_samples, -1)
# Use transform, not fit_transform: seq_scaler was loaded already fitted, and
# refitting it here would replace the saved statistics with ones computed
# from the evaluation data, so the model inputs would no longer be scaled the
# way the saved scaler prescribes.
X_seq = seq_scaler.transform(X_seq_reshaped)
X_seq = X_seq.reshape(num_samples, seq_length, num_features)




In [11]:
# Cut-off applied to the MLP output to turn it into 0/1 predictions.
threshold = 0.5


def report_metrics(y_true, y_pred):
    """Print the accuracy and the classification report for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with y_true.
    """
    print("ACCURACY: ", accuracy_score(y_true, y_pred))
    print("CLASSIFICATION REPORT:\n", classification_report(y_true, y_pred))


print("------MLP------")

# The MLP output is compared against the threshold to obtain integer labels.
mlp_preds = mlp.predict(X)
mlp_preds = (mlp_preds >= threshold).astype(int)
report_metrics(y, mlp_preds)

# LSTM evaluation is disabled because the LSTM model is not loaded in this
# notebook. To re-enable it, load the model and uncomment:
# print("------LSTM------")
# lstm_preds = lstm.predict(X_seq)
# lstm_preds = (lstm_preds >= threshold).astype(int)
# report_metrics(y_seq, lstm_preds)

# For the remaining models the predict() output is scored as-is, without
# thresholding.
print("------XGBOOST------")

xgb_preds = xgb.predict(X)
report_metrics(y, xgb_preds)

print("------Decision Tree------")

dt_preds = dt.predict(X)
report_metrics(y, dt_preds)

print("------Random Forest------")

rf_preds = rf.predict(X)
report_metrics(y, rf_preds)

------MLP------


2023-10-29 22:25:10.990146: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-10-29 22:25:11.120308: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


ACCURACY:  0.9299223057693873
CLASSIFICATION REPORT:
               precision    recall  f1-score   support

         0.0       0.98      0.95      0.96    988871
         1.0       0.00      0.00      0.00     18411

    accuracy                           0.93   1007282
   macro avg       0.49      0.47      0.48   1007282
weighted avg       0.96      0.93      0.95   1007282

------XGBOOST------
ACCURACY:  0.9817220996702016


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


CLASSIFICATION REPORT:
               precision    recall  f1-score   support

         0.0       0.98      1.00      0.99    988871
         1.0       0.00      0.00      0.00     18411

    accuracy                           0.98   1007282
   macro avg       0.49      0.50      0.50   1007282
weighted avg       0.96      0.98      0.97   1007282

------Decision Tree------
ACCURACY:  0.9817220996702016


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


CLASSIFICATION REPORT:
               precision    recall  f1-score   support

         0.0       0.98      1.00      0.99    988871
         1.0       0.00      0.00      0.00     18411

    accuracy                           0.98   1007282
   macro avg       0.49      0.50      0.50   1007282
weighted avg       0.96      0.98      0.97   1007282

------Random Forest------
ACCURACY:  0.9817220996702016


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


CLASSIFICATION REPORT:
               precision    recall  f1-score   support

         0.0       0.98      1.00      0.99    988871
         1.0       0.00      0.00      0.00     18411

    accuracy                           0.98   1007282
   macro avg       0.49      0.50      0.50   1007282
weighted avg       0.96      0.98      0.97   1007282



  _warn_prf(average, modifier, msg_start, len(result))
