In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing

from py_pkg.algos.algo import Algo
from py_pkg.core.core import get_human_time, read_dataset_and_split, get_test_dataset
from py_pkg.entities.entities import TestDataType, AttackType, AlgoToPredict
from py_pkg.testing.testing import Testing

In [2]:
# Source folders of set_01: the training captures plus the two test splits
# (known vehicle / known attack and unknown vehicle / known attack).
training_dataset_dir = './datasets/cantrainandtest/can-train-and-test/set_01/train_01/'
testing_dataset_dir_1 = './datasets/cantrainandtest/can-train-and-test/set_01/test_01_known_vehicle_known_attack/'
testing_dataset_dir_2 = './datasets/cantrainandtest/can-train-and-test/set_01/test_02_unknown_vehicle_known_attack/'


def _read_capture(directory, file_name):
    """Load the CSV found at ``directory + file_name`` into a DataFrame."""
    return pd.read_csv(directory + file_name)


# kvka test captures (read from testing_dataset_dir_1)
DoS_3_kvka, DoS_4_kvka = (
    _read_capture(testing_dataset_dir_1, csv_name)
    for csv_name in ("DoS-3.csv", "DoS-4.csv")
)
force_neutral_3_kvka, force_neutral_4_kvka = (
    _read_capture(testing_dataset_dir_1, csv_name)
    for csv_name in ("force-neutral-3.csv", "force-neutral-4.csv")
)

# uvka test captures (read from testing_dataset_dir_2)
DoS_3_uvka, DoS_4_uvka = (
    _read_capture(testing_dataset_dir_2, csv_name)
    for csv_name in ("DoS-3.csv", "DoS-4.csv")
)
force_neutral_3_uvka, force_neutral_4_uvka = (
    _read_capture(testing_dataset_dir_2, csv_name)
    for csv_name in ("force-neutral-3.csv", "force-neutral-4.csv")
)

# training captures
attack_free_1, attack_free_2 = (
    _read_capture(training_dataset_dir, csv_name)
    for csv_name in ("attack-free-1.csv", "attack-free-2.csv")
)
DoS_1, DoS_2 = (
    _read_capture(training_dataset_dir, csv_name)
    for csv_name in ("DoS-1.csv", "DoS-2.csv")
)
force_neutral_1, force_neutral_2 = (
    _read_capture(training_dataset_dir, csv_name)
    for csv_name in ("force-neutral-1.csv", "force-neutral-2.csv")
)


In [3]:
# Merge the two captures of each kind into one frame.
# ignore_index=True gives every merged frame a fresh 0..n-1 index. Without it
# the row labels of the second capture repeat those of the first, so label-based
# selection (.loc) would match two rows for the same label.

# concatenate test dataset kvka
DoS_kvka = pd.concat([DoS_3_kvka, DoS_4_kvka], ignore_index=True)
force_neutral_kvka = pd.concat([force_neutral_3_kvka, force_neutral_4_kvka], ignore_index=True)

# concatenate test dataset uvka
DoS_uvka = pd.concat([DoS_3_uvka, DoS_4_uvka], ignore_index=True)
force_neutral_uvka = pd.concat([force_neutral_3_uvka, force_neutral_4_uvka], ignore_index=True)

# concatenate train datasets
attack_free = pd.concat([attack_free_1, attack_free_2], ignore_index=True)
DoS = pd.concat([DoS_1, DoS_2], ignore_index=True)
force_neutral = pd.concat([force_neutral_1, force_neutral_2], ignore_index=True)

In [4]:
def hex_to_decimal(hex_str):
    """Return the integer value of a hexadecimal string.

    Parsing is done by ``int`` with base 16, so an optional ``0x`` prefix and
    either letter case are accepted; text that is not valid hexadecimal raises
    ``ValueError``.
    """
    parsed_value = int(hex_str, base=16)
    return parsed_value

def decimal_to_hex(dec):
    """Return the ``0x``-prefixed lowercase hexadecimal text for integer ``dec``.

    This is the inverse direction of ``hex_to_decimal``; the text is produced
    by the built-in ``hex``.
    """
    hex_text = hex(dec)
    return hex_text

# Columns whose hexadecimal text is replaced by integers, in conversion order.
hex_columns = ('data_field', 'arbitration_id')

# Frames to convert: train data first, then the kvka test data, then the uvka
# test data. attack_free is not part of this list and keeps its original columns.
frames_to_convert = (
    DoS, force_neutral,
    DoS_kvka, force_neutral_kvka,
    DoS_uvka, force_neutral_uvka,
)

# The columns are overwritten in place, so this cell expects the freshly
# loaded (still hexadecimal text) frames.
for frame in frames_to_convert:
    for column in hex_columns:
        frame[column] = frame[column].apply(hex_to_decimal)

display(force_neutral_kvka)


Unnamed: 0,timestamp,arbitration_id,data_field,attack
0,1.672531e+09,409,10376029658402455807,0
1,1.672531e+09,388,12884901888,0
2,1.672531e+09,413,4611756382877457663,0
3,1.672531e+09,417,0,0
4,1.672531e+09,455,1868041717547071,0
...,...,...,...,...
839868,1.672532e+09,481,4279566368,0
839869,1.672532e+09,193,240048196083575707,0
839870,1.672532e+09,197,229227940826484838,0
839871,1.672532e+09,485,5115999566495240705,0


In [5]:
# Persist the converted frames, one file per (split, attack) combination.
#
# Every frame gets its own target file. Previously both force-neutral test
# frames were written to the *training* force_neutral.csv (so the model was
# later trained on test data) and the uvka DoS frame overwrote the kvka DoS file.
#
# NOTE(review): the kv_ka / uv_ka sub-folders mirror the TestDataType member
# names; the loaders inside py_pkg.testing are not visible from this notebook,
# so confirm they read the test files from these locations.
clean_data_dir = Path("./datasets/clean-data-2/set-1")


def _save_clean_csv(frame, *relative_parts):
    """Write ``frame`` as a CSV below ``clean_data_dir``.

    ``relative_parts`` are the path components under ``clean_data_dir``; the
    parent folder is created when missing. The CSV options (comma separator,
    no index column, UTF-8) are the ones this cell has always used.
    """
    target = clean_data_dir.joinpath(*relative_parts)
    target.parent.mkdir(parents=True, exist_ok=True)
    frame.to_csv(target, sep=',', index=False, encoding='utf-8')


# save training data (same files the training cell reads back)
_save_clean_csv(DoS, "training-data", "dos", "dos.csv")
_save_clean_csv(force_neutral, "training-data", "force_neutral", "force_neutral.csv")

# save testing data kvka
_save_clean_csv(DoS_kvka, "testing-data", "kv_ka", "dos", "dos.csv")
_save_clean_csv(force_neutral_kvka, "testing-data", "kv_ka", "force_neutral", "force_neutral.csv")

# save testing data uvka
_save_clean_csv(DoS_uvka, "testing-data", "uv_ka", "dos", "dos.csv")
_save_clean_csv(force_neutral_uvka, "testing-data", "uv_ka", "force_neutral", "force_neutral.csv")

In [6]:
import time

# Cleaned training CSVs, one per attack type.
dos_dir = "./datasets/clean-data-2/set-1/training-data/dos/dos.csv"
fn_dir = "./datasets/clean-data-2/set-1/training-data/force_neutral/force_neutral.csv"

# Reference point for the elapsed time that run_all_tests prints.
start_time = time.time()

dos_dataset = read_dataset_and_split(dos_dir)
fn_dataset = read_dataset_and_split(fn_dir)

# initialize algo: algo_1 is built from the DoS data, algo_2 from the
# force-neutral data. algo_2 used to receive dos_dataset as well, which left
# fn_dataset unused and made the results labelled 'Force Neutral' a second
# DoS run.
algo_1 = Algo(dos_dataset)
algo_2 = Algo(fn_dataset)

print("==> ml-algo [Initializing]")

# implement individual algo
algo_1.impl_random_forests()
algo_1.impl_xgboost()
algo_1.impl_kmeans()

algo_2.impl_random_forests()
algo_2.impl_xgboost()
algo_2.impl_kmeans()



==> ml-algo [Initializing]
==> ml-algo [Random Forests Implemented in 00:07]
==> ml-algo [Gradient Boosting Implemented in 00:09]
==> ml-algo [KMeans Implemented in 00:01]
==> ml-algo [Random Forests Implemented in 00:08]
==> ml-algo [Gradient Boosting Implemented in 00:11]
==> ml-algo [KMeans Implemented in 00:01]


In [10]:
from py_pkg.testing.testing import Testing

tests = Testing()

def run_all_tests(algo, title):
    """Test every model of ``algo`` on both test sets and save the metrics.

    For Random Forests, Extreme Gradient Boosting and K-Means (in that order)
    the model is run on the kv_ka and then the uv_ka test set via
    ``tests.test_for_2``, metrics are derived with
    ``tests.generate_all_test_metrics`` and both results are stored with
    ``tests.save_testing_results_2`` under "[<title>] <model> - <test set>".

    Finally prints the time elapsed since the module-level ``start_time``.

    Args:
        algo: object forwarded to ``tests.test_for_2`` as the algorithm to test.
        title: label placed in the saved result titles and the final message.
    """
    predictors = (
        (AlgoToPredict.random_forest, "Random Forests"),
        (AlgoToPredict.xgboost, "Extreme Gradient Boosting"),
        (AlgoToPredict.k_means, "K-Means"),
    )
    test_sets = (
        (TestDataType.kv_ka, "KV-KA"),
        (TestDataType.uv_ka, "UV-KA"),
    )

    for predictor, predictor_label in predictors:
        # Evaluate on both test sets first ...
        collected_metrics = []
        for data_type, set_label in test_sets:
            true_and_predicted = tests.test_for_2(data_type, algo, predictor)
            metrics = tests.generate_all_test_metrics(true_and_predicted)
            collected_metrics.append((set_label, metrics))

        # ... then save results, KV-KA before UV-KA.
        for set_label, metrics in collected_metrics:
            tests.save_testing_results_2(metrics, f"[{title}] {predictor_label} - {set_label}")

    close_time = time.time()

    print(f"\n==> ml-algo [Model training for {title} and Testing Completed in {get_human_time(close_time, start_time)}]")



In [None]:
# Each entry pairs an Algo instance with the label used in its result titles.
algos = [
    (algo_1, 'DoS'),
    (algo_2, 'Force Neutral'),
]

# Run the full test suite once per entry, in list order.
for algo, title in algos:
    run_all_tests(algo=algo, title=title)

==> ml-algo [Predicting for Random Forests - DoS]
==> ml-algo [Predicting for Random Forests - Force Neutral]

==> ml-algo [random_forest Predicted in 00:03 for kv_ka test set] 



