## Purpose

To compute the ML performance of the proposed method in near real-time.

In [1]:
import os 
import sys
import numpy as np
from collections import defaultdict
import CAN_objects.aid_message
import matplotlib.pyplot as plt


actt_path = os.path.join(os.path.join(os.path.expanduser("~"), "Projects", "CAN", "actt"))
os.chdir(actt_path)
sys.path.insert(0, "src") # add src folder to path so that files from this folder can be imported

from generalFunctions import unpickle
import subprocess

import importlib
importlib.reload(CAN_objects.aid_message)
from init_cancapture_from_canlog import init_cancap
import json
import seaborn as sns
import pandas as pd

from CAN_objects.capture import MappedCapture, MatchedCapture
import math
from scipy.cluster.hierarchy import single, complete, average, ward, dendrogram, linkage, fcluster

from pprint import pprint
from sklearn.metrics.cluster import normalized_mutual_info_score

from clusim.clustering import Clustering, remap2match
import clusim.sim as sim

import glob
from tqdm import tqdm
import itertools
from scipy.stats import shapiro, mannwhitneyu, ttest_ind, spearmanr
from sklearn.preprocessing import normalize, scale, MinMaxScaler, StandardScaler

## Enable the Use of Functions From the Detect Repo

In [2]:
# sys.path.insert(0, "/home/cades/Projects/CAN/detect/") # add detect folder to path so that files from this folder can be imported
sys.path.insert(0, "/home/cloud/Projects/CAN/detect/") # add detect folder to path so that files from this folder can be imported
import signal_based_preprocess_functions
print(os.getcwd())

/home/cloud/Projects/CAN/actt


## Functions

In [3]:
def from_capture_to_time_series(cap, ground_truth_dbc_path, freq):
    
    signal_multivar_ts, timepts, aid_signal_tups = signal_based_preprocess_functions.capture_to_mv_signal_timeseries(cap, ground_truth_dbc_path, min_hz_msgs=freq)

    return signal_multivar_ts, timepts, aid_signal_tups


def from_captures_to_time_series(cap_1, cap_2, ground_truth_dbc_path):
        
    signal_multivar_ts_1, timepts_1, aid_signal_tups_1 = signal_based_preprocess_functions.capture_to_mv_signal_timeseries(cap_1, ground_truth_dbc_path)
    signal_multivar_ts_2, timepts_2, aid_signal_tups_2 = signal_based_preprocess_functions.capture_to_mv_signal_timeseries(cap_2, ground_truth_dbc_path)

    return signal_multivar_ts_1, timepts_1, aid_signal_tups_1, signal_multivar_ts_2, timepts_2, aid_signal_tups_2


def remove_constant_signals(signal_multivar_ts):
    return signal_multivar_ts[:, ~np.all(signal_multivar_ts[1:] == signal_multivar_ts[:-1], axis=0)]


def partition_time_series(signal_multivar_ts, window_length, offset):
    
    n = signal_multivar_ts.shape[0]
    i = 0
    partition = []
    
    while (i + window_length) < n:
        partition.append(signal_multivar_ts[i: i + window_length,:])
        i = i + offset
        
    if i != n:
        partition.append(signal_multivar_ts[i:n,:])
        
    return partition
    
    
def process_multivariate_signals(signal_multivar_ts, aid_signal_tups, window_length, offset):
    
    # First dataframe
    # Convert matrix of time series into a dataframe
    df = pd.DataFrame({f"{tup[0]}_{tup[1]}": signal_multivar_ts[:,index] for index, tup in enumerate(aid_signal_tups)})
    # display(df)

    # Remove columns with constant values
    df = df.loc[:, (df != df.iloc[0]).any()] 
    # display(df)
    
    # Stadarization
    # df_standardized = (df-df.mean())/df.std()
    df_standardized = (df-df.min())/(df.max()-df.min())
    # display(df_standardized)
    
    # Partition of data frames
    n = df_standardized.shape[0]
    i = 0
    partition = []
    
    while (i + window_length) < n:
        partition.append(df_standardized.iloc[i:i + window_length, :])
        i = i + offset
        
    if i != n:
        partition.append(df_standardized.iloc[i:n, :])
        
    return partition


def process_multiple_multivariate_signals(signal_multivar_ts_1, aid_signal_tups_1, signal_multivar_ts_2, aid_signal_tups_2, window_length, offset):
    
    # First dataframe
    # Convert matrix of time series into a dataframe
    df_1 = pd.DataFrame({f"{tup[0]}_{tup[1]}": signal_multivar_ts_1[:,index] for index, tup in enumerate(aid_signal_tups_1)})
    # display(df)
    print(df_1.shape)

    # Remove columns with constant values
    df_1 = df_1.loc[:, (df_1 != df_1.iloc[0]).any()] 
    # display(df)
    
    # Stadarization
    df_1_standardized = (df_1-df_1.mean())/df_1.std()
    # display(df_2_standardized)
    
    # Partition of data frames
    n = df_1_standardized.shape[0]
    i = 0
    partition_1 = []
    
    while (i + window_length) < n:
        partition_1.append(df_1_standardized.iloc[i:i + window_length, :])
        i = i + offset
        
    if i != n:
        partition_1.append(df_1_standardized.iloc[i:n, :])
        
        
    # Second dataframe
    # Convert matrix of time series into a dataframe
    df_2 = pd.DataFrame({f"{tup[0]}_{tup[1]}": signal_multivar_ts_2[:,index] for index, tup in enumerate(aid_signal_tups_2)})
    # display(df)
    print(df_2.shape)

    # Remove columns with constant values
    df_2 = df_2.loc[:, (df_2 != df_2.iloc[0]).any()] 
    # display(df)
    
    # Stadarization
    df_2_standardized = (df_2-df_2.mean())/df_2.std()
    # display(df_2_standardized)
    
    # Partition of data frames
    n = df_2_standardized.shape[0]
    i = 0
    partition_2 = []
    
    while (i + window_length) < n:
        partition_2.append(df_2_standardized.iloc[i:i + window_length, :])
        i = i + offset
        
    if i != n:
        partition_2.append(df_2_standardized.iloc[i:n, :])
        
    return partition_1, partition_2


def upper(df):
    '''Returns the upper triangle of a correlation matrix.
    You can use scipy.spatial.distance.squareform to recreate matrix from upper triangle.
    Args:
      df: pandas or numpy correlation matrix
    Returns:
      list of values from upper triangle
    '''
    try:
        assert(type(df) == np.ndarray)
    except:
        if type(df) == pd.DataFrame:
            df = df.values
        else:
            raise TypeError('Must be np.ndarray or pd.DataFrame')
    mask = np.triu_indices(df.shape[0], k=1)
    
    return df[mask]



def randomized_test_permutations(m1, m2):
    """Nonparametric permutation testing Monte Carlo"""
    np.random.seed(0)
    rhos = []
    n_iter = 100
    true_rho, _ = spearmanr(upper(m1), upper(m2))
    # matrix permutation, shuffle the groups
    m_ids = list(m1.columns)
    m2_v = upper(m2)
    for iter in range(n_iter):
        np.random.shuffle(m_ids) # shuffle list 
        r, _ = spearmanr(upper(m1.loc[m_ids, m_ids]), m2_v)  
        rhos.append(r)
    perm_p = ((np.sum(np.abs(true_rho) <= np.abs(rhos)))+1)/(n_iter+1) # two-tailed test

    return perm_p


def compute_correlation_matrices(partition):
    
    corr_matrices = []

    for df in partition:

        # Remove columns with constant values
        df = df.loc[:, (df != df.iloc[0]).any()] 

        # Compute correlation matrix
        corr_matrices.append(df.corr(method="pearson"))
        
    return corr_matrices


def compute_similarity_from_correlation_matrices(corr_matrices):
    
    similarities = []
    
    for i in range(len(corr_matrices)-1):

        # print("raw: ", corr_matrices[i].shape, corr_matrices[i+1].shape)

        signal_names_1 = corr_matrices[i].columns.values
        signal_names_2 = corr_matrices[i+1].columns.values
        signal_names_intersection = list(set(signal_names_1).intersection(set(signal_names_2)))

        df_1 = corr_matrices[i].loc[signal_names_intersection, signal_names_intersection] 
        df_2 = corr_matrices[i+1].loc[signal_names_intersection, signal_names_intersection]
  
        # print("pro: ", df_1.shape, df_2.shape, "\n")

        similarities.append((df_1.shape[0], spearmanr(upper(df_1), upper(df_2))[0], spearmanr(upper(df_1), upper(df_2))[1]))
        
    return similarities


def compute_similarity_from_multiple_correlation_matrices(corr_matrices_1, corr_matrices_2):
    
    similarities = []
    
    if len(corr_matrices_1) <= len(corr_matrices_2):
        corr_matrices_reference = corr_matrices_1
    else:
        corr_matrices_reference = corr_matrices_2
        
    print(len(corr_matrices_reference))
            
    for i in range(len(corr_matrices_reference)):

        # print("raw: ", corr_matrices[i].shape, corr_matrices[i+1].shape)

        signal_names_1 = corr_matrices_1[i].columns.values
        signal_names_2 = corr_matrices_2[i].columns.values
        signal_names_intersection = list(set(signal_names_1).intersection(set(signal_names_2)))

        df_1 = corr_matrices_1[i].loc[signal_names_intersection, signal_names_intersection] 
        df_2 = corr_matrices_2[i].loc[signal_names_intersection, signal_names_intersection]
  
        # print("pro: ", df_1.shape, df_2.shape, "\n")

        # similarities.append((df_1.shape[0], spearmanr(upper(df_1), upper(df_2))[0], spearmanr(upper(df_1), upper(df_2))[1]))
        
        correlation = spearmanr(upper(df_1), upper(df_2))[0]
        p_value = spearmanr(upper(df_1), upper(df_2))[1]
        
        if p_value > 0.05:
            similarities.append((i, correlation, p_value))
        else:
            similarities.append(i)
            
        
    return similarities


def create_time_intervals(total_length, window, offset):
    
    # Partition of data frames
    i = 0
    intervals = []
    
    while (i + window) < total_length:
        intervals.append((i, i + window))
        i = i + offset
        
    if i != total_length:
        intervals.append((i , total_length))
        
    return intervals


    # # Partition of data frames
    # n = df_standardized.shape[0]
    # i = 0
    # partition = []
    
    # while (i + window_length) < n:
    #     partition.append(df_standardized.iloc[i:i + window_length, :])
    #     i = i + offset
        
    # if i != n:
    #     partition.append(df_standardized.iloc[i:n, :])
        
    # return partition
    

    # intervals = []
    # # offset = 0.1*offset
    
    # for i in np.arange(0, total_length - window + 1, offset, dtype=float):
    #     intervals.append((i, i + window))

    # if i + window < total_length:
    #     intervals.append((i + offset, total_length))

    # return intervals    

## File Names

In [4]:
ground_truth_dbc_path = os.path.join(actt_path, "metadata", "dbcs", "heuristic_labeled", "anonymized_020822_030640.dbc")
#testing_captures = ["correlated_masquerade_1_030804_082640", "correlated_masquerade_2_031128_011320", "correlated_masquerade_3_040322_190000"]

# training_captures = [directory for directory in os.listdir("/home/cades/Projects/CAN/actt/data-cancaptures/") if ("road_ambient_dyno" in directory) or ("road_ambient_highway" in directory)]
training_captures = [directory for directory in os.listdir("/home/cloud/Projects/CAN/actt/data-cancaptures/") if ("road_ambient_dyno" in directory) or ("road_ambient_highway" in directory)]
print(len(training_captures), training_captures, "\n")  

testing_captures = ["correlated_masquerade_1_030804_082640", "correlated_masquerade_2_031128_011320", "correlated_masquerade_3_040322_190000", 
                    "road_attack_max_speedometer_attack_1_masquerade_060215_054000", "road_attack_max_speedometer_attack_2_masquerade_060611_002640", 
                    "road_attack_max_speedometer_attack_3_masquerade_061004_181320", "road_attack_max_engine_coolant_temp_attack_masquerade_041109_063320",
                    "road_attack_reverse_light_on_attack_1_masquerade_091205_030000", "road_attack_reverse_light_on_attack_2_masquerade_100330_214640", 
                    "road_attack_reverse_light_on_attack_3_masquerade_100724_153320", "road_attack_reverse_light_off_attack_1_masquerade_080110_162000", 
                    "road_attack_reverse_light_off_attack_2_masquerade_080505_110640", "road_attack_reverse_light_off_attack_3_masquerade_080829_045320"]

print(len(testing_captures), testing_captures) 

12 ['road_ambient_dyno_drive_basic_short_020822_030640', 'road_ambient_dyno_idle_radio_infotainment_030410_144000', 'road_ambient_dyno_drive_winter_030410_144000', 'road_ambient_highway_street_driving_diagnostics_031128_011320', 'road_ambient_dyno_drive_extended_short_021215_195320', 'road_ambient_highway_street_driving_long_050305_002000', 'road_ambient_dyno_drive_extended_long_040716_134640', 'road_ambient_dyno_drive_benign_anomaly_030804_082640', 'road_ambient_dyno_exercise_all_bits_030410_144000', 'road_ambient_dyno_reverse_040322_190000', 'road_ambient_dyno_drive_radio_infotainment_041109_063320', 'road_ambient_dyno_drive_basic_long_050305_002000'] 

13 ['correlated_masquerade_1_030804_082640', 'correlated_masquerade_2_031128_011320', 'correlated_masquerade_3_040322_190000', 'road_attack_max_speedometer_attack_1_masquerade_060215_054000', 'road_attack_max_speedometer_attack_2_masquerade_060611_002640', 'road_attack_max_speedometer_attack_3_masquerade_061004_181320', 'road_attack_m

## Obtain Metadata

In [5]:
# with open("/home/cades/Projects/CAN/actt/data/capture_metadata.json") as f:
with open("/home/cloud/Projects/CAN/actt/data/capture_metadata.json") as f:
    attack_metadata = json.load(f)
    
# pprint(testing_captures)
# pprint(attack_metadata)

attack_metadata_keys = ["correlated_signal_attack_1_masquerade", "correlated_signal_attack_2_masquerade", "correlated_signal_attack_3_masquerade", 
                        "max_speedometer_attack_1_masquerade", "max_speedometer_attack_2_masquerade", "max_speedometer_attack_3_masquerade",
                        "max_engine_coolant_temp_attack_masquerade", "reverse_light_on_attack_1_masquerade", "reverse_light_on_attack_2_masquerade",
                        "reverse_light_on_attack_3_masquerade", "reverse_light_off_attack_1_masquerade", "reverse_light_off_attack_2_masquerade",
                        "reverse_light_off_attack_3_masquerade"]

print(len(attack_metadata_keys))

13


In [6]:
print(attack_metadata_keys[0])
attack_metadata["correlated_signal_attack_1_masquerade"]

correlated_signal_attack_1_masquerade


{'description': 'start from driving; accelerate; start injecting; car rolls to stop; stop injecting; accelerate',
 'elapsed_sec': 33.101852,
 'injection_data_str': '595945450000FFFF',
 'injection_id': '0x6e0',
 'injection_interval': [9.191851, 30.050109],
 'modified': True,
 'on_dyno': True}

## Experiments with Correlations of Same Signals

## Extract Correlation Matrix From Training

In [7]:
signal_multivar_ts, timepts, aid_signal_tups = from_capture_to_time_series(training_captures[-1], ground_truth_dbc_path, freq=100)
signal_multivar_ts.shape

(125100, 47)

## Partition Time Series Benign

In [8]:
window = signal_multivar_ts.shape[0] # Consider the entire series
offset = window
partition_training = process_multivariate_signals(signal_multivar_ts, aid_signal_tups, window, offset)

In [9]:
len(partition_training)

1

## Compute Correlation Matrices

In [10]:
corr_matrices_training = compute_correlation_matrices(partition_training)
print(len(corr_matrices_training))

1


In [11]:
corr_matrices_training[0].columns.values

array(['14_0', '14_2', '51_0', '51_1', '51_2', '51_4', '51_5', '51_6',
       '167_0', '167_3', '167_4', '167_5', '167_6', '167_7', '167_8',
       '208_0', '208_1', '208_2', '208_3', '208_4', '208_5', '293_2',
       '293_3', '293_5', '293_6', '852_0', '852_2', '852_3', '1505_0',
       '1505_1', '1505_5', '1760_0', '1760_1', '1760_2', '1760_3'],
      dtype=object)

## Extract Correlation Matrix From Testing

In [12]:
testing_captures

['correlated_masquerade_1_030804_082640',
 'correlated_masquerade_2_031128_011320',
 'correlated_masquerade_3_040322_190000',
 'road_attack_max_speedometer_attack_1_masquerade_060215_054000',
 'road_attack_max_speedometer_attack_2_masquerade_060611_002640',
 'road_attack_max_speedometer_attack_3_masquerade_061004_181320',
 'road_attack_max_engine_coolant_temp_attack_masquerade_041109_063320',
 'road_attack_reverse_light_on_attack_1_masquerade_091205_030000',
 'road_attack_reverse_light_on_attack_2_masquerade_100330_214640',
 'road_attack_reverse_light_on_attack_3_masquerade_100724_153320',
 'road_attack_reverse_light_off_attack_1_masquerade_080110_162000',
 'road_attack_reverse_light_off_attack_2_masquerade_080505_110640',
 'road_attack_reverse_light_off_attack_3_masquerade_080829_045320']

In [13]:
signal_multivar_ts, timepts, aid_signal_tups = from_capture_to_time_series(testing_captures[0], ground_truth_dbc_path, freq=100)

In [14]:
timepts[-1]

33.99

## Partition Testing Signals

In [15]:
partition_testing = process_multivariate_signals(signal_multivar_ts, aid_signal_tups, 50, 10)

In [16]:
print(len(partition_testing))
#partition_testing[0]

336


## Compute Correlation Matrices Testing

In [17]:
corr_matrices_testing = compute_correlation_matrices(partition_testing)
len(corr_matrices_testing)

336

## Create Time Intervals

In [18]:
total_length = int(np.ceil(timepts[-1]))   
window = 10
offset = 1        
        
intervals_testing = create_time_intervals(total_length, window, offset)
    
print(total_length, len(intervals_testing), intervals_testing)

34 25 [(0, 10), (1, 11), (2, 12), (3, 13), (4, 14), (5, 15), (6, 16), (7, 17), (8, 18), (9, 19), (10, 20), (11, 21), (12, 22), (13, 23), (14, 24), (15, 25), (16, 26), (17, 27), (18, 28), (19, 29), (20, 30), (21, 31), (22, 32), (23, 33), (24, 34)]


## Hypothesis Testing (Single Attack)

In [22]:
signals_training = corr_matrices_training[0].columns.values

window = 100
offset = 1

print("Processing: ", attack_metadata_keys[0])
signal_multivar_ts, timepts, aid_signal_tups = from_capture_to_time_series(testing_captures[0], ground_truth_dbc_path, freq=100)

partition_testing = process_multivariate_signals(signal_multivar_ts, aid_signal_tups, window, offset) # Partition time series
print("intervals: ", len(partition_testing))

# display(partition_testing[0])
# display(partition_testing[1])
# display(partition_testing[-1])

corr_matrices_testing = compute_correlation_matrices(partition_testing) # Compute Correlations

# total_length = int(np.ceil(timepts[-1])) 
total_length = timepts[-1]
print("total length (s): ", total_length) 
intervals_testing = create_time_intervals(total_length, window/100, offset/100)
print(len(intervals_testing), intervals_testing)
print("attack interval (s): ", attack_metadata[attack_metadata_keys[0]]["injection_interval"][0], attack_metadata[attack_metadata_keys[0]]["injection_interval"][1])

tp, fp, fn, tn = 0, 0, 0, 0

for index_interval in tqdm(range(len(intervals_testing))):
    
    # Compute signal names intersection
    signals_testing = corr_matrices_testing[index_interval].columns.values
    signal_names_intersection = list(set(signals_training).intersection(set(signals_testing)))

    # print(signal_names_intersection)
    
    # Filter correlation matrices by common names
    corr_matrix_1 = corr_matrices_training[0].loc[signal_names_intersection, signal_names_intersection]
    corr_matrix_2 = corr_matrices_testing[index_interval].loc[signal_names_intersection, signal_names_intersection]
    
    # Do hypothesis test
    spearman_test = spearmanr(upper(corr_matrix_1), upper(corr_matrix_2))
    # print((i, corr_matrix_1.shape[0], spearman_test[0], spearman_test[1]))
    
    if spearman_test[1] > 0.05: # positive detection
        if ((intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[0]]["injection_interval"][0] and intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[0]]["injection_interval"][0])
               or (intervals_testing[index_interval][0] > attack_metadata[attack_metadata_keys[0]]["injection_interval"][0] and intervals_testing[index_interval][1] < attack_metadata[attack_metadata_keys[0]]["injection_interval"][1])
                   or (intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[0]]["injection_interval"][1] and intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[0]]["injection_interval"][1])):
            tp += 1
        else:
            fp += 1
    else: # negative detection
        if ((intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[0]]["injection_interval"][0] and intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[0]]["injection_interval"][0])
               or (intervals_testing[index_interval][0] > attack_metadata[attack_metadata_keys[0]]["injection_interval"][0] and intervals_testing[index_interval][1] < attack_metadata[attack_metadata_keys[0]]["injection_interval"][1])
                   or (intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[0]]["injection_interval"][1] and intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[0]]["injection_interval"][1])):
            fn += 1
        else:
            tn += 1
            
# precision
if tp + fp != 0:            
    precision = tp/(tp + fp)
else:
    precision = np.nan

# recall
if tp + fn != 0:
    recall = tp/(tp + fn)
else:
    recall = np.nan

# f1
if precision + recall != 0:
    f1 = 2*((precision*recall)/(precision + recall))

else:
    f1 = np.nan

# fpr
if fp + tn != 0:
    fpr = fp/(fp + tn)
else:
    fpr = np.nan

# fnr
if fn + tp != 0:
    fnr = fn/(fn + tp)
else:
    fnr = np.nan

# mcc
if (tp+fp == 0) or (tp+fn == 0) or (tn+fp == 0) or (tn+fn == 0):
    mcc = (tp*tn) - (fp*fn)
else:
    mcc = (tp*tn - fp*fn)/(math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)))

print(f"tp: {tp}, tn: {tn}, fp: {fp}, fn: {fn}")
print(f"precision: {precision:.3f}, recall: {recall:.3f}, f1: {f1:.3f}, fpr: {fpr:.3f}, fnr: {fnr:.3f}, mcc: {mcc:.3f}")
print(f"positive_intervals: {tp+fn:.3f}, negative_intervals: {tn+fp:.3f}\n")

Processing:  correlated_signal_attack_1_masquerade
intervals:  3301
total length (s):  33.99
3300 [(0, 1.0), (0.01, 1.01), (0.02, 1.02), (0.03, 1.03), (0.04, 1.04), (0.05, 1.05), (0.060000000000000005, 1.06), (0.07, 1.07), (0.08, 1.08), (0.09, 1.09), (0.09999999999999999, 1.1), (0.10999999999999999, 1.1099999999999999), (0.11999999999999998, 1.1199999999999999), (0.12999999999999998, 1.13), (0.13999999999999999, 1.14), (0.15, 1.15), (0.16, 1.16), (0.17, 1.17), (0.18000000000000002, 1.18), (0.19000000000000003, 1.19), (0.20000000000000004, 1.2), (0.21000000000000005, 1.21), (0.22000000000000006, 1.22), (0.23000000000000007, 1.23), (0.24000000000000007, 1.24), (0.25000000000000006, 1.25), (0.26000000000000006, 1.26), (0.2700000000000001, 1.27), (0.2800000000000001, 1.28), (0.2900000000000001, 1.29), (0.3000000000000001, 1.3), (0.3100000000000001, 1.31), (0.3200000000000001, 1.32), (0.3300000000000001, 1.33), (0.34000000000000014, 1.34), (0.35000000000000014, 1.35), (0.36000000000000015, 

100%|██████████| 3300/3300 [00:06<00:00, 481.41it/s]

tp: 1847, tn: 815, fp: 299, fn: 339
precision: 0.861, recall: 0.845, f1: 0.853, fpr: 0.268, fnr: 0.155, mcc: 0.572
positive_intervals: 2186.000, negative_intervals: 1114.000






## Hypothesis Testing (All Attacks)

In [20]:
signals_training = corr_matrices_training[0].columns.values

window = 100
offset = 1 

for index_attack in range(len(attack_metadata_keys)):

    print("Processing: ", attack_metadata_keys[index_attack])
    signal_multivar_ts, timepts, aid_signal_tups = from_capture_to_time_series(testing_captures[index_attack], ground_truth_dbc_path, freq=100)
    
    partition_testing = process_multivariate_signals(signal_multivar_ts, aid_signal_tups, window, offset) # Partition time series
    print("intervals: ", len(partition_testing))

    # display(partition_testing[0])
    # display(partition_testing[1])
    # display(partition_testing[-1])
    
    corr_matrices_testing = compute_correlation_matrices(partition_testing) # Compute Correlations
    
    # total_length = int(np.ceil(timepts[-1])) 
    total_length = timepts[-1]
    print("total length (s): ", total_length) 
    intervals_testing = create_time_intervals(total_length, window/100, offset/100)
    print(len(intervals_testing), intervals_testing)
    print("attack interval (s): ", attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0], attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1])
    
    tp, fp, fn, tn = 0, 0, 0, 0

    for index_interval in range(len(intervals_testing)):

        # Compute signal names intersection
        signals_testing = corr_matrices_testing[index_interval].columns.values
        signal_names_intersection = list(set(signals_training).intersection(set(signals_testing)))

        # Filter correlation matrices by common names
        corr_matrix_1 = corr_matrices_training[0].loc[signal_names_intersection, signal_names_intersection]
        corr_matrix_2 = corr_matrices_testing[index_interval].loc[signal_names_intersection, signal_names_intersection]

        # Do hypothesis test
        spearman_test = spearmanr(upper(corr_matrix_1), upper(corr_matrix_2))
        # print((i, corr_matrix_1.shape[0], spearman_test[0], spearman_test[1]))

        if spearman_test[1] > 0.05: # positive detection
            if ((intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0] and intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0])
                   or (intervals_testing[index_interval][0] > attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0] and intervals_testing[index_interval][1] < attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1])
                       or (intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1] and intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1])):
                tp += 1
            else:
                fp += 1
        else: # negative detection
            if ((intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0] and intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0])
                   or (intervals_testing[index_interval][0] > attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][0] and intervals_testing[index_interval][1] < attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1])
                       or (intervals_testing[index_interval][0] < attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1] and intervals_testing[index_interval][1] > attack_metadata[attack_metadata_keys[index_attack]]["injection_interval"][1])):
                fn += 1
            else:
                tn += 1
    # precision
    if tp + fp != 0:            
        precision = tp/(tp + fp)
    else:
        precision = np.nan
        
    # recall
    if tp + fn != 0:
        recall = tp/(tp + fn)
    else:
        recall = np.nan
        
    # f1
    if precision + recall != 0:
        f1 = 2*((precision*recall)/(precision + recall))
        
    else:
        f1 = np.nan
        
    # fpr
    if fp + tn != 0:
        fpr = fp/(fp + tn)
    else:
        fpr = np.nan

    # fnr
    if fn + tp != 0:
        fnr = fn/(fn + tp)
    else:
        fnr = np.nan

    # mcc
    if (tp+fp == 0) or (tp+fn == 0) or (tn+fp == 0) or (tn+fn == 0):
        mcc = (tp*tn) - (fp*fn)
    else:
        mcc = (tp*tn - fp*fn)/(math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)))

    print(f"tp: {tp}, tn: {tn}, fp: {fp}, fn: {fn}")
    print(f"precision: {precision:.3f}, recall: {recall:.3f}, f1: {f1:.3f}, fpr: {fpr:.3f}, fnr: {fnr:.3f}, mcc: {mcc:.3f}")
    print(f"positive_intervals: {tp+fn:.3f}, negative_intervals: {tn+fp:.3f}\n")

Processing:  correlated_signal_attack_1_masquerade


intervals:  3301
total length (s):  33.99
3300 [(0, 1.0), (0.01, 1.01), (0.02, 1.02), (0.03, 1.03), (0.04, 1.04), (0.05, 1.05), (0.060000000000000005, 1.06), (0.07, 1.07), (0.08, 1.08), (0.09, 1.09), (0.09999999999999999, 1.1), (0.10999999999999999, 1.1099999999999999), (0.11999999999999998, 1.1199999999999999), (0.12999999999999998, 1.13), (0.13999999999999999, 1.14), (0.15, 1.15), (0.16, 1.16), (0.17, 1.17), (0.18000000000000002, 1.18), (0.19000000000000003, 1.19), (0.20000000000000004, 1.2), (0.21000000000000005, 1.21), (0.22000000000000006, 1.22), (0.23000000000000007, 1.23), (0.24000000000000007, 1.24), (0.25000000000000006, 1.25), (0.26000000000000006, 1.26), (0.2700000000000001, 1.27), (0.2800000000000001, 1.28), (0.2900000000000001, 1.29), (0.3000000000000001, 1.3), (0.3100000000000001, 1.31), (0.3200000000000001, 1.32), (0.3300000000000001, 1.33), (0.34000000000000014, 1.34), (0.35000000000000014, 1.35), (0.36000000000000015, 1.36), (0.37000000000000016, 1.37), (0.380000000000