## Purpose

To compute the ML performance of the proposed method in near real-time.

In [2]:
import os 
import sys
import numpy as np
from collections import defaultdict
import CAN_objects.aid_message
import matplotlib.pyplot as plt


actt_path = os.path.join(os.path.join(os.path.expanduser("~"), "Projects", "CAN", "actt"))
os.chdir(actt_path)
sys.path.insert(0, "src") # add src folder to path so that files from this folder can be imported

from generalFunctions import unpickle
import subprocess

import importlib
importlib.reload(CAN_objects.aid_message)
from init_cancapture_from_canlog import init_cancap
import json
import seaborn as sns
import pandas as pd

from CAN_objects.capture import MappedCapture, MatchedCapture
import math
from scipy.cluster.hierarchy import single, complete, average, ward, dendrogram, linkage, fcluster

from pprint import pprint
from sklearn.metrics.cluster import normalized_mutual_info_score

from clusim.clustering import Clustering, remap2match
import clusim.sim as sim

import glob
from tqdm import tqdm
import itertools
from scipy.stats import shapiro, mannwhitneyu, ttest_ind, spearmanr
from sklearn.preprocessing import normalize, scale, MinMaxScaler, StandardScaler

import warnings
warnings.filterwarnings("ignore")

## Enable the Use of Functions From the Detect Repo

In [3]:
# sys.path.insert(0, "/home/cades/Projects/CAN/detect/") # add detect folder to path so that files from this folder can be imported
sys.path.insert(0, "/home/cloud/Projects/CAN/detect/") # add detect folder to path so that files from this folder can be imported
import signal_based_preprocess_functions
print(os.getcwd())

/home/cloud/Projects/CAN/actt


## Functions

In [4]:
def from_capture_to_time_series(cap, ground_truth_dbc_path):
    """Convert a single capture into a multivariate signal time series.

    Thin wrapper around
    ``signal_based_preprocess_functions.capture_to_mv_signal_timeseries``.

    Args:
        cap: Capture, forwarded unchanged to the helper.
        ground_truth_dbc_path: DBC path, forwarded unchanged to the helper.

    Returns:
        tuple: The helper's three results, in the helper's order (bound by the
        callers in this notebook to ``signal_multivar_ts``, ``timepts`` and
        ``aid_signal_tups``).
    """
    to_time_series = signal_based_preprocess_functions.capture_to_mv_signal_timeseries
    series, time_points, signal_ids = to_time_series(cap, ground_truth_dbc_path)
    return series, time_points, signal_ids


def from_captures_to_time_series(cap_1, cap_2, ground_truth_dbc_path):
    """Convert two captures into multivariate signal time series.

    Both captures are processed with the same DBC path by
    ``signal_based_preprocess_functions.capture_to_mv_signal_timeseries``.

    Args:
        cap_1: First capture, forwarded unchanged to the helper.
        cap_2: Second capture, forwarded unchanged to the helper.
        ground_truth_dbc_path: DBC path used for both captures.

    Returns:
        tuple: Six values - the helper's three results for ``cap_1`` followed
        by its three results for ``cap_2``.
    """
    to_time_series = signal_based_preprocess_functions.capture_to_mv_signal_timeseries

    series_a, times_a, signal_ids_a = to_time_series(cap_1, ground_truth_dbc_path)
    series_b, times_b, signal_ids_b = to_time_series(cap_2, ground_truth_dbc_path)

    return series_a, times_a, signal_ids_a, series_b, times_b, signal_ids_b


def remove_constant_signals(signal_multivar_ts):
    """Drop the columns of a 2-D array whose value never changes between rows.

    Args:
        signal_multivar_ts: 2-D array indexed as ``[row, column]``.

    Returns:
        The input restricted to the columns in which at least one pair of
        consecutive rows differs.
    """
    # A column varies when some row differs from the row directly before it.
    column_varies = (signal_multivar_ts[1:] != signal_multivar_ts[:-1]).any(axis=0)
    return signal_multivar_ts[:, column_varies]


def partition_time_series(signal_multivar_ts, window_length, offset):
    """Split a 2-D array into row windows.

    Windows of ``window_length`` rows are taken every ``offset`` rows for as
    long as a full window ends strictly before the last row; the rows that
    remain from the final start position are appended as one last (possibly
    shorter) window.

    Args:
        signal_multivar_ts: 2-D array indexed as ``[row, column]``.
        window_length: Number of rows per window (positive).
        offset: Step between consecutive window starts (positive).

    Returns:
        list: Array slices of the input, in order of their start row.

    Raises:
        ValueError: If ``window_length`` or ``offset`` is not positive
            (a non-positive offset would never terminate).
    """
    if window_length <= 0 or offset <= 0:
        raise ValueError("window_length and offset must be positive")

    n = signal_multivar_ts.shape[0]
    partition = []
    i = 0

    while (i + window_length) < n:
        partition.append(signal_multivar_ts[i:i + window_length, :])
        i += offset

    # Only add the tail when rows remain. With offset > window_length the start
    # index can jump past n, which previously appended an empty window.
    if i < n:
        partition.append(signal_multivar_ts[i:n, :])

    return partition
    
    
def process_multivariate_signals(df, window_length, offset):
    """Min-max normalize the non-constant columns of ``df`` and window its rows.

    Steps:
      1. Drop every column whose values never differ from its first row.
      2. Rescale each remaining column to [0, 1] using that column's min and
         max over the whole frame (not per window).
      3. Take windows of ``window_length`` rows every ``offset`` rows while a
         full window ends strictly before the last row, then append the rows
         left from the final start position as one last window.

    NOTE(review): every column of ``df`` is treated as a signal; bookkeeping
    columns (e.g. ID / Time) are normalized and windowed too unless the caller
    removes them first - confirm this is intended.

    Args:
        df: pandas DataFrame with at least one row.
        window_length: Number of rows per window (positive).
        offset: Step between consecutive window starts (positive).

    Returns:
        list: DataFrame slices of the normalized frame, in order of start row.

    Raises:
        ValueError: If ``window_length`` or ``offset`` is not positive
            (a non-positive offset would never terminate).
    """
    if window_length <= 0 or offset <= 0:
        raise ValueError("window_length and offset must be positive")

    # Remove columns with constant values (also avoids 0/0 in the rescaling)
    df = df.loc[:, (df != df.iloc[0]).any()]

    # Min-max normalization
    df_normalized = (df - df.min()) / (df.max() - df.min())

    # Partition of the data frame
    n = df_normalized.shape[0]
    partition = []
    i = 0

    while (i + window_length) < n:
        partition.append(df_normalized.iloc[i:i + window_length, :])
        i += offset

    # Only add the tail when rows remain. With offset > window_length the start
    # index can jump past n, which previously appended an empty frame.
    if i < n:
        partition.append(df_normalized.iloc[i:n, :])

    return partition


def _standardize_and_partition(signal_multivar_ts, aid_signal_tups, window_length, offset):
    """Build a z-scored DataFrame from one signal matrix and window its rows."""
    # One column per signal, named "<tup[0]>_<tup[1]>" from the matching tuple
    df = pd.DataFrame({f"{tup[0]}_{tup[1]}": signal_multivar_ts[:, index] for index, tup in enumerate(aid_signal_tups)})
    print(df.shape)

    # Remove columns with constant values (their standard deviation is zero)
    df = df.loc[:, (df != df.iloc[0]).any()]

    # Standardization (z-score per column over the whole frame)
    df_standardized = (df - df.mean()) / df.std()

    # Partition of the data frame
    n = df_standardized.shape[0]
    partition = []
    i = 0

    while (i + window_length) < n:
        partition.append(df_standardized.iloc[i:i + window_length, :])
        i += offset

    # Only add the tail when rows remain. With offset > window_length the start
    # index can jump past n, which previously appended an empty frame.
    if i < n:
        partition.append(df_standardized.iloc[i:n, :])

    return partition


def process_multiple_multivariate_signals(signal_multivar_ts_1, aid_signal_tups_1, signal_multivar_ts_2, aid_signal_tups_2, window_length, offset):
    """Standardize and window two signal matrices independently.

    Each matrix is turned into a DataFrame (one column per entry of its
    ``aid_signal_tups``), its shape is printed, constant columns are dropped,
    the remaining columns are z-scored over the whole frame, and the rows are
    split into windows of ``window_length`` rows taken every ``offset`` rows
    (plus a final window with the rows left over).

    Args:
        signal_multivar_ts_1: 2-D array indexed as ``[row, column]``.
        aid_signal_tups_1: Sequence of 2-item tuples, one per column of
            ``signal_multivar_ts_1``, used to name the columns.
        signal_multivar_ts_2: Second 2-D array.
        aid_signal_tups_2: Column tuples for ``signal_multivar_ts_2``.
        window_length: Number of rows per window (positive).
        offset: Step between consecutive window starts (positive).

    Returns:
        tuple: ``(partition_1, partition_2)``, each a list of DataFrames.

    Raises:
        ValueError: If ``window_length`` or ``offset`` is not positive
            (a non-positive offset would never terminate).
    """
    if window_length <= 0 or offset <= 0:
        raise ValueError("window_length and offset must be positive")

    partition_1 = _standardize_and_partition(signal_multivar_ts_1, aid_signal_tups_1, window_length, offset)
    partition_2 = _standardize_and_partition(signal_multivar_ts_2, aid_signal_tups_2, window_length, offset)

    return partition_1, partition_2


def upper(df):
    '''Returns the upper triangle (diagonal excluded) of a square matrix.
    You can use scipy.spatial.distance.squareform to recreate matrix from upper triangle.
    Args:
      df: pandas or numpy correlation matrix
    Returns:
      1-D numpy array with the values above the diagonal, in row-major order
    Raises:
      TypeError: if df is neither a np.ndarray nor a pd.DataFrame
    '''
    # Explicit isinstance checks: the previous assert-in-try form is stripped
    # under `python -O` and its bare `except` could hide unrelated errors.
    if isinstance(df, pd.DataFrame):
        df = df.values
    elif not isinstance(df, np.ndarray):
        raise TypeError('Must be np.ndarray or pd.DataFrame')

    # k=1 starts one above the main diagonal
    mask = np.triu_indices(df.shape[0], k=1)

    return df[mask]



def randomized_test_permutations(m1, m2, n_iter=100, seed=0):
    """Nonparametric Monte Carlo permutation test on two matrices.

    The observed statistic is the Spearman correlation between the upper
    triangles of ``m1`` and ``m2``. The null distribution is built by
    repeatedly applying the same random permutation to the rows and columns of
    ``m1`` and recomputing the correlation against ``m2``.

    Args:
        m1: pandas DataFrame whose index and columns carry the same labels
            (it is reordered with ``m1.loc[labels, labels]``).
        m2: Matrix accepted by ``upper`` (np.ndarray or pd.DataFrame).
        n_iter: Number of permutations (default 100, the previous fixed value).
        seed: Seed of the private random generator (default 0, the previous
            fixed value).

    Returns:
        Two-tailed permutation p-value
        ``(#{|rho_perm| >= |rho_true|} + 1) / (n_iter + 1)``.
    """
    # Private generator: yields the same shuffles as np.random.seed(seed) followed
    # by np.random.shuffle, but no longer resets the global NumPy RNG for callers.
    rng = np.random.RandomState(seed)

    true_rho, _ = spearmanr(upper(m1), upper(m2))

    # matrix permutation, shuffle the groups
    m_ids = list(m1.columns)
    m2_v = upper(m2)
    rhos = []
    for _iteration in range(n_iter):
        rng.shuffle(m_ids)  # in-place shuffle of the label order
        r, _ = spearmanr(upper(m1.loc[m_ids, m_ids]), m2_v)
        rhos.append(r)

    perm_p = (np.sum(np.abs(true_rho) <= np.abs(rhos)) + 1) / (n_iter + 1)  # two-tailed test

    return perm_p


def compute_correlation_matrices(partition):
    """Compute one Pearson correlation matrix per window.

    For every DataFrame in ``partition`` the columns that are constant inside
    that window are dropped, and ``np.corrcoef`` is evaluated with the
    remaining columns as variables.

    NOTE(review): the results are plain ``np.ndarray`` objects without column
    labels, whereas other code in this notebook reads ``.columns`` / ``.loc``
    on correlation matrices - confirm which representation is expected.

    Args:
        partition: Iterable of non-empty pandas DataFrames.

    Returns:
        list: One correlation array per window, in input order.
    """
    def _window_correlation(window):
        # Columns that never change within this window are removed first
        varying = window.loc[:, (window != window.iloc[0]).any()]
        return np.corrcoef(varying.to_numpy(), rowvar=False)

    return [_window_correlation(window) for window in partition]


def compute_similarity_from_correlation_matrices(corr_matrices):
    """Compare every correlation matrix with the one that follows it.

    Each consecutive pair is restricted to the signal names both matrices
    share, and the Spearman correlation between their upper triangles is
    computed.

    Args:
        corr_matrices: Sequence of labelled square pandas DataFrames (the
            code reads ``.columns`` and ``.loc`` on every element).

    Returns:
        list: One ``(n_shared_signals, spearman_rho, p_value)`` tuple per
        consecutive pair; empty when fewer than two matrices are given.
    """
    similarities = []

    for i in range(len(corr_matrices) - 1):

        signal_names_1 = corr_matrices[i].columns.values
        signal_names_2 = corr_matrices[i+1].columns.values
        signal_names_intersection = list(set(signal_names_1).intersection(set(signal_names_2)))

        df_1 = corr_matrices[i].loc[signal_names_intersection, signal_names_intersection]
        df_2 = corr_matrices[i+1].loc[signal_names_intersection, signal_names_intersection]

        # Run the test once and reuse it (it used to be evaluated twice per pair)
        spearman_test = spearmanr(upper(df_1), upper(df_2))

        similarities.append((df_1.shape[0], spearman_test[0], spearman_test[1]))

    return similarities


def compute_similarity_from_multiple_correlation_matrices(corr_matrices_1, corr_matrices_2):
    """Compare two sequences of correlation matrices position by position.

    Only the first ``min(len(corr_matrices_1), len(corr_matrices_2))``
    positions are compared; that count is printed. At each position both
    matrices are restricted to their shared signal names and the Spearman
    correlation between their upper triangles is computed.

    Args:
        corr_matrices_1: Sequence of labelled square pandas DataFrames.
        corr_matrices_2: Sequence of labelled square pandas DataFrames.

    Returns:
        list: One entry per compared position ``i``:
        ``(i, spearman_rho, p_value)`` when ``p_value > 0.05``, otherwise the
        bare index ``i``.
    """
    similarities = []

    # The shorter sequence bounds how many positions can be paired
    n_pairs = min(len(corr_matrices_1), len(corr_matrices_2))
    print(n_pairs)

    for i in range(n_pairs):

        signal_names_1 = corr_matrices_1[i].columns.values
        signal_names_2 = corr_matrices_2[i].columns.values
        signal_names_intersection = list(set(signal_names_1).intersection(set(signal_names_2)))

        df_1 = corr_matrices_1[i].loc[signal_names_intersection, signal_names_intersection]
        df_2 = corr_matrices_2[i].loc[signal_names_intersection, signal_names_intersection]

        # Run the test once and reuse it (it used to be evaluated twice per pair)
        spearman_test = spearmanr(upper(df_1), upper(df_2))
        correlation = spearman_test[0]
        p_value = spearman_test[1]

        if p_value > 0.05:
            similarities.append((i, correlation, p_value))
        else:
            similarities.append(i)

    return similarities


def create_time_intervals(time_steps, window, offset):
    """Build the (start, end) time stamps of the windows cut from a capture.

    Mirrors the row-windowing used elsewhere in this notebook: a window
    starting at index ``i`` is reported as
    ``(time_steps[i], time_steps[i + window])`` while ``i + window`` is a valid
    index, and the rows left from the final start position are reported as
    ``(time_steps[i], time_steps[-1])``.

    Args:
        time_steps: Indexable sequence of time stamps, one per row.
        window: Number of rows per window (positive integer).
        offset: Step between consecutive window starts (positive integer).

    Returns:
        list: ``(start_time, end_time)`` tuples in order of window start.

    Raises:
        ValueError: If ``window`` or ``offset`` is not positive
            (a non-positive offset would never terminate).
    """
    if window <= 0 or offset <= 0:
        raise ValueError("window and offset must be positive")

    i = 0
    intervals = []
    total_length = len(time_steps)

    while (i + window) < total_length:
        intervals.append((time_steps[i], time_steps[i + window]))
        i += offset

    # Only add the tail when rows remain. With offset > window the start index can
    # jump past the end, which previously raised IndexError on time_steps[i].
    if i < total_length:
        intervals.append((time_steps[i], time_steps[total_length - 1]))

    return intervals


    # # Partition of data frames
    # n = df_standardized.shape[0]
    # i = 0
    # partition = []
    
    # while (i + window_length) < n:
    #     partition.append(df_standardized.iloc[i:i + window_length, :])
    #     i = i + offset
        
    # if i != n:
    #     partition.append(df_standardized.iloc[i:n, :])
        
    # return partition
    

    # intervals = []
    # # offset = 0.1*offset
    
    # for i in np.arange(0, total_length - window + 1, offset, dtype=float):
    #     intervals.append((i, i + window))

    # if i + window < total_length:
    #     intervals.append((i + offset, total_length))

    # return intervals    


def process_CAN_file(file_name):
    """Load a CSV capture and return it with gaps filled.

    The file must contain the columns ``"Unnamed: 0"``, ``"ID"``, ``"Time"``
    and ``"Label"``. ``"Unnamed: 0"`` is dropped, the other three are moved to
    the front (in the order ID, Time, Label), missing values are filled
    forward and then backward, and any row that still has a missing value is
    removed.

    Args:
        file_name: Path of the CSV file.

    Returns:
        pandas.DataFrame: The cleaned frame.
    """
    frame = pd.read_csv(file_name, engine="c").drop("Unnamed: 0", axis=1)

    # Each insert pushes earlier ones right, so the final order is ID, Time, Label
    for column in ("Label", "Time", "ID"):
        frame.insert(0, column, frame.pop(column))

    # Forward fill with the most recent value, back fill the leading gaps,
    # then discard rows in columns that never received a value.
    return frame.ffill().bfill().dropna()

## File Names

In [5]:
# Names of the attack captures used for testing, one per line.
testing_captures = [
    "correlated_masquerade_1_030804_082640",
    "correlated_masquerade_2_031128_011320",
    "correlated_masquerade_3_040322_190000",
    "road_attack_max_speedometer_attack_1_masquerade_060215_054000",
    "road_attack_max_speedometer_attack_2_masquerade_060611_002640",
    "road_attack_max_speedometer_attack_3_masquerade_061004_181320",
    "road_attack_max_engine_coolant_temp_attack_masquerade_041109_063320",
    "road_attack_reverse_light_on_attack_1_masquerade_091205_030000",
    "road_attack_reverse_light_on_attack_2_masquerade_100330_214640",
    "road_attack_reverse_light_on_attack_3_masquerade_100724_153320",
    "road_attack_reverse_light_off_attack_1_masquerade_080110_162000",
    "road_attack_reverse_light_off_attack_2_masquerade_080505_110640",
    "road_attack_reverse_light_off_attack_3_masquerade_080829_045320",
]

print(len(testing_captures), testing_captures)

13 ['correlated_masquerade_1_030804_082640', 'correlated_masquerade_2_031128_011320', 'correlated_masquerade_3_040322_190000', 'road_attack_max_speedometer_attack_1_masquerade_060215_054000', 'road_attack_max_speedometer_attack_2_masquerade_060611_002640', 'road_attack_max_speedometer_attack_3_masquerade_061004_181320', 'road_attack_max_engine_coolant_temp_attack_masquerade_041109_063320', 'road_attack_reverse_light_on_attack_1_masquerade_091205_030000', 'road_attack_reverse_light_on_attack_2_masquerade_100330_214640', 'road_attack_reverse_light_on_attack_3_masquerade_100724_153320', 'road_attack_reverse_light_off_attack_1_masquerade_080110_162000', 'road_attack_reverse_light_off_attack_2_masquerade_080505_110640', 'road_attack_reverse_light_off_attack_3_masquerade_080829_045320']


## Obtain Metadata

In [6]:
# Load the per-capture attack metadata.
# (Alternate location on the other machine: /home/cades/Projects/CAN/actt/data/capture_metadata.json)
with open("/home/cloud/Projects/CAN/actt/data/capture_metadata.json") as f:
    attack_metadata = json.load(f)

# Metadata keys of the attacks; later cells index this list in parallel with
# `testing_captures`, so the two lists must keep the same order.
attack_metadata_keys = [
    "correlated_signal_attack_1_masquerade",
    "correlated_signal_attack_2_masquerade",
    "correlated_signal_attack_3_masquerade",
    "max_speedometer_attack_1_masquerade",
    "max_speedometer_attack_2_masquerade",
    "max_speedometer_attack_3_masquerade",
    "max_engine_coolant_temp_attack_masquerade",
    "reverse_light_on_attack_1_masquerade",
    "reverse_light_on_attack_2_masquerade",
    "reverse_light_on_attack_3_masquerade",
    "reverse_light_off_attack_1_masquerade",
    "reverse_light_off_attack_2_masquerade",
    "reverse_light_off_attack_3_masquerade",
]

print(len(attack_metadata_keys))

13


In [7]:
# Inspect the metadata record of the first attack (description, injected ID,
# injection interval in seconds, ...).
print(attack_metadata_keys[0])
attack_metadata["correlated_signal_attack_1_masquerade"]

correlated_signal_attack_1_masquerade


{'description': 'start from driving; accelerate; start injecting; car rolls to stop; stop injecting; accelerate',
 'elapsed_sec': 33.101852,
 'injection_data_str': '595945450000FFFF',
 'injection_id': '0x6e0',
 'injection_interval': [9.191851, 30.050109],
 'modified': True,
 'on_dyno': True}

## Experiments with Correlations of Same Signals

In [8]:
# Load the benign (ambient) capture as one wide frame: one column per signal
# plus ID / Label / Time. NOTE(review): absolute, machine-specific path.
df_benign = pd.read_csv("/home/cloud/Projects/CAN/actt/data/ROAD-data-time-series/generated/ambient/ambient_dyno_drive_extended_long_generated.csv", engine="c") # Reading data
display(df_benign)

Unnamed: 0.1,Unnamed: 0,Signal_1_of_ID_263,Signal_2_of_ID_263,Signal_3_of_ID_263,Signal_4_of_ID_263,Signal_5_of_ID_263,Signal_6_of_ID_263,Signal_7_of_ID_263,Signal_8_of_ID_263,Signal_9_of_ID_263,...,Signal_2_of_ID_705,Signal_3_of_ID_705,Signal_4_of_ID_705,Signal_5_of_ID_705,Signal_6_of_ID_705,Signal_7_of_ID_705,Signal_8_of_ID_705,ID,Label,Time
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,263.0,0.0,0.000000
1,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,1621.0,0.0,0.001018
2,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,186.0,0.0,0.001019
3,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,813.0,0.0,0.010104
4,13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,263.0,0.0,0.020093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335044,1277001,,,,,,,,,,...,-100.0,-1.0,-110.0,1.0,1.0,143.0,111.0,651.0,0.0,657.872243
1335045,1277234,,,,,,,,,,...,-96.0,-1.0,-112.0,1.0,1.0,131.0,121.0,14.0,0.0,657.872244
1335046,1277464,,,,,,,,,,...,-97.0,-1.0,-113.0,1.0,1.0,122.0,132.0,167.0,0.0,657.872245
1335047,1277646,,,,,,,,,,...,-97.0,-1.0,-110.0,1.0,1.0,109.0,142.0,458.0,0.0,657.873274


In [9]:
# Remove the unwanted "Unnamed: 0" column.
# NOTE: this cell is not idempotent - running it a second time raises KeyError
# because the column is already gone.
df_benign = df_benign.drop("Unnamed: 0", axis=1)
display(df_benign)

Unnamed: 0,Signal_1_of_ID_263,Signal_2_of_ID_263,Signal_3_of_ID_263,Signal_4_of_ID_263,Signal_5_of_ID_263,Signal_6_of_ID_263,Signal_7_of_ID_263,Signal_8_of_ID_263,Signal_9_of_ID_263,Signal_1_of_ID_1621,...,Signal_2_of_ID_705,Signal_3_of_ID_705,Signal_4_of_ID_705,Signal_5_of_ID_705,Signal_6_of_ID_705,Signal_7_of_ID_705,Signal_8_of_ID_705,ID,Label,Time
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,263.0,0.0,0.000000
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,1621.0,0.0,0.001018
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,186.0,0.0,0.001019
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,813.0,0.0,0.010104
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,...,,,,,,,,263.0,0.0,0.020093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335044,,,,,,,,,,,...,-100.0,-1.0,-110.0,1.0,1.0,143.0,111.0,651.0,0.0,657.872243
1335045,,,,,,,,,,,...,-96.0,-1.0,-112.0,1.0,1.0,131.0,121.0,14.0,0.0,657.872244
1335046,,,,,,,,,,,...,-97.0,-1.0,-113.0,1.0,1.0,122.0,132.0,167.0,0.0,657.872245
1335047,,,,,,,,,,,...,-97.0,-1.0,-110.0,1.0,1.0,109.0,142.0,458.0,0.0,657.873274


In [10]:
# Move the bookkeeping columns to the front. Each insert pushes the previous
# ones to the right, so the resulting order is ID, Time, Label, <signals...>.
for column_name in ("Label", "Time", "ID"):
    df_benign.insert(0, column_name, df_benign.pop(column_name))


display(df_benign)

Unnamed: 0,ID,Time,Label,Signal_1_of_ID_263,Signal_2_of_ID_263,Signal_3_of_ID_263,Signal_4_of_ID_263,Signal_5_of_ID_263,Signal_6_of_ID_263,Signal_7_of_ID_263,...,Signal_4_of_ID_961,Signal_5_of_ID_961,Signal_1_of_ID_705,Signal_2_of_ID_705,Signal_3_of_ID_705,Signal_4_of_ID_705,Signal_5_of_ID_705,Signal_6_of_ID_705,Signal_7_of_ID_705,Signal_8_of_ID_705
0,263.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
1,1621.0,0.001018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2,186.0,0.001019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
3,813.0,0.010104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
4,263.0,0.020093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335044,651.0,657.872243,0.0,,,,,,,,...,,,0.0,-100.0,-1.0,-110.0,1.0,1.0,143.0,111.0
1335045,14.0,657.872244,0.0,,,,,,,,...,,,0.0,-96.0,-1.0,-112.0,1.0,1.0,131.0,121.0
1335046,167.0,657.872245,0.0,,,,,,,,...,,,0.0,-97.0,-1.0,-113.0,1.0,1.0,122.0,132.0
1335047,458.0,657.873274,0.0,,,,,,,,...,,,0.0,-97.0,-1.0,-110.0,1.0,1.0,109.0,142.0


In [11]:
# df_benign.sort_values(by="Time", ascending=True, inplace=True)
# display(df_benign)

## Forward Filling Mechanism

In [12]:
# Forward fill with the most recent value of every column, back fill the gaps
# left in the first rows, then drop any row that still has a missing value.
df_benign = df_benign.ffill().bfill().dropna()
display(df_benign)

Unnamed: 0,ID,Time,Label,Signal_1_of_ID_263,Signal_2_of_ID_263,Signal_3_of_ID_263,Signal_4_of_ID_263,Signal_5_of_ID_263,Signal_6_of_ID_263,Signal_7_of_ID_263,...,Signal_4_of_ID_961,Signal_5_of_ID_961,Signal_1_of_ID_705,Signal_2_of_ID_705,Signal_3_of_ID_705,Signal_4_of_ID_705,Signal_5_of_ID_705,Signal_6_of_ID_705,Signal_7_of_ID_705,Signal_8_of_ID_705
0,263.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1621.0,0.001018,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,186.0,0.001019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,813.0,0.010104,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,263.0,0.020093,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335044,651.0,657.872243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,250.0,152.0,0.0,-100.0,-1.0,-110.0,1.0,1.0,143.0,111.0
1335045,14.0,657.872244,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,250.0,152.0,0.0,-96.0,-1.0,-112.0,1.0,1.0,131.0,121.0
1335046,167.0,657.872245,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,250.0,152.0,0.0,-97.0,-1.0,-113.0,1.0,1.0,122.0,132.0
1335047,458.0,657.873274,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,250.0,152.0,0.0,-97.0,-1.0,-110.0,1.0,1.0,109.0,142.0


## Partition Time Series Benign

In [13]:
# Use the whole benign capture as a single window: with window == number of
# rows, process_multivariate_signals returns exactly one partition.
window = df_benign.shape[0]
offset = window
partition_training = process_multivariate_signals(df_benign, window, offset)

In [14]:
# Number of training windows, followed by the (min-max normalized) first window.
print(len(partition_training))
partition_training[0]

1


Unnamed: 0,ID,Time,Signal_1_of_ID_263,Signal_2_of_ID_263,Signal_3_of_ID_263,Signal_4_of_ID_263,Signal_5_of_ID_263,Signal_7_of_ID_263,Signal_8_of_ID_263,Signal_9_of_ID_263,...,Signal_4_of_ID_961,Signal_5_of_ID_961,Signal_1_of_ID_705,Signal_2_of_ID_705,Signal_3_of_ID_705,Signal_4_of_ID_705,Signal_5_of_ID_705,Signal_6_of_ID_705,Signal_7_of_ID_705,Signal_8_of_ID_705
0,0.144220,0.000000,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,1.0,1.000000,1.0,1.000000,0.0,0.0,0.000000,0.000000
1,0.906285,0.000002,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,1.0,1.000000,1.0,1.000000,0.0,0.0,0.000000,0.000000
2,0.101010,0.000002,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,1.0,1.000000,1.0,1.000000,0.0,0.0,0.000000,0.000000
3,0.452862,0.000015,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,1.0,1.000000,1.0,1.000000,0.0,0.0,0.000000,0.000000
4,0.144220,0.000031,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,1.0,1.000000,1.0,1.000000,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335044,0.361953,0.999994,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.980392,0.596078,0.0,0.159664,0.0,0.303797,1.0,1.0,0.560784,0.435294
1335045,0.004489,0.999994,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.980392,0.596078,0.0,0.193277,0.0,0.291139,1.0,1.0,0.513725,0.474510
1335046,0.090348,0.999994,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.980392,0.596078,0.0,0.184874,0.0,0.284810,1.0,1.0,0.478431,0.517647
1335047,0.253648,0.999995,0.0,0.468864,0.0,0.0,0.0,0.0,0.0,0.0,...,0.980392,0.596078,0.0,0.184874,0.0,0.303797,1.0,1.0,0.427451,0.556863


## Compute Correlation Matrices

In [15]:
# One correlation matrix per training window (a single one here).
corr_matrices_training = compute_correlation_matrices(partition_training)
print(len(corr_matrices_training))

1


In [16]:
# Shape of every training correlation matrix
[matrix.shape for matrix in corr_matrices_training]

[(464, 464)]

In [17]:
# Pool the above-diagonal correlations of all training matrices into one 1-D sample
corr_sample_training = np.concatenate([upper(matrix) for matrix in corr_matrices_training])
len(corr_sample_training)

107416

In [18]:
# Peek at the pooled training correlations
corr_sample_training

array([-1.36218269e-02,  1.60891702e-03,  6.65948504e-05, ...,
        8.63075510e-01,  8.64827915e-01,  5.24671580e-01])

## Extract Correlation Matrix From Testing

In [12]:
# Reminder of the available attack captures
testing_captures

['correlated_masquerade_1_030804_082640',
 'correlated_masquerade_2_031128_011320',
 'correlated_masquerade_3_040322_190000',
 'road_attack_max_speedometer_attack_1_masquerade_060215_054000',
 'road_attack_max_speedometer_attack_2_masquerade_060611_002640',
 'road_attack_max_speedometer_attack_3_masquerade_061004_181320',
 'road_attack_max_engine_coolant_temp_attack_masquerade_041109_063320',
 'road_attack_reverse_light_on_attack_1_masquerade_091205_030000',
 'road_attack_reverse_light_on_attack_2_masquerade_100330_214640',
 'road_attack_reverse_light_on_attack_3_masquerade_100724_153320',
 'road_attack_reverse_light_off_attack_1_masquerade_080110_162000',
 'road_attack_reverse_light_off_attack_2_masquerade_080505_110640',
 'road_attack_reverse_light_off_attack_3_masquerade_080829_045320']

In [19]:
# Load and clean the first attack capture (drop index column, reorder, fill gaps).
# NOTE(review): absolute, machine-specific path.
df_attack = process_CAN_file("/home/cloud/Projects/CAN/actt/data/ROAD-data-time-series/generated/attack/correlated_signal_attack_1_masquerade_generated.csv")
display(df_attack)

Unnamed: 0,ID,Time,Label,Signal_1_of_ID_852,Signal_2_of_ID_852,Signal_3_of_ID_852,Signal_4_of_ID_852,Signal_1_of_ID_1505,Signal_2_of_ID_1505,Signal_3_of_ID_1505,...,Signal_2_of_ID_1621,Signal_3_of_ID_1621,Signal_4_of_ID_1621,Signal_5_of_ID_1621,Signal_6_of_ID_1621,Signal_7_of_ID_1621,Signal_8_of_ID_1621,Signal_9_of_ID_1621,Signal_1_of_ID_1649,Signal_2_of_ID_1649
0,852.0,0.000000,0.0,32808.0,0.0,78.0,9.0,510.0,2.0,3.0,...,-1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
1,1505.0,0.001020,0.0,32808.0,0.0,76.0,11.0,510.0,2.0,3.0,...,-1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
2,651.0,0.001021,0.0,32808.0,0.0,76.0,11.0,510.0,2.0,3.0,...,-1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
3,167.0,0.001023,0.0,32808.0,0.0,75.0,12.0,510.0,2.0,3.0,...,-1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
4,208.0,0.001024,0.0,32808.0,0.0,75.0,12.0,510.0,2.0,3.0,...,-1.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74147,51.0,33.097961,0.0,32791.0,0.0,97.0,7.0,511.0,1.0,3.0,...,-1.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0
74148,167.0,33.097963,0.0,32791.0,0.0,97.0,7.0,511.0,1.0,3.0,...,-1.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0
74149,61.0,33.097964,0.0,32791.0,0.0,97.0,7.0,511.0,1.0,3.0,...,-1.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0
74150,1413.0,33.099802,0.0,32791.0,0.0,97.0,7.0,511.0,1.0,3.0,...,-1.0,0.0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Time stamp of the last row of the attack capture
df_attack["Time"].to_numpy()[-1]

33.10185205936432

In [21]:
# Cut the attack capture into consecutive, non-overlapping windows of 500 rows
# (offset == window); the last window holds whatever rows remain.
window = 500
offset = window
partition_testing = process_multivariate_signals(df_attack, window, offset)

In [22]:
# Number of testing windows, followed by the last (shorter) window.
print(len(partition_testing))
partition_testing[-1]

149


Unnamed: 0,ID,Time,Label,Signal_1_of_ID_852,Signal_3_of_ID_852,Signal_4_of_ID_852,Signal_1_of_ID_1505,Signal_2_of_ID_1505,Signal_6_of_ID_1505,Signal_4_of_ID_167,...,Signal_3_of_ID_953,Signal_7_of_ID_778,Signal_1_of_ID_1331,Signal_5_of_ID_1751,Signal_5_of_ID_631,Signal_5_of_ID_930,Signal_1_of_ID_1621,Signal_6_of_ID_1621,Signal_1_of_ID_1649,Signal_2_of_ID_1649
74000,0.375421,0.997810,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.000000,0.0,0.559322,0.0,1.0,0.0
74001,0.004489,0.997878,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.000000,0.0,0.559322,0.0,1.0,0.0
74002,0.392256,0.997909,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.000000,0.0,0.559322,0.0,1.0,0.0
74003,0.144220,0.997939,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.000000,0.0,0.559322,0.0,1.0,0.0
74004,0.984287,0.997939,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.000000,0.0,0.559322,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74147,0.025253,0.999882,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.428571,1.0,0.050847,1.0,1.0,0.0
74148,0.090348,0.999883,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.428571,1.0,0.067797,1.0,1.0,0.0
74149,0.030864,0.999883,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.428571,1.0,0.084746,1.0,1.0,0.0
74150,0.789562,0.999938,0.0,0.273684,0.694444,0.428571,1.0,0.0,0.863014,0.666667,...,1.0,1.0,0.875,1.0,0.428571,1.0,0.084746,1.0,1.0,0.0


## Compute Correlation Matrices Testing

In [24]:
# One correlation matrix per testing window; constant columns are dropped per
# window, so the matrices may differ in size.
corr_matrices_testing = compute_correlation_matrices(partition_testing)
len(corr_matrices_testing)

149

In [25]:
# Ground truth for the first attack; "injection_interval" gives the start and
# end of the injection in seconds.
display(attack_metadata_keys[0])
attack_metadata[attack_metadata_keys[0]]

'correlated_signal_attack_1_masquerade'

{'description': 'start from driving; accelerate; start injecting; car rolls to stop; stop injecting; accelerate',
 'elapsed_sec': 33.101852,
 'injection_data_str': '595945450000FFFF',
 'injection_id': '0x6e0',
 'injection_interval': [9.191851, 30.050109],
 'modified': True,
 'on_dyno': True}

## Hypothesis Testing (Single Attack)

In [21]:
# Window-level detection on the first attack capture.
#
# NOTE(review): this cell does not match the function definitions currently in
# this notebook and appears to come from an earlier revision:
#   * process_multivariate_signals is defined as (df, window_length, offset) but
#     is called here with four arguments;
#   * create_time_intervals calls len() on its first argument but receives the
#     scalar total_length here;
#   * compute_correlation_matrices returns np.ndarray objects, which have no
#     .columns / .loc;
#   * ground_truth_dbc_path is not defined in the visible cells.
# Reconcile these before relying on a Restart & Run All.
signals_training = corr_matrices_training[0].columns.values

window = 10
offset = 1

attack_key = attack_metadata_keys[0]
print("Processing: ", attack_key)
signal_multivar_ts, timepts, aid_signal_tups = from_capture_to_time_series(testing_captures[0], ground_truth_dbc_path)

partition_testing = process_multivariate_signals(signal_multivar_ts, aid_signal_tups, window, offset) # Partition time series
print("intervals: ", len(partition_testing))

corr_matrices_testing = compute_correlation_matrices(partition_testing) # Compute Correlations

total_length = timepts[-1]
print("total length (s): ", total_length) 
intervals_testing = create_time_intervals(total_length, window/10, offset/10)

# Ground-truth injection interval (seconds) of this attack
injection_start = attack_metadata[attack_key]["injection_interval"][0]
injection_end = attack_metadata[attack_key]["injection_interval"][1]
print("attack interval (s): ", injection_start, injection_end)

tp, fp, fn, tn = 0, 0, 0, 0

for index_interval in tqdm(range(len(intervals_testing))):

    # Signals present in both the training matrix and this window's matrix
    signals_testing = corr_matrices_testing[index_interval].columns.values
    signal_names_intersection = list(set(signals_training).intersection(set(signals_testing)))

    # Restrict both correlation matrices to the shared signals
    corr_matrix_1 = corr_matrices_training[0].loc[signal_names_intersection, signal_names_intersection]
    corr_matrix_2 = corr_matrices_testing[index_interval].loc[signal_names_intersection, signal_names_intersection]

    # Spearman test between the two upper triangles
    spearman_test = spearmanr(upper(corr_matrix_1), upper(corr_matrix_2))

    # Ground truth: the window straddles the injection start, lies strictly
    # inside the injection interval, or straddles the injection end.
    interval_start = intervals_testing[index_interval][0]
    interval_end = intervals_testing[index_interval][1]
    overlaps_attack = (
        (interval_end > injection_start and interval_start < injection_start)
        or (interval_start > injection_start and interval_end < injection_end)
        or (interval_start < injection_end and interval_end > injection_end)
    )

    # A window is flagged when the test finds no significant correlation with training
    flagged = spearman_test[1] > 0.05

    if flagged and overlaps_attack:
        tp += 1
    elif flagged:
        fp += 1
    elif overlaps_attack:
        fn += 1
    else:
        tn += 1

# Metrics; each one is NaN when its denominator is zero
precision = tp/(tp + fp) if tp + fp != 0 else np.nan
recall = tp/(tp + fn) if tp + fn != 0 else np.nan
f1 = 2*((precision*recall)/(precision + recall)) if precision + recall != 0 else np.nan
fpr = fp/(fp + tn) if fp + tn != 0 else np.nan
fnr = fn/(fn + tp) if fn + tp != 0 else np.nan

# mcc: when any marginal count is zero the numerator is returned unnormalized
if (tp+fp == 0) or (tp+fn == 0) or (tn+fp == 0) or (tn+fn == 0):
    mcc = (tp*tn) - (fp*fn)
else:
    mcc = (tp*tn - fp*fn)/(math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)))

print(f"tp: {tp}, tn: {tn}, fp: {fp}, fn: {fn}")
print(f"precision: {precision:.3f}, recall: {recall:.3f}, f1: {f1:.3f}, fpr: {fpr:.3f}, fnr: {fnr:.3f}, mcc: {mcc:.3f}")
print(f"positive_intervals: {tp+fn:.3f}, negative_intervals: {tn+fp:.3f}\n")

Processing:  correlated_signal_attack_1_masquerade
intervals:  331
total length (s):  33.9
attack interval (s):  9.191851 30.050109


100%|██████████| 330/330 [00:00<00:00, 344.91it/s]

tp: 13, tn: 107, fp: 4, fn: 206
precision: 0.765, recall: 0.059, f1: 0.110, fpr: 0.036, fnr: 0.941, mcc: 0.050
positive_intervals: 219.000, negative_intervals: 111.000






## Hypothesis Testing (All Attacks)

In [24]:
# Score the correlation-based detector on every attack capture.
# For each testing window, the upper triangle of its correlation matrix is
# compared (Spearman test) with that of the first training correlation matrix;
# the resulting per-window decision is scored against the labelled injection
# interval, and summary metrics are printed per attack.
window = 10
offset = 1
significance_level = 0.05 # p-value threshold of the Spearman test
corr_matrix_training = corr_matrices_training[0] # reference matrix, loop-invariant
signals_training = corr_matrix_training.columns.values
signals_training_set = set(signals_training) # built once instead of once per window

for index_attack in range(len(attack_metadata_keys)):

    attack_name = attack_metadata_keys[index_attack]
    print("Processing: ", attack_name)
    signal_multivar_ts, timepts, aid_signal_tups = from_capture_to_time_series(testing_captures[index_attack], ground_truth_dbc_path)

    partition_testing = process_multivariate_signals(signal_multivar_ts, aid_signal_tups, window, offset) # Partition time series
    print("intervals: ", len(partition_testing))

    corr_matrices_testing = compute_correlation_matrices(partition_testing) # Compute Correlations

    total_length = timepts[-1]
    print("total length (s): ", total_length)
    # NOTE(review): window and offset are divided by 10 here, presumably to
    # convert sample counts into seconds at 10 samples/s -- TODO confirm.
    intervals_testing = create_time_intervals(total_length, window/10, offset/10)

    # Ground-truth attack span, looked up once per attack instead of per window.
    attack_interval = attack_metadata[attack_name]["injection_interval"]
    attack_start, attack_end = attack_interval[0], attack_interval[1]
    print("attack interval (s): ", attack_start, attack_end)

    tp, fp, fn, tn = 0, 0, 0, 0

    for index_interval in range(len(intervals_testing)):

        interval_start = intervals_testing[index_interval][0]
        interval_end = intervals_testing[index_interval][1]

        # Compute signal names intersection
        signals_testing = corr_matrices_testing[index_interval].columns.values
        signal_names_intersection = list(signals_training_set.intersection(set(signals_testing)))

        # Filter correlation matrices by common names
        corr_matrix_1 = corr_matrix_training.loc[signal_names_intersection, signal_names_intersection]
        corr_matrix_2 = corr_matrices_testing[index_interval].loc[signal_names_intersection, signal_names_intersection]

        # Do hypothesis test
        spearman_test = spearmanr(upper(corr_matrix_1), upper(corr_matrix_2))

        # A p-value above the threshold means the test found no significant
        # rank correlation between the two matrices; that is the detection.
        is_positive_detection = spearman_test[1] > significance_level

        # Ground truth: the window overlaps the injection interval. The single
        # overlap test also covers windows whose start/end coincide exactly
        # with the attack boundaries, which a three-way strict comparison
        # (straddles start / strictly inside / straddles end) leaves out.
        overlaps_attack = interval_start < attack_end and interval_end > attack_start

        if is_positive_detection:
            if overlaps_attack:
                tp += 1
            else:
                fp += 1
        else:
            if overlaps_attack:
                fn += 1
            else:
                tn += 1

    # Metrics: a ratio with a zero denominator is reported as NaN.

    # precision
    if tp + fp != 0:
        precision = tp/(tp + fp)
    else:
        precision = np.nan

    # recall
    if tp + fn != 0:
        recall = tp/(tp + fn)
    else:
        recall = np.nan

    # f1
    if precision + recall != 0:
        f1 = 2*((precision*recall)/(precision + recall))
    else:
        f1 = np.nan

    # fpr
    if fp + tn != 0:
        fpr = fp/(fp + tn)
    else:
        fpr = np.nan

    # fnr
    if fn + tp != 0:
        fnr = fn/(fn + tp)
    else:
        fnr = np.nan

    # mcc
    if (tp+fp == 0) or (tp+fn == 0) or (tn+fp == 0) or (tn+fn == 0):
        # The normalising term would be zero. With non-negative counts the
        # numerator is then zero as well, so this evaluates to 0.
        mcc = (tp*tn) - (fp*fn)
    else:
        mcc = (tp*tn - fp*fn)/(math.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)))

    print(f"tp: {tp}, tn: {tn}, fp: {fp}, fn: {fn}")
    print(f"precision: {precision:.3f}, recall: {recall:.3f}, f1: {f1:.3f}, fpr: {fpr:.3f}, fnr: {fnr:.3f}, mcc: {mcc:.3f}")
    print(f"positive_intervals: {tp+fn:.3f}, negative_intervals: {tn+fp:.3f}\n")

Processing:  correlated_signal_attack_1_masquerade
intervals:  331
total length (s):  33.9
attack interval (s):  9.191851 30.050109
tp: 13, tn: 107, fp: 4, fn: 206
precision: 0.765, recall: 0.059, f1: 0.110, fpr: 0.036, fnr: 0.941, mcc: 0.050
positive_intervals: 219.000, negative_intervals: 111.000

Processing:  correlated_signal_attack_2_masquerade
intervals:  281
total length (s):  28.9
attack interval (s):  6.830477 28.225908
tp: 1, tn: 49, fp: 10, fn: 220
precision: 0.091, recall: 0.005, f1: 0.009, fpr: 0.169, fnr: 0.995, mcc: -0.346
positive_intervals: 221.000, negative_intervals: 59.000

Processing:  correlated_signal_attack_3_masquerade
intervals:  161
total length (s):  16.9
attack interval (s):  4.318482 16.95706
tp: 32, tn: 29, fp: 5, fn: 95
precision: 0.865, recall: 0.252, f1: 0.390, fpr: 0.147, fnr: 0.748, mcc: 0.102
positive_intervals: 127.000, negative_intervals: 34.000

Processing:  max_speedometer_attack_1_masquerade
intervals:  881
total length (s):  88.9
attack interv