In [1]:
import gc
from pathlib import Path
import datetime
import struct
import glob

import numpy as np
import pandas as pd
from scipy.stats import norm, rv_histogram
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from sklearn import metrics
from pympler import asizeof

pd.set_option('display.float_format', str)

######### SELECT THE DATASET #########
# DATASET = 'Syncan'
DATASET = 'X-CANIDS'
DATASET_DIR = f'../../Dataset/{DATASET}'

In [3]:
def str_to_list(data_str: str) -> list:
    data_list_str = data_str.split()
    data_list = [int(x) for x in data_list_str]
    if len(data_list) < 8:  # fill with dummy values (0) to 8 bytes
        data_list += [0] * (8 - len(data_list))
    return data_list

def bytes_to_list(data_bytes: bytes) -> list:
    l = len(data_bytes)
    decimal_values = struct.unpack(f'{l}B', data_bytes)
    return list(decimal_values)

def load_arrange_data(file_path, dataset, print_option=True):
    if dataset == 'Syncan':
        df = pd.read_csv(file_path, delimiter=',')
        df['Time'] = round(df['Time'] / 1000, 7)     # milliseconds to seconds
        df.rename(columns={'Label': 'Session'}, inplace=True)
        df['SessionCat'] = 'Normal'
        df.loc[df['Session'] == 1, 'SessionCat'] = 'Attack'
        if print_option:
            print(f'# rows: {df.shape[0]:,}')
            print(df['Session'].value_counts())
        return df
    elif dataset == 'X-CANIDS':
        df = pd.read_parquet(file_path)
        # make SessionCat
        df['Label'] = 'Normal'
        splits = Path(file_path).stem.split('-')
        attack_map_dict = {'fabr': 'Fabrication', 'fuzz': 'Fuzzing', 'masq': 'Masquerade', 'repl': 'Replay', 'susp': 'Suspension'}
        attack = None
        if len(splits) > 1:
            attack, aidh = attack_map_dict[splits[1]], splits[2]
            df.loc[df['label'] == 1, 'Label'] = attack
        # Make Session labels (Note: Attacks in X-CANIDS Dataset were performed without a pause)
        df['Session'] = 0
        df['SessionCat'] = 'Normal'
        msgs = df.loc[df['label'] == 1]
        t_start, t_end = 0, 0
        if msgs.shape[0] > 0:  # if the dataset includes attack messages
            t_start, t_end = msgs.index.min(), msgs.index.max()
            df.loc[t_start:t_end, 'Session'] = 1
            df.loc[t_start:t_end, 'SessionCat'] = attack
            assert df.query('label == 1 and Session == 0').shape[0] == 0   
        if attack == 'Suspension':  # it doens't have label=1 rows, so apply a rough approach
            assert len(df.loc[(480 < df.index.total_seconds()) & (df.index.total_seconds() <= 1440) & (df['label'] == 1)]) == 0
            df.loc[(480 < df.index.total_seconds()) & (df.index.total_seconds() <= 1440), 'Session'] = 1
            df.loc[(480 < df.index.total_seconds()) & (df.index.total_seconds() <= 1440), 'SessionCat'] = attack
        # Format columns
        df.reset_index(inplace=True)
        df['Time'] = df['timestamp'].dt.total_seconds()
        df['Data'] = df['data'].apply(bytes_to_list)
        df.rename(columns={'arbitration_id': 'ID', 'dlc': 'DLC'}, inplace=True)
        if print_option:
            print(f'# rows: {df.shape[0]:,}')
            print(pd.concat([df['Label'].value_counts().rename('Label'), df['SessionCat'].value_counts().rename('SessionCat')], axis=1))
        return df[['Session', 'SessionCat', 'Label', 'Time', 'ID', 'DLC', 'Data']]

# Define a training set and validation set

In [3]:
if DATASET == 'Syncan':
    data_files = {
        'train': [
            f'{DATASET_DIR}/train_train.csv'
        ],
        'valid': [
            f'{DATASET_DIR}/train_valid.csv'
        ],
        'test': [
            f'{DATASET_DIR}/test_normal.csv',
            f'{DATASET_DIR}/test_flooding.csv',
            f'{DATASET_DIR}/test_plateau.csv',
            f'{DATASET_DIR}/test_continuous.csv',
            f'{DATASET_DIR}/test_playback.csv',
            f'{DATASET_DIR}/test_suppress.csv'
        ]
    }
elif DATASET == 'X-CANIDS':
    data_files = {
        'train': glob.glob(f'{DATASET_DIR}/raw/dump[1-4].parquet'),
        'valid': [f'{DATASET_DIR}/raw/dump5.parquet'],
        'test': glob.glob(f'{DATASET_DIR}/raw/dump6-*.parquet')
    }

# Statistical Feature Extraction

In [4]:
def time_cut(data: pd.DataFrame):
    time_cut = data.loc[data.groupby('ID').cumcount() == 1, 'Time'].max()  # the timestamp that every ID has been occurred at least twice
    return time_cut

def get_counts(data: pd.DataFrame, cut: bool, ids=[], sessioncat=False):
    if sessioncat:
        base_cols = ['Session', 'SessionCat', 'Time', 'ID']
    else:
        base_cols = ['Session', 'Time', 'ID']
    data = data[base_cols].copy()
    n_rows = data.shape[0]
    data['Timedelta'] = pd.to_timedelta(data['Time'], unit='s')
    data_cnt = data[['ID', 'Timedelta']].copy()
    data_cnt['ID'] = data_cnt['ID'].astype('category')
    data_cnt = pd.get_dummies(data_cnt, prefix='', prefix_sep='').rolling('1s', on='Timedelta').sum()
    id_cols = list(data_cnt.columns[1:])
    if DATASET == 'X-CANIDS':
        id_cols = [int(id_str) for id_str in id_cols]
        data_cnt.columns = list(data_cnt.columns[:1]) + id_cols
    data_cnt[id_cols] = data_cnt[id_cols].astype(int)
    if ids:
        data_cnt = data_cnt[list(data_cnt.columns[:1]) + ids]
    base_cols.remove('ID')
    data_cnt = pd.concat([data[base_cols + ['Timedelta']], data_cnt], axis=1).drop(columns=['Timedelta'])
    if cut:
        t_cut = time_cut(data)
        data_cnt = data_cnt.loc[data_cnt['Time'] >= t_cut]
        print(f'Data has been truncated to remove NaN ({n_rows - data_cnt.shape[0]:,} rows removed)')
    assert data_cnt.isna().sum().sum() == 0
    return data_cnt

def get_time_intervals(data: pd.DataFrame, ffill: bool, cut: bool):
    data = data[['Session', 'Time', 'ID']].copy()
    n_rows = data.shape[0]
    data['Interval'] = data.groupby('ID')['Time'].diff()
    data_itv = pd.concat([data[['Session', 'Time']], data.pivot(columns='ID', values='Interval')], axis=1)
    if ffill:
        data_itv.ffill(inplace=True)
    if cut:
        t_cut = time_cut(data)
        data_itv = data_itv.loc[data_itv['Time'] >= t_cut]
        print(f'Data has been truncated to remove NaN ({n_rows - data_itv.shape[0]:,} rows removed)')
    if ffill and cut:
        assert data_itv.isna().sum().sum() == 0
    return data_itv

In [5]:
ids_union = set()
ids_inter = set()
cnt_dfs = list()
itv_dfs = list()

total_rows = 0
for data_file in data_files['train']:
    print(f'{data_file}')
    df_data = load_arrange_data(data_file, dataset=DATASET)
    total_rows += df_data.shape[0]
    unique_ids = set(df_data['ID'].unique())
    print(f'# unique CAN IDs = {len(unique_ids)}')
    ids_union |= unique_ids
    if not ids_inter:
        ids_inter = unique_ids.copy()
    else:
        ids_inter &= unique_ids
    df_data_cnt = get_counts(df_data, cut=True)
    df_data_itv = get_time_intervals(df_data, ffill=False, cut=True)
    cnt_dfs.append(df_data_cnt)
    itv_dfs.append(df_data_itv)

df_cnt_concat = pd.concat(cnt_dfs, axis=0, ignore_index=True)
df_itv_concat = pd.concat(itv_dfs, axis=0, ignore_index=True)
print('\nAll ID-specific time intervals in the train dataset are concatenated.')
print(df_cnt_concat.shape)
print(df_itv_concat.shape)

../../Dataset/X-CANIDS/raw/dump3.parquet
# rows: 3,233,753
          Label  SessionCat
Normal  3233753     3233753
# unique CAN IDs = 62
Data has been truncated to remove NaN (5,960 rows removed)
Data has been truncated to remove NaN (5,960 rows removed)
../../Dataset/X-CANIDS/raw/dump4.parquet
# rows: 4,761,327
          Label  SessionCat
Normal  4761327     4761327
# unique CAN IDs = 64
Data has been truncated to remove NaN (7,268 rows removed)
Data has been truncated to remove NaN (7,268 rows removed)
../../Dataset/X-CANIDS/raw/dump2.parquet
# rows: 4,134,502
          Label  SessionCat
Normal  4134502     4134502
# unique CAN IDs = 64
Data has been truncated to remove NaN (58,764 rows removed)
Data has been truncated to remove NaN (58,764 rows removed)
../../Dataset/X-CANIDS/raw/dump1.parquet
# rows: 3,123,785
          Label  SessionCat
Normal  3123785     3123785
# unique CAN IDs = 62
Data has been truncated to remove NaN (6,805 rows removed)
Data has been truncated to remove NaN

In [6]:
print(f'\nCAN IDs in every dataset ({len(ids_inter)}): {sorted(list(ids_inter))}')
print(f'\nCAN IDs only in a specific dataset ({len(ids_union - ids_inter)}): {sorted(list(ids_union - ids_inter))}')
target_ids = sorted(list(ids_inter))


CAN IDs in every dataset (62): [66, 67, 68, 127, 128, 129, 273, 274, 275, 339, 354, 356, 399, 512, 544, 593, 608, 688, 790, 809, 897, 899, 902, 903, 1040, 1078, 1151, 1168, 1170, 1265, 1280, 1282, 1287, 1292, 1312, 1314, 1322, 1331, 1332, 1333, 1345, 1348, 1349, 1351, 1353, 1356, 1363, 1365, 1366, 1367, 1369, 1407, 1415, 1419, 1427, 1440, 1456, 1460, 1470, 1472, 1491, 1530]

CAN IDs only in a specific dataset (2): [2016, 2024]


In [7]:
# The mean and standard deviation of time intervals (ID-specific)
ignore_cols = ['Session', 'Time']
df_cnt_gauss = pd.concat(
    [df_cnt_concat.drop(columns=ignore_cols).mean().rename('mean'), 
     df_cnt_concat.drop(columns=ignore_cols).std().rename('std')], 
    axis=1
)
df_itv_gauss = pd.concat(
    [df_itv_concat.drop(columns=ignore_cols).mean().rename('mean'), 
     df_itv_concat.drop(columns=ignore_cols).std().rename('std')], 
    axis=1
)
display(df_cnt_gauss)
display(df_itv_gauss)

Unnamed: 0,mean,std
1040,5.005908767101802,0.07777456938556762
1078,20.01950486900123,0.14149859670661089
1151,50.01968299595969,0.16222209139913107
1168,20.008175651764763,0.10171775778745148
1170,20.008075550081486,0.10489129536022514
...,...,...
899,50.14235480807693,0.34947101707558975
902,50.020279124878,0.16837370854916398
903,50.02018291127854,0.16734612501747362
2016,0.0007424859257806266,0.027238478747113392


Unnamed: 0,mean,std
66,0.9954650628489626,0.044292683556723965
67,0.9976073837876621,0.0002634018710625745
68,0.9490001651426253,0.1966017799689266
127,0.9995112136592371,0.00012761940997246292
128,0.009999059567974351,0.0003118041254577751
...,...,...
1472,0.999510740474479,0.0007597910838317145
1491,0.9996947733218331,0.0008563397188535942
1530,1.0009190113701798,0.0010015640158069577
2016,11.029508,9.050514872829723


In [8]:
del df_cnt_concat, df_itv_concat
gc.collect()

23

## Normalized Likelihood Transformation

In [9]:
# if gap <= mean, use interval values, and if gap > mean, use gap values
def interval_with_gap_condition(data: pd.DataFrame, ids: list): # gauss: pd.DataFrame):
    data = data[['Session', 'SessionCat', 'Time', 'ID']].copy()
    n_rows = data.shape[0]
    t_cut = time_cut(data)
    data['Interval'] = data.groupby('ID')['Time'].diff()
    data = pd.concat([data[['Session', 'SessionCat', 'Time']], data.pivot(columns='ID', values=['Time', 'Interval'])], axis=1)
    ft_dict = {
        'Session': data['Session'].to_list(),
        'SessionCat': data['SessionCat'].to_list(),
        'Time': data['Time'].to_list()
    }
    for id in ids:
        data[('Gap', id)] = data['Time'] - data[('Time', id)].ffill()
        data[('Interval', id)] = data[('Interval', id)].ffill()
        data[('Gap', id)] = data[[('Interval', id), ('Gap', id)]].max(axis=1)
        ft_dict[id] = data[('Gap', id)]
    ft_df = pd.DataFrame.from_dict(ft_dict)
    ft_df = ft_df.loc[ft_df['Time'] >= t_cut]
    return ft_df

In [10]:
# Get normal distribution classes
norm_dist = {'cnt': {}, 'itv': {}}
for id in target_ids:
    cnt_mean = df_cnt_gauss.loc[id, 'mean']
    cnt_std = df_cnt_gauss.loc[id, 'std']
    itv_mean = df_itv_gauss.loc[id, 'mean']
    itv_std = df_itv_gauss.loc[id, 'std']
    if cnt_std == 0:    # Some IDs have constant counts at all times, which means std = 0
        print('cnt', id, cnt_mean, cnt_std)
        norm_dist['cnt'][id] = norm(cnt_mean, cnt_mean * 0.001)   # Set std to a sufficiently small value
    else:
        norm_dist['cnt'][id] = norm(cnt_mean, cnt_std)
    if itv_std == 0:    # Some IDs have constant counts at all times, which means std = 0
        print('itv', id, itv_mean, itv_std)
        norm_dist['itv'][id] = norm(itv_mean, itv_mean * 0.001)   # Set std to a sufficiently small value
    else:
        norm_dist['itv'][id] = norm(itv_mean, itv_std)

In [11]:
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Get the maximum value of likelihoods (of the training set)
df_cnt_likelihoods, df_itv_likelihoods = list(), list()
for data_file in tqdm(data_files['train']):
    df = load_arrange_data(data_file, dataset=DATASET, print_option=False)
    df_itv = interval_with_gap_condition(df, ids=target_ids) # gauss=df_gauss)
    df_cnt = get_counts(df, cut=True)
    cnt_likelihood, itv_likelihood = dict(), dict()
    for id in target_ids:
        cnt_likelihood[id] = norm_dist['cnt'][id].logpdf(df_cnt[id])
        itv_likelihood[id] = norm_dist['itv'][id].logpdf(df_itv[id])
    df_cnt_likelihoods.append(pd.DataFrame.from_dict(cnt_likelihood))
    df_itv_likelihoods.append(pd.DataFrame.from_dict(itv_likelihood))

max_range = {'cnt': [0, 0], 'itv': [0, 0]}  # [min, max]
for df in df_cnt_likelihoods:    
    current_max = df.sum(axis=1).max()
    current_min = df.sum(axis=1).min()
    if max_range['cnt'][0] > current_min:
        max_range['cnt'][0] = current_min
    if max_range['cnt'][1] < current_max:
        max_range['cnt'][1] = current_max
for df in df_itv_likelihoods: 
    current_max = df.sum(axis=1).max()
    current_min = df.sum(axis=1).min()
    if max_range['itv'][0] > current_min:
        max_range['itv'][0] = current_min
    if max_range['itv'][1] < current_max:
        max_range['itv'][1] = current_max
print(f'Max range (row-wise): {max_range}')

  0%|          | 0/4 [00:00<?, ?it/s]

Data has been truncated to remove NaN (5,960 rows removed)
Data has been truncated to remove NaN (7,268 rows removed)
Data has been truncated to remove NaN (58,764 rows removed)
Data has been truncated to remove NaN (6,805 rows removed)
Max range (row-wise): {'cnt': [-4286.379269796721, 72.16759954834816], 'itv': [-221136.8489468792, 392.66707133555195]}


In [12]:
del df_cnt_likelihoods, df_itv_likelihoods
gc.collect()

47

## Update Session Labels and Save

In [13]:
# Update session label
# - if an attack message is used to extract feature, label as attack
# - which means stop labeling as attacks if every ID receives two normal messages
def update_label(data: pd.DataFrame, dataset: str, ids: list):
    if dataset == 'X-CANIDS':
        data['LabelBin'] = 0
        data.loc[data['Label'] != 'Normal', 'LabelBin'] = 1
        data['Lag2Label'] = data.groupby('ID')['LabelBin'].shift(2)
    elif dataset == 'Syncan':
        data['Lag2Label'] = data.groupby('ID')['Session'].shift(2)
    data['Lag2Label'] = data['Lag2Label'].fillna(0)
    attack_points = data.loc[data['Session'] != data['Session'].shift(1).fillna(0), 'Time'].to_list()
    attack_names = data.loc[data['Session'] != data['Session'].shift(-1).fillna(0), 'SessionCat'].to_list()
    assert len(attack_points) % 2 == 0, attack_points
    attack_ranges = []
    for i in range(len(attack_points) // 2):
        attack_ranges.append(attack_points[2*i:2*(i+1)])
    for i in range(len(attack_ranges)):
        end = attack_ranges[i][1]
        attack = attack_names[2 * i + 1]
        if i < len(attack_ranges) - 1:
            next_start = attack_ranges[i+1][0]
        else:   # in case of the last attack
            next_start = end + 10
        query = (end < data['Time']) & (data['Time'] < next_start) & (data['Lag2Label'] == 1) & (data['ID'].isin(ids))
        effect_end = data.loc[query, 'Time'].max()
        if not np.isnan(effect_end):
            print(f'({i+1}) {attack} affects until {effect_end}s, {effect_end - end:.2f}s passed after the attack.')
            data.loc[(data['Time'] >= end) & (data['Time'] < effect_end), 'Session'] = 1
            data.loc[(data['Time'] >= end) & (data['Time'] < effect_end), 'SessionCat'] = attack
    return data

def update_label_window(data: pd.DataFrame, dataset: str, ids: list, window=1):
    attack_points = data.loc[data['Session'] != data['Session'].shift(1).fillna(0), 'Time'].to_list()
    attack_names = data.loc[data['Session'] != data['Session'].shift(-1).fillna(0), 'SessionCat'].to_list()
    assert len(attack_points) % 2 == 0, attack_points
    attack_ranges = []
    for i in range(len(attack_points) // 2):
        attack_ranges.append(attack_points[2*i:2*(i+1)])
    for i in range(len(attack_ranges)):
        end = attack_ranges[i][1]
        attack = attack_names[2 * i + 1]
        print(f'({i+1}) {attack} affects until {end + window} s, {window} s passed after the attack.')
        data.loc[(data['Time'] >= end) & (data['Time'] < end + window), 'Session'] = 1
        data.loc[(data['Time'] >= end) & (data['Time'] < end + window), 'SessionCat'] = attack
    return data

def z_extraction(data_cnt: pd.DataFrame, data_itv: pd.DataFrame, ids: list, norm_dict: dict, likelihood_range: dict):
    assert len(data_cnt) == len(data_itv)
    l_cnt_dict, l_itv_dict = dict(), dict()
    for id in ids:
        l_cnt_dict[id] = norm_dict['cnt'][id].logpdf(data_cnt[id])
        l_itv_dict[id] = norm_dict['itv'][id].logpdf(data_itv[id])
    l_cnt_df = pd.DataFrame.from_dict(l_cnt_dict)
    l_itv_df = pd.DataFrame.from_dict(l_itv_dict)
    l_df = data_cnt[['Session', 'SessionCat', 'Time']].copy()
    l_df['z_cnt'] = ((l_cnt_df.sum(axis=1) - likelihood_range['cnt'][0]) / (likelihood_range['cnt'][1] - likelihood_range['cnt'][0])).values
    l_df['z_itv'] = ((l_itv_df.sum(axis=1) - likelihood_range['itv'][0]) / (likelihood_range['itv'][1] - likelihood_range['itv'][0])).values
    return l_df

def get_save_path(original_path: str, dataset: str):
    if dataset == 'X-CANIDS':
        new_path = f'{Path(original_path).parents[1]}/z/{Path(original_path).stem}.parquet'
    elif dataset == 'Syncan':
        new_path = f'{Path(original_path).parent}/z_{Path(original_path).stem.split("_")[-1]}.parquet'
    return new_path

In [14]:
import time

for step, file_list in data_files.items():
    for data_file in tqdm(file_list, desc=f'Processing {step} datasets'):
        # Update session labels and extract z features
        df_cnt = load_arrange_data(data_file, dataset=DATASET, print_option=False)
        print(df_cnt.Session.value_counts())
        df_itv = df_cnt.copy()

        start_time = time.process_time()
        
        df_itv = interval_with_gap_condition(df_itv, ids=target_ids) # gauss=df_gauss)
        df_cnt = get_counts(df_cnt, ids=target_ids, cut=True, sessioncat=True)
        df_z = z_extraction(df_cnt, df_itv, ids=target_ids, norm_dict=norm_dist, likelihood_range=max_range)

        end_time = time.process_time()
        
        assert df_z.query('Session == 1 and SessionCat == "Normal"').shape[0] == 0
        assert df_z.query('Session == 0 and SessionCat != "Normal"').shape[0] == 0

        # Save as a parquet file
        str_cols = [str(c) for c in df_z.columns]
        df_z.columns = str_cols
        save_path = get_save_path(data_file, dataset=DATASET)
        df_z.to_parquet(save_path)
        print(df_z.Session.value_counts())
        print(f'{save_path} is saved.')
        
# Measure inference speed
process_time = end_time - start_time
inference_speed = len(df_itv) / process_time
print("-----------------------------------------")
print(f'CPU execution time: {process_time:,} seconds')
print(f'Inference speed: {inference_speed:.2f} messages per second')
print("-----------------------------------------")

Processing test datasets:   0%|          | 0/1 [00:00<?, ?it/s]

Session
0    4279909
Name: count, dtype: int64
Data has been truncated to remove NaN (50,475 rows removed)
Session
0    4229434
Name: count, dtype: int64
../../Dataset/X-CANIDS/z/dump6.parquet is saved.


In [15]:
del df_cnt, df_itv, df_z
gc.collect()

50

### Move(copy) z data files to the result folder

In [4]:
######### SELECT THE DATASET #########
# DATASET = 'Syncan'
DATASET = 'X-CANIDS'
DATASET_DIR = f'../../Dataset/{DATASET}'

if DATASET == 'X-CANIDS':
    feature_files = {
        'train': glob.glob(f'{DATASET_DIR}/z/dump[1-4].parquet'),
        'valid': [f'{DATASET_DIR}/z/dump5.parquet'],
        'test': glob.glob(f'{DATASET_DIR}/z/dump6-*.parquet'),
    }
elif DATASET == 'Syncan':
    feature_files = {
        'train': [
            f'{DATASET_DIR}/z_train.parquet'
        ],
        'valid': [
            f'{DATASET_DIR}/z_valid.parquet'
        ],
        'test': [
            f'{DATASET_DIR}/z_normal.parquet',
            f'{DATASET_DIR}/z_flooding.parquet',
            f'{DATASET_DIR}/z_plateau.parquet',
            f'{DATASET_DIR}/z_continuous.parquet',
            f'{DATASET_DIR}/z_playback.parquet',
            f'{DATASET_DIR}/z_suppress.parquet'
        ]
    }

In [18]:
# test results
dt_exp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
for ft_file in tqdm(feature_files['test']):
    df = pd.read_parquet(ft_file)
    results = df
    if DATASET == 'Syncan':
        attack = Path(ft_file).stem.split('_')[-1]
        save_path = f'../../Results/{DATASET}_TIL_{dt_exp}_{attack}.parquet'
        results.to_parquet(save_path)
    elif DATASET == 'X-CANIDS':
        attack = '-'.join(Path(ft_file).stem.split('-')[1:])
        save_path = f'../../Results/{DATASET}_TIL_{dt_exp}_{attack}.parquet'
        results.to_parquet(save_path)
    print(f'{save_path} saved.')

  0%|          | 0/6 [00:00<?, ?it/s]

../../Results/Syncan_TIL_2024-07-19_09-29-18_normal.parquet saved.
../../Results/Syncan_TIL_2024-07-19_09-29-18_flooding.parquet saved.
../../Results/Syncan_TIL_2024-07-19_09-29-18_plateau.parquet saved.
../../Results/Syncan_TIL_2024-07-19_09-29-18_continuous.parquet saved.
../../Results/Syncan_TIL_2024-07-19_09-29-18_playback.parquet saved.
../../Results/Syncan_TIL_2024-07-19_09-29-18_suppress.parquet saved.


In [27]:
# memory usage
objs = [df_itv_gauss, df_cnt_gauss, max_range, max_range]
print(f'Model size: {asizeof.asizeof(objs)/1024:.2f} KB')

Model size: 55.41 KB


In [19]:
# validation results (for setting threshold)
ft_file = feature_files['valid'][0]
df = pd.read_parquet(ft_file)
results = df
if DATASET == 'Syncan':
    attack = Path(ft_file).stem.split('_')[-1]
    save_path = f'../../Results/{DATASET}_TIL_{dt_exp}_valid.parquet'
    results.to_parquet(save_path)
elif DATASET == 'X-CANIDS':
    attack = '-'.join(Path(ft_file).stem.split('-')[1:])
    save_path = f'../../Results/{DATASET}_TIL_{dt_exp}_valid.parquet'
    results.to_parquet(save_path)
print(f'{save_path} saved.')

../../Results/Syncan_TIL_2024-07-19_09-29-18_valid.parquet saved.
