In [1]:
import pandas as pd
import numpy as np

In [2]:
# Function to select the timestamp and can_id
def data_to_be_utilized(file_path):
    """Load the timestamp and CAN ID fields from a headerless CAN log.

    Only the first two comma-separated fields of each record are parsed and
    they are labelled ``timestamp`` and ``can_id``. The remaining fields are
    skipped by the parser rather than being loaded and then thrown away.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything ``pandas.read_csv`` accepts. The data must have no header
        row.

    Returns
    -------
    pandas.DataFrame
        A standalone two-column frame with columns ``timestamp`` and
        ``can_id`` (in that order). ``can_id`` always holds text; the dtype
        of ``timestamp`` is whatever pandas infers from the file.
    """
    # CAN IDs are forced to text. With dtype inference, a file whose IDs
    # contain only decimal digits (e.g. '0130', '0002') would be read as
    # integers, dropping the leading zeros and no longer matching the
    # hex-string IDs read from other files.
    return pd.read_csv(
        file_path,
        header=None,
        usecols=[0, 1],
        names=['timestamp', 'can_id'],
        dtype={'can_id': str},
    )

In [3]:
# Function to extract distinct CAN IDs
def extract_distinct_can_ids(selected_columns):
    """Return every distinct value found in the ``can_id`` column.

    Parameters
    ----------
    selected_columns : pandas.DataFrame
        Frame containing a ``can_id`` column.

    Returns
    -------
    array-like
        The result of ``Series.unique`` on that column: one entry per
        distinct ID, in order of appearance (not sorted).
    """
    can_id_column = selected_columns['can_id']
    return can_id_column.unique()


In [4]:
# Converting the timestamp to decimal form
def preprocess_time(df):
    """Return a new frame with float timestamps, ordered by CAN ID and time.

    The ``timestamp`` column is converted to ``float`` and the rows are
    sorted by ``can_id`` first and ``timestamp`` second. Original index
    labels are kept (the index is not reset).

    The input frame is left untouched, so re-running the calling cell gives
    the same result and any other reference to ``df`` still sees the data as
    it was loaded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``timestamp`` and ``can_id`` columns.

    Returns
    -------
    pandas.DataFrame
        A new, sorted frame with the same columns as ``df``.
    """
    # ``assign`` builds a new frame instead of writing into the caller's one,
    # and ``sort_values`` without ``inplace`` returns a new sorted frame.
    return (
        df.assign(timestamp=df['timestamp'].astype(float))
          .sort_values(by=['can_id', 'timestamp'])
    )

In [5]:
def calculate_periodicity(df):
    """Summarise how regularly each CAN ID appears in ``df``.

    For every ``can_id`` the gaps between consecutive ``timestamp`` values
    are computed (in the row order of ``df``), then reduced to their mean
    and standard deviation, alongside the number of rows for that ID.

    Side effect: the per-row gap is stored on ``df`` itself in a new
    ``time_diff`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``can_id`` and numeric ``timestamp`` columns. Rows are
        expected to be ordered by time within each ID already; this
        function does not sort them.

    Returns
    -------
    pandas.DataFrame
        One row per CAN ID, sorted ascending by average interval, with the
        columns ``can_id``, ``average_interval (in ms)``, ``std_deviation``
        and ``no_of_occurences``.

    Notes
    -----
    Only the average is multiplied by 1000; ``std_deviation`` stays in the
    unit of ``timestamp``. The "ms" label presumes timestamps are in
    seconds -- TODO confirm against the dataset.
    """
    interval_col = 'average_interval (in ms)'

    # Gap between each frame and the previous frame with the same CAN ID.
    # The first frame of every ID has no predecessor, so its gap is NaN.
    df['time_diff'] = df.groupby('can_id')['timestamp'].diff()

    # One grouping serves both the gap statistics and the frame counts.
    by_id = df.groupby('can_id')
    summary = by_id['time_diff'].agg(['mean', 'std'])
    summary['occurrences'] = by_id.size()
    summary = summary.reset_index()

    # Columns at this point are: can_id, mean, std, occurrences.
    summary.columns = ['can_id', interval_col, 'std_deviation', 'no_of_occurences']

    # Scale the mean gap by 1000 for the "(in ms)" column.
    summary[interval_col] = summary[interval_col] * 1000

    # Shortest average interval first.
    return summary.sort_values(by=interval_col)

In [6]:
# Dataset locations. They are written as raw strings so the Windows
# backslashes are taken literally and not parsed as escape sequences.
#   benign dataset: r'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/all_benign_data.txt'
#   attack dataset: r'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Original/attack_dataset.csv'
file_path = r'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Original/attack_dataset.csv'

# Pipeline: load the two columns -> collect the distinct IDs -> convert and
# sort the timestamps -> compute the per-ID interval statistics.
selected_columns = data_to_be_utilized(file_path)
distinct_can_ids = extract_distinct_can_ids(selected_columns)
preprocessed_time = preprocess_time(selected_columns)
periodicity = calculate_periodicity(preprocessed_time)

In [7]:
# Heading on one line, then the sorted IDs on the next.
print("Distinct CAN IDs:", np.sort(distinct_can_ids), sep="\n")

Distinct CAN IDs:
['0000' '0002' '00a0' '00a1' '0130' '0131' '0140' '0153' '018f' '01f1'
 '0260' '02a0' '02c0' '0316' '0329' '0350' '0370' '0430' '043f' '0440'
 '04b1' '04f0' '0545' '05a0' '05a2' '05f0' '0690']


In [8]:
# Show the per-ID periodicity table (rows ordered by average interval).
print(periodicity)


   can_id  average_interval (in ms)  std_deviation  no_of_occurences
0    0000                  3.870938       0.067848            587521
4    0130                 16.849328       0.062456            168118
1    0002                 16.905842       0.063173            167556
5    0131                 16.960503       0.063171            167016
6    0140                 16.961925       0.062890            167002
8    018f                 16.969199       0.062809            166931
12   02c0                 16.991313       0.062883            166713
16   0370                 16.994565       0.062952            166681
13   0316                 16.999750       0.063465            166631
7    0153                 17.005609       0.063420            166573
18   043f                 17.053705       0.063548            166103
10   0260                 17.097226       0.063757            165681
11   02a0                 17.104143       0.063437            165614
15   0350                 17.10707