In [1]:
import json

In [2]:
def calculate_crc(data):
    """
    Calculate CRC-15 checksum for the given data.

    Args:
        data (str): Binary data string.

    Returns:
       CRC-15 checksum.

    """
    crc = 0x0000

    # CRC-15 polynomial
    poly = 0x4599

    for bit in data:
        # XOR with the current bit shifted left by 14 bits
        crc ^= (int(bit) & 0x01) << 14

        for _ in range(15):
            if crc & 0x8000:
                crc = (crc << 1) ^ poly
            else:
                crc <<= 1

        # Ensuring 15 bits
        crc &= 0x7FFF

    return crc

In [3]:
def stuff_bits(binary_string):
    """
    Inserting '1' after every 5 consecutive '0's in the binary string.

    Args:
        binary_string (str): Binary string to be stuffed.

    Returns:
        str: Binary string after stuffing.

    """
    result = ''

    # Initialize a count for consecutive 0's
    count = 0

    for bit in binary_string:

        # Appending the current bit to the result string
        result += bit
        
        # Incrementing the count if the current bit is 0
        if bit == '0':
            count += 1
            
            # Inserting a 1 after 5 consecutive 0's
            if count == 5:
                result += '1'
                # Reseting the count after inserting the 1
                count = 0
        else:
            # Reseting the count if the current bit is not 0
            count = 0

    return result


In [4]:
def hex_to_bits(hex_value, num_bits):
    """
    Convert hexadecimal value to binary string with specified number of bits.
    
    Args:
        hex_value (str): Hexadecimal value to be converted.
        num_bits (int): Number of bits for the resulting binary string.

    Returns:
        str: Binary string representation of the hexadecimal value.
    """
    return bin(int(hex_value, 16))[2:].zfill(num_bits)


In [5]:

def convert_to_binary_string(can_id, dlc, data):
    """
    Converting CAN frame components to a binary string according to the CAN protocol.

    Args:
        can_id (str): CAN identifier in hexadecimal format.
        dlc (int): Data Length Code indicating the number of bytes of data.
        data (list): List of hexadecimal bytes representing data.

    Returns:
        str: Binary string representing the formatted CAN frame.

    """

    # Start of Frame (SOF) bit
    start_of_frame = '0'

    # Converting CAN identifier to 11-bit binary representation
    can_id_bits = hex_to_bits(can_id, 11)

    # Remote Transmission Request (RTR) bit
    rtr_bit = '0'

    # Identifier Extension (IDE) bit
    ide_bit = '0'

    # Control bits (R0 and Stuff)
    control_r0_bit = '0'
    control_stuff_bit = '1'

    # Converting Data Length Code (DLC) to 4-bit binary representation
    dlc_bits = bin(dlc)[2:].zfill(4)

    # Convert data bytes to binary representation
    if data[0] != '':
        data_bits = ''.join(hex_to_bits(hex_byte, 8) for hex_byte in data)
    else:
        data_bits = ''

    # Filling missing data bytes with zeros
    padding_bits = '0' * (8 * (8 - dlc))
    data_bit_total = data_bits + padding_bits

    # Calculating CRC-15 checksum and converting to binary representation
    crc_bit = bin(calculate_crc(start_of_frame + can_id_bits + rtr_bit + ide_bit + control_r0_bit +
                                control_stuff_bit + dlc_bits + data_bit_total))[2:].zfill(15)

    # CRC delimiter bit
    crc_delimiter = '1'

    # Acknowledge (ACK) bit
    ack_bit = '0'

    # ACK delimiter bit
    ack_delimiter = '1'

    # End of Frame (EOF) bits
    end_of_frame_bits = '1' * 7

    # Inter-Frame Spacing bits
    inter_frame_spacing_bits = '1' * 3

    # Combining all bits as per CAN frame format and stuffing them
        return stuff_bits(start_of_frame + can_id_bits + rtr_bit + ide_bit + control_r0_bit +
                        control_stuff_bit + dlc_bits + data_bit_total + crc_bit + crc_delimiter +
                        ack_bit + ack_delimiter + end_of_frame_bits + inter_frame_spacing_bits)

In [6]:
def form_data(input_filename):
    """
    Reading data from a file and formatting it into arrays for further processing.

    Args:
        input_filename (str): Path to the input file containing CAN data.

    Returns:
        tuple: A tuple containing three elements:
            - data_array (list): A list of lists containing timestamp and converted binary data.
            - frame_type (list): A list containing frame types (0 for benign frames, 1 for attacked frames).
            - anchor (list): A list containing unique converted binary data strings for a specific CAN arbitration ID.

    """

    # Initialising empty lists and variables

    #frame count
    fc = 1

    # List to store timestamp and converted binary data
    data_array = []

    # List to store frame types : attack/benign
    frame_type = []

    # Arbitration ID to identify anchor frames
    can_arb_id = '0002'

    #binary string size of each data frame
    frame_size = []

    # Set to store unique converted binary data  strings for anchor frames
    anchor = set()  

    # Open the input file for reading
    with open(input_filename, 'r') as input_file:

        # Iterate over each line in the input file
        for line in input_file:

            # Splitting the line by comma to extract different parts
            parts = line.strip().split(',')

            # Extracting timestamp, CAN ID, DLC, and data from the parts
            timestamp = float(parts[0])
            can_id = parts[1]
            dlc = int(parts[2])
            data = parts[3:3 + dlc]

            # Determining frame type based on the last part (R for benign, otherwise T for attack)
            frame_type.append(0 if parts[-1] == 'R' else 1)

            # Converting data to binary string representation
            converted_data = convert_to_binary_string(can_id, dlc, data)

            #storing binary string size of each data string
            frame_size.append([fc,len(converted_data)])
            fc+=1

            # Checking if the CAN ID matches the anchor CAN ID
            if can_id == can_arb_id:
                anchor.add(converted_data)

            # Appending timestamp and converted binary data to the data array
            data_array.append([timestamp, converted_data])

    # Converting set to list to ensure consistent ordering
    anchor = list(anchor)

    return data_array, frame_type, anchor, frame_size


In [7]:
#path to benign dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/all_benign_data.txt'
#path to attack dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Original/attack_dataset.csv'
#path to subset of attack dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo\attack_dataset.csv'
input_filename = "D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/attack_dataset.csv"

# Calling the form_data function to process the input file and obtain data arrays
data_array, frame_type, anchor, frame_size = form_data(input_filename)

# Saving data to a JSON file
# Opening the JSON file in write mode
with open("D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Metadata/formatted_data.json", "w") as json_file:
    
    # Write the data arrays and anchor list to the JSON file
    json.dump({"data_array": data_array, "frame_type": frame_type, "anchor": anchor}, json_file)


In [8]:
import json

def generate_table(index_sizes):
    """
    Generate a table containing unique sizes and corresponding indexes from a list of lists.

    Args:
        index_sizes (list of lists): List containing index-size pairs.

    Returns:
        dict: A dictionary where keys are unique sizes and values are lists of indexes.
    """
    # Sort the list of lists based on size
    index_sizes.sort(key=lambda x: x[1])

    # Create a dictionary to store indexes corresponding to each size
    size_index_dict = {}

    # Populate the dictionary
    for item in index_sizes:
        index, size = item
        if size not in size_index_dict:
            size_index_dict[size] = [index]
        else:
            size_index_dict[size].append(index)

    return size_index_dict

def store_as_json(index_sizes, filename):
    """
    Generate a JSON file containing the table data.

    Args:
        index_sizes (list of lists): List containing index-size pairs.
        filename (str): Name of the JSON file to be generated.
    """
    # Generate the table
    size_index_dict = generate_table(index_sizes)

    # Write the dictionary to a JSON file
    with open(filename, 'w') as json_file:
        json.dump(size_index_dict, json_file, indent=4)

def print_table(size_index_dict):
    """
    Print the table with sizes and corresponding indexes.

    Args:
        size_index_dict (dict): Dictionary containing sizes as keys and lists of indexes as values.
    """
    print("Size\tIndexes")
    for size, indexes in size_index_dict.items():
        indexes_str = ', '.join(map(str, indexes))
        print(f"{size}\t{indexes_str}")

# Example usage:
# frame_size is assumed to be defined elsewhere
filename = "D:\\IIT-D\\Sem-4\\JCD893-M.Tech Major Project Part-2\\Anchor_Frame\\Dataset\\Metadata\\frame_size_table.json"
store_as_json(frame_size, filename)
print_table(generate_table(frame_size))


Size	Indexes
114	55539, 152147, 902412
115	1, 3, 11, 17, 41, 56, 76, 78, 92, 113, 115, 129, 150, 166, 186, 188, 202, 204, 222, 224, 241, 256, 261, 263, 271, 277, 279, 293, 298, 300, 308, 314, 335, 351, 353, 371, 373, 387, 407, 426, 446, 448, 462, 483, 485, 499, 501, 520, 522, 536, 556, 572, 574, 592, 611, 626, 631, 641, 647, 649, 668, 670, 684, 705, 721, 741, 743, 757, 777, 779, 796, 798, 816, 818, 832, 853, 855, 869, 871, 885, 890, 892, 900, 908, 921, 944, 964, 1001, 1017, 1038, 1054, 1075, 1091, 1113, 1129, 1149, 1160, 1168, 1183, 1188, 1190, 1198, 1204, 1225, 1227, 1241, 1243, 1262, 1264, 1298, 1314, 1334, 1336, 1353, 1355, 1373, 1389, 1410, 1412, 1426, 1428, 1442, 1447, 1463, 1498, 1530, 1551, 1568, 1610, 1637, 1641, 1698, 1757, 1761, 1832, 1854, 1887, 1925, 1963, 1999, 2003, 2027, 2031, 2108, 2134, 2153, 2157, 2177, 2193, 2197, 2220, 2256, 2260, 2328, 2350, 2383, 2408, 2413, 2427, 2429, 2439, 2449, 2451, 2496, 2509, 2513, 2551, 2555, 2593, 2622, 2654, 2687, 2719, 2794, 2804, 2808,