In [3]:
import json

import re


In [4]:
def calculate_crc(data):
    """
    Calculate the CRC-15 checksum (CAN polynomial 0x4599) for the given data.

    The register starts at zero and the bits are consumed in order, most
    significant bit first, one shift per input bit: the incoming bit is
    XOR-ed with the register's top bit (bit 14), the register is shifted
    left by one and truncated to 15 bits, and the polynomial is XOR-ed in
    whenever that feedback bit is 1.

    Args:
        data (str): Binary data string made of '0' and '1' characters.

    Returns:
        int: CRC-15 checksum in the range 0..0x7FFF (0 for empty input).

    Raises:
        ValueError: If a character of ``data`` cannot be parsed by ``int``.
    """
    # CRC-15 polynomial
    poly = 0x4599

    crc = 0x0000

    for bit in data:
        # Feedback is the incoming bit XOR the bit about to be shifted out.
        # crc never exceeds 15 bits here, so crc >> 14 is either 0 or 1.
        feedback = (int(bit) & 0x01) ^ (crc >> 14)

        # Shift by exactly one position per input bit and keep 15 bits.
        crc = (crc << 1) & 0x7FFF

        if feedback:
            crc ^= poly

    return crc

In [49]:
def stuff_bits(binary_string):
    """
    Return the input with a '1' inserted after each run of five '0's.

    The run counter restarts after every inserted '1' and after any
    character that is not '0'. Runs of '1's are left untouched, so this is
    the zero-only stuffing rule that destuff_bits reverses.

    Args:
        binary_string (str): Binary string to be stuffed.

    Returns:
        str: Binary string after stuffing.

    """
    pieces = []

    # Length of the current run of consecutive '0' characters
    zero_run = 0

    for symbol in binary_string:
        pieces.append(symbol)

        if symbol != '0':
            # Anything other than '0' breaks the run
            zero_run = 0
            continue

        zero_run += 1
        if zero_run == 5:
            # Five zeros in a row: emit the stuff bit and start a new run
            pieces.append('1')
            zero_run = 0

    return ''.join(pieces)

def destuff_bits(binary_string):
    """
    Drop the '1' that follows each run of five '0's in the binary string.

    Every character is copied to the output; after the fifth consecutive
    '0' the next character is discarded if (and only if) it is a '1'. The
    run counter restarts after each such run and after any non-'0'
    character.

    Args:
        binary_string (str): Binary string to be destuffed.

    Returns:
        str: Binary string after destuffing.
    """
    kept = []
    zero_run = 0

    total = len(binary_string)
    position = 0
    while position < total:
        symbol = binary_string[position]
        kept.append(symbol)
        # From here on, position refers to the character after `symbol`
        position += 1

        if symbol != '0':
            zero_run = 0
            continue

        zero_run += 1
        if zero_run < 5:
            continue

        # Fifth zero reached: restart the run and skip a following '1'
        zero_run = 0
        if position < total and binary_string[position] == '1':
            position += 1

    return ''.join(kept)


In [50]:
# Manual check of destuff_bits on a sample stuffed bit string
# (the matching stuff_bits experiment is kept below for reference).
# stuff_bits("11111000011110000")
destuff_bits("001100010110000100000100010100100001011010000010010010010000100100001000001000011011111111100001010011011111111111")

'001100010110000100000000101001000010110100000001001001000010010000100000000011011111111100001010011011111111111'

In [26]:
def hex_to_bits(hex_value, num_bits):
    """
    Render a hexadecimal string as a binary string of at least num_bits digits.

    The value is left-padded with '0' up to num_bits; a value that needs
    more digits than num_bits is returned at its full length (it is not
    truncated).

    Args:
        hex_value (str): Hexadecimal value to be converted (an optional
            '0x' prefix is accepted by int(..., 16)).
        num_bits (int): Minimum number of bits for the resulting binary string.

    Returns:
        str: Binary string representation of the hexadecimal value.
    """
    value = int(hex_value, 16)

    # Drop the two-character '0b' prefix produced by bin()
    digits = bin(value)[2:]

    return digits.rjust(num_bits, '0')


In [4]:
# Sanity check: render the identifier 0x130 as an 11-bit field.
print(hex_to_bits('0x130', 11))

00100110000


In [8]:
# Scratch check of the bin(...)[2:].zfill(4) idiom used for 4-bit fields.
bin(8)[2:].zfill(4)
# NOTE: 1100 is the decimal integer one thousand one hundred, not the bit
# pattern '1100', so the print below shows its 11-digit binary form.
dlcx = 1100
print(bin(dlcx)[2:].zfill(4))
# Parsing the *string* '1100' as base 2 yields the value 12.
int('1100', 2)

10001001100


12

In [55]:

def convert_to_binary_string(can_id, dlc, data):
    """
    Converting CAN frame components to a binary string according to the CAN protocol.

    Layout of the returned string:
        SOF (1) | identifier (11) | RTR (1) | IDE (1) | r0 (1) | DLC (4) |
        data field, zero-padded to 64 bits | CRC (15) |
        CRC delimiter (1) | ACK (1) | ACK delimiter (1) | EOF (7) | IFS (3)

    The CRC is computed over everything from SOF through the padded data
    field. Only the SOF..CRC part is passed through stuff_bits; the trailing
    delimiter/ACK/EOF/IFS bits are appended unstuffed.

    Args:
        can_id (str): CAN identifier in hexadecimal format.
        dlc (int): Data Length Code indicating the number of bytes of data.
        data (list): List of hexadecimal bytes representing data. It is
            ignored when dlc is 0, when the list is empty, or when its first
            element is the empty string.

    Returns:
        str: Binary string representing the formatted CAN frame.

    """
    # SOF, 11-bit identifier, RTR, IDE, r0 and the 4-bit DLC, in wire order
    header_bits = (
        '0'                          # Start of Frame (SOF) bit
        + hex_to_bits(can_id, 11)    # CAN identifier
        + '0'                        # Remote Transmission Request (RTR) bit
        + '0'                        # Identifier Extension (IDE) bit
        + '0'                        # Reserved control bit (r0)
        + bin(dlc)[2:].zfill(4)      # Data Length Code (DLC)
    )

    # Convert data bytes to binary representation. The `data` truthiness
    # check keeps an empty list from raising IndexError on data[0].
    if dlc and data and data[0] != '':
        data_bits = ''.join(hex_to_bits(hex_byte, 8) for hex_byte in data)
    else:
        data_bits = ''

    # Filling missing data bytes with zeros. Padding is derived from the bits
    # actually encoded (not from dlc), so the field is 64 bits even when
    # fewer than dlc bytes were supplied.
    data_bit_total = data_bits.ljust(64, '0')

    # Calculating CRC-15 checksum over SOF..data and converting it to 15 bits
    protected_bits = header_bits + data_bit_total
    crc_bit = bin(calculate_crc(protected_bits))[2:].zfill(15)

    # Unstuffed tail of the frame
    trailer_bits = (
        '1'          # CRC delimiter bit
        + '0'        # Acknowledge (ACK) bit
        + '1'        # ACK delimiter bit
        + '1' * 7    # End of Frame (EOF) bits
        + '1' * 3    # Inter-Frame Spacing bits
    )

    # Stuffing SOF..CRC, then appending the fixed-form tail
    return stuff_bits(protected_bits + crc_bit) + trailer_bits



In [58]:
def reverse_can_frame(binary_string):
    """
    Recover the identifier, DLC and data bytes from a CAN frame bit string.

    The whole input is first passed through destuff_bits, then the fields
    are read at fixed offsets: SOF at 0, identifier at 1..11, RTR at 12,
    IDE at 13, r0 at 14, DLC at 15..18 and the data bytes from offset 19.
    The CRC and the trailing bits are not examined.

    Args:
        binary_string (str): Binary string representing a CAN frame.

    Returns:
        tuple: CAN identifier (str, upper-case hex without padding),
            Data Length Code (int), and data (list of two-digit lower-case
            hex strings, one per byte).
    """
    frame = destuff_bits(binary_string)

    # The single-bit fields are not part of the result; they are still read
    # so that a frame too short to contain them raises IndexError.
    sof = frame[0]
    identifier = bits_to_hex(frame[1:12])
    rtr, ide, r0 = frame[12], frame[13], frame[14]

    dlc = int(frame[15:19], 2)

    # Data field: dlc bytes starting right after the DLC
    payload_start = 19
    payload = frame[payload_start:payload_start + 8 * dlc]

    data = []
    for offset in range(0, len(payload), 8):
        byte_value = int(payload[offset:offset + 8], 2)
        data.append(format(byte_value, '02x'))

    return identifier, dlc, data



def bits_to_hex(binary_str):
    """
    Convert a binary string to upper-case hexadecimal digits.

    Leading zeros are not preserved: the result is the shortest hex
    representation of the value.

    Args:
        binary_str (str): Binary string.

    Returns:
        str: Hexadecimal string without a '0x' prefix.
    """
    value = int(binary_str, 2)

    # Upper-case first, then cut off the two-character '0X' prefix
    prefixed = hex(value).upper()
    return prefixed[2:]


In [59]:
# Manual check: decode a sample frame bit string back into (id, dlc, data).
reverse_can_frame("001100010110000100000000101001000010110100000001001001000010010000100000000011011111111100001010011011111111111")

('316', 8, ['05', '21', '68', '09', '21', '21', '00', '6f'])

In [56]:
def form_data(input_filename, can_arb_id='0153'):
    """
    Reading data from a file and formatting it into arrays for further processing.

    Each non-blank line is expected to be comma separated:
    ``timestamp,can_id,dlc,<dlc data bytes...>,label``. The last field is the
    label: 'R' marks a benign frame, anything else an attack frame.

    Args:
        input_filename (str): Path to the input file containing CAN data.
        can_arb_id (str): Arbitration ID (as written in the file) whose
            frames are collected as anchor frames. Defaults to '0153'.

    Returns:
        tuple: A tuple containing four elements:
            - data_array (list): A list of [timestamp, converted binary data] pairs.
            - frame_type (list): A list containing frame types (0 for benign frames, 1 for attacked frames).
            - anchor (list): Unique converted binary data strings for the
              anchor arbitration ID, in order of first appearance.
            - frame_size (list): A list of [frame number (1-based), length of
              the converted binary string] pairs.

    Raises:
        ValueError: If the timestamp, DLC or a data byte cannot be parsed.
        IndexError: If a line has fewer than three fields.

    """

    # List to store timestamp and converted binary data
    data_array = []

    # List to store frame types : attack/benign
    frame_type = []

    # Binary string size of each data frame
    frame_size = []

    # Unique anchor frames. A dict is used as an ordered set so the returned
    # list has a deterministic (first-seen) order, unlike list(set(...)).
    anchor = {}

    # Open the input file for reading
    with open(input_filename, 'r') as input_file:

        # Iterate over each line in the input file
        for line in input_file:

            # Blank lines carry no frame; skipping them avoids float('')
            line = line.strip()
            if not line:
                continue

            # Splitting the line by comma to extract different parts
            parts = line.split(',')

            # Extract the timestamp, CAN ID, DLC, and data
            timestamp = float(parts[0])
            can_id = parts[1]
            dlc = int(parts[2])
            data = parts[3:3 + dlc]

            # Determining frame type based on the last part (R for benign, otherwise attack)
            frame_type.append(0 if parts[-1] == 'R' else 1)

            # Converting data to binary string representation
            converted_data = convert_to_binary_string(can_id, dlc, data)

            # Storing binary string size of each frame with its 1-based number
            frame_size.append([len(frame_size) + 1, len(converted_data)])

            # Checking if the CAN ID matches the anchor CAN ID
            if can_id == can_arb_id:
                anchor[converted_data] = None

            # Appending timestamp and converted binary data to the data array
            data_array.append([timestamp, converted_data])

    return data_array, frame_type, list(anchor), frame_size



def form_data_OTIDS(input_filename, can_arb_id='0002'):
    """
    Reading labelled OTIDS-style text data and formatting it into arrays for further processing.

    Each non-blank line is expected to contain the markers ``Timestamp: ``,
    ``ID: `` and ``DLC: ``. After ``DLC: `` come the DLC value, the payload
    bytes and finally the label as whitespace-separated tokens. The label is
    the last token: 'R' marks a benign frame, anything else an attack frame.
    The payload is the first ``dlc`` tokens after the DLC value, matching how
    form_data slices its data fields.

    Args:
        input_filename (str): Path to the input file containing CAN data.
        can_arb_id (str): Arbitration ID (as written in the file) whose
            frames are collected as anchor frames. Defaults to '0002'.

    Returns:
        tuple: A tuple containing four elements:
            - data_array (list): A list of [timestamp, converted binary data] pairs.
            - frame_type (list): A list containing frame types (0 for benign frames, 1 for attacked frames).
            - anchor (list): Unique converted binary data strings for the
              anchor arbitration ID, in order of first appearance.
            - frame_size (list): A list of [frame number (1-based), length of
              the converted binary string] pairs.

    Raises:
        ValueError: If a line has no label token after its payload, or if
            the timestamp, DLC or a data byte cannot be parsed.
        IndexError: If one of the ``Timestamp: `` / ``ID: `` / ``DLC: ``
            markers is missing from a line.

    """

    # List to store timestamp and converted binary data
    data_array = []

    # List to store frame types : attack/benign
    frame_type = []

    # Binary string size of each data frame
    frame_size = []

    # Unique anchor frames. A dict is used as an ordered set so the returned
    # list has a deterministic (first-seen) order, unlike list(set(...)).
    anchor = {}

    # Open the input file for reading
    with open(input_filename, 'r') as input_file:

        # Iterate over each line in the input file
        for line in input_file:

            # Blank lines carry no frame
            line = line.strip()
            if not line:
                continue

            timestamp = float(line.split("Timestamp: ")[1].split()[0])
            can_id = line.split('ID: ')[1].split()[0]

            # Tokens after the DLC marker: DLC value, payload bytes, label
            tokens = line.split('DLC: ')[1].split()
            dlc = int(tokens[0])

            # Slice by dlc rather than a fixed 8 so frames with fewer than
            # eight data bytes neither fail nor absorb the label as data.
            payload = tokens[1:1 + dlc]
            trailing = tokens[1 + dlc:]
            if not trailing:
                raise ValueError(f"missing label after payload in line: {line!r}")
            label = trailing[-1]

            # Determining frame type based on the label (R for benign, otherwise attack)
            frame_type.append(0 if label == 'R' else 1)

            # Converting data to binary string representation
            converted_data = convert_to_binary_string(can_id, dlc, payload)

            # Storing binary string size of each frame with its 1-based number
            frame_size.append([len(frame_size) + 1, len(converted_data)])

            # Checking if the CAN ID matches the anchor CAN ID
            if can_id == can_arb_id:
                anchor[converted_data] = None

            # Appending timestamp and converted binary data to the data array
            data_array.append([timestamp, converted_data])

    return data_array, frame_type, list(anchor), frame_size


In [57]:
# Driver cell: parse the comma-separated DoS dataset with form_data.
# Earlier dataset locations, kept for reference:
#path to benign dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/all_benign_data.txt'
#path to attack dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Original/attack_dataset.csv'
#path to subset of attack dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo\attack_dataset.csv'
#input_filename = "D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/attack_dataset.csv"
# input_filename = r"C:\Users\uqipali\OneDrive - The University of Queensland\Papers to read\RESEARCH PAPERS\Paper 1 - Image based Adv. Attack\Experiments\Image generation from Traffic and train IDS\Logic\dataset.txt"
input_filename = r"C:\Users\uqipali\OneDrive - The University of Queensland\Papers to read\RESEARCH PAPERS\Paper 1 - Image based Adv. Attack\Experiments\Image generation from Traffic and train IDS\Logic\DoS_dataset_CH.csv"
# Calling the form_data function to process the input file and obtain data arrays
# (the OTIDS text-format variant is kept commented out below)
# data_array, frame_type, anchor, frame_size = form_data_OTIDS(input_filename)

data_array, frame_type, anchor, frame_size = form_data(input_filename)
# print(data_array)
# Saving data to a JSON file (currently disabled)
# Opening the JSON file in write mode
# with open(r"DoS_data_CH.json", "w") as json_file:
    
#     # Write the data arrays and anchor list to the JSON file
#     json.dump({"data_array": data_array, "frame_type": frame_type, "anchor": anchor}, json_file)


before stuffing
001100010110000100000000101001000010110100000001001001000010010000100000000011011111111100001010011011111111111
before stuffing
000110001111000100011111110010110110000000000000000000000000011110000000000000000001010011111110011011111111111
before stuffing
001001100000000100000011001001000010010001000110000000010001000111001101101001110101000111111110111011111111111
before stuffing
001010100000000100001100100000000001001101000011101100101110000001010111101000000000011001010101111011111111111
before stuffing
001100101001000100001000000101110110111111100010100000100010010000000000000000101000000000000011011011111111111
before stuffing
010101000101000100011011000000000000000000010001010000000000000000000000000000000000110100101100011011111111111
before stuffing
000000000010000100000000000000000000000000000000000000000000000001100001011000100010110010101110101011111111111
before stuffing
0001010100110001000000000000010000100010000111111110000000011111111000000000000000011001

KeyboardInterrupt: 

In [65]:
# Timestamp of the first parsed frame (each entry is [timestamp, bit string]).
data_array[0][0]

1478198376.389427

In [10]:
# Driver cell: parse the labelled OTIDS attack-free text file with
# form_data_OTIDS and save the result as JSON.
# Earlier dataset locations, kept for reference:
#path to benign dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/all_benign_data.txt'
#path to attack dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Original/attack_dataset.csv'
#path to subset of attack dataset = 'D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo\attack_dataset.csv'
#input_filename = "D:\IIT-D\Sem-4\JCD893-M.Tech Major Project Part-2\Anchor_Frame\Dataset\Demo/attack_dataset.csv"
input_filename = r"C:\Users\uqipali\OneDrive - The University of Queensland\Papers to read\RESEARCH PAPERS\Paper 1 - Image based Adv. Attack\Experiments\genids_28th_July_15_51\genids\OTIDS dataset\labeled_test_Attack_free.txt"
# input_filename = r"C:\Users\uqipali\Downloads\DoS_dataset.csv"
# Calling the form_data_OTIDS function to process the input file and obtain data arrays
data_array, frame_type, anchor, frame_size = form_data_OTIDS(input_filename)
# print(data_array)
# Saving data to a JSON file
# Opening the JSON file in write mode
with open(r"C:\Users\uqipali\OneDrive - The University of Queensland\Papers to read\RESEARCH PAPERS\Paper 1 - Image based Adv. Attack\Experiments\genids_28th_July_15_51\genids\OTIDS dataset/formatted_data_OTIDS_Attack_free.json", "w") as json_file:
    
    # Write the data arrays and anchor list to the JSON file
    # (frame_size is not part of the saved object)
    json.dump({"data_array": data_array, "frame_type": frame_type, "anchor": anchor}, json_file)


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\uqipali\\OneDrive - The University of Queensland\\Papers to read\\RESEARCH PAPERS\\Paper 1 - Image based Adv. Attack\\Experiments\\genids_28th_July_15_51\\genids\\OTIDS dataset\\labeled_test_Attack_free.txt'

In [None]:
# Show the first parsed entry: [timestamp, converted bit string].
print(data_array[0])

In [None]:
import json

def generate_table(index_sizes):
    """
    Generate a table containing unique sizes and corresponding indexes from a list of lists.

    The input is not modified: a sorted copy is used to build the table.
    Keys appear in ascending size order, and because the sort is stable the
    indexes listed for each size keep their original relative order.

    Args:
        index_sizes (list of lists): List containing index-size pairs.

    Returns:
        dict: A dictionary where keys are unique sizes and values are lists of indexes.
    """
    # Create a dictionary to store indexes corresponding to each size
    size_index_dict = {}

    # Walk a size-sorted copy so the caller's list keeps its order
    for index, size in sorted(index_sizes, key=lambda pair: pair[1]):
        size_index_dict.setdefault(size, []).append(index)

    return size_index_dict

def store_as_json(index_sizes, filename):
    """
    Build the size table for index_sizes and write it to filename as JSON.

    The table comes from generate_table and is serialised with an indent
    of 4. The target file is opened in write mode, so existing content is
    replaced.

    Args:
        index_sizes (list of lists): List containing index-size pairs.
        filename (str): Name of the JSON file to be generated.
    """
    table = generate_table(index_sizes)

    with open(filename, 'w') as out_file:
        # Serialise to text first, then write it in one call
        out_file.write(json.dumps(table, indent=4))

def print_table(size_index_dict):
    """
    Print one tab-separated row per size, preceded by a header row.

    Each row holds the size, a tab, and the indexes for that size joined
    with ', '. Rows follow the dictionary's iteration order.

    Args:
        size_index_dict (dict): Dictionary containing sizes as keys and lists of indexes as values.
    """
    print("Size\tIndexes")
    for size in size_index_dict:
        joined = ', '.join(str(index) for index in size_index_dict[size])
        print(f"{size}\t{joined}")

# Example usage:
# frame_size is assumed to be defined elsewhere (it is the fourth value
# returned by form_data / form_data_OTIDS in the cells above).
# Write the size -> frame-number table to disk, then print the same table.
filename = "D:\\IIT-D\\Sem-4\\JCD893-M.Tech Major Project Part-2\\Anchor_Frame\\Dataset\\Metadata\\frame_size_table.json"
store_as_json(frame_size, filename)
print_table(generate_table(frame_size))
