# Imports

In [1]:
import pandas as pd
import re
import glob
import os

In [2]:
folder_path = 'UAVCAN_Attack_dataset(DroneCAN)'
bin_files = glob.glob(os.path.join(folder_path, '*.bin'))
# bin_files

In [27]:
bin_files[8:9]

['UAVCAN_Attack_dataset(DroneCAN)\\type8_label.bin']

In [3]:
# Function to parse each line based on the specific format
def parse_line(line):
    """Split one log line into its six whitespace-separated fields.

    The line must look like
    ``<label> (<timestamp>) <interface> <can_id> [<data_length>] <data>``,
    where the timestamp consists of digits and dots and the data length of
    digits only.

    Returns:
        ``[label, timestamp, interface, can_id, data_length, data]`` as
        strings, or a list of six ``None`` values when the line does not
        match the layout.
    """
    fields = re.match(
        r'(\S+)\s+\(([\d.]+)\)\s+(\S+)\s+(\S+)\s+\[([\d]+)\]\s+(.+)',
        line,
    )
    if fields is None:
        return [None] * 6
    # All six groups are mandatory in the pattern, so groups() is fully populated.
    return list(fields.groups())

In [4]:
column_names = ['label', 'timestamp', 'interface', 'can_id', 'data_length', 'data']

In [5]:
output_folder = "converted_bin_to_xlsx"
os.makedirs(output_folder, exist_ok=True)

# Decoding .bin files to .xlsx

In [28]:
# Parse each selected log file into a DataFrame, keep it in `data_dic`
# (keyed by the file's base name) and export it to an .xlsx workbook.
data_dic = {}
# NOTE(review): the [8:9] slice converts a single file only; widen it to
# process the whole dataset in one run.
for file_path in bin_files[8:9]:
    print(file_path)
    with open(file_path, 'r') as file:
        # Lines that do not match the expected layout become all-None rows.
        parsed_data = [parse_line(line.strip()) for line in file]

    parsed_df = pd.DataFrame(parsed_data, columns=column_names)

    label_key = os.path.basename(file_path).split('.')[0]
    # The original rebound a differently named `data_dict` on every pass,
    # so `data_dic` stayed empty; store the frame in the initialised dict.
    data_dic[label_key] = parsed_df

    output_file_path = os.path.join(output_folder, f"{label_key}.xlsx")
    parsed_df.to_excel(output_file_path, index=False)
    print(f"Data saved to {output_file_path}")

    

UAVCAN_Attack_dataset(DroneCAN)\type8_label.bin
Data saved to converted_bin_to_xlsx\type8_label.xlsx


# CAN ID Decoding

In [8]:
def hex_to_32bit_binary(can_id):
    """Return a hexadecimal CAN ID as a 32-character binary string.

    Args:
        can_id: The identifier as hexadecimal text (e.g. ``"05040601"``).
            IDs made only of decimal digits may come back from a spreadsheet
            as numbers (``5040601`` or ``5040601.0``); their digits are still
            read as hexadecimal digits.

    Returns:
        The value in base 2, left-padded with zeros to 32 characters.

    Raises:
        ValueError: If the text is not valid hexadecimal.
    """
    # An integral float such as 5040601.0 would stringify with a ".0" suffix.
    if isinstance(can_id, float) and can_id.is_integer():
        can_id = int(can_id)
    # Always take the printed digits as hex. Re-encoding an int with
    # format(..., "x") would decode the ID as if the digits were decimal.
    can_id_hex = str(can_id).strip()
    return bin(int(can_id_hex, 16))[2:].zfill(32)

In [9]:
def extract_basic_info(binary_29bit):
    """Decode the fields shared by every frame from a 29-bit CAN ID string.

    Reads the source node ID from the last 7 bits, the service flag from the
    8th bit counted from the end, and the priority from the first 5 of the
    last 29 bits. Every intermediate value is printed as a debugging trace.

    Returns:
        ``(source_node_id_decimal, service_message, priority_decimal)`` where
        the service flag is the single character ``'0'`` or ``'1'``.
    """
    print("-"* 50)
    print(f'Binary_29_bit: {binary_29bit}')

    node_bits = binary_29bit[-7:]
    print(f'source_node_id: {node_bits}')
    node_id = int(node_bits, 2)
    print(f'source_node_id_decimal:{node_id}')

    service_bit = binary_29bit[-8]
    print(f'service_message: {service_bit}')

    prio_bits = binary_29bit[-29:-24]
    print(f'priority_bits: {prio_bits}')
    prio = int(prio_bits, 2)
    print(f'priority_decimal: {prio}')

    return node_id, service_bit, prio

In [10]:
def extract_service_message_info(binary_29bit):
    """Decode the service-frame fields from a 29-bit CAN ID string.

    Counting from the end of the string: the destination node ID occupies
    positions -15 to -9, the request/response flag is position -16, and the
    service type ID occupies positions -24 to -17. Every intermediate value
    is printed as a debugging trace.

    Returns:
        ``(destination_node_id_decimal, request_or_response,
        service_type_id_decimal)`` where the flag is ``'0'`` or ``'1'``.
    """
    print(("-"*50) + "Service message"+("-"*50))

    dest_bits = binary_29bit[-15:-8]
    print(f'destination_node_id_bits: {dest_bits}')
    dest_id = int(dest_bits, 2)
    print(f'destination_node_id_decimal: {dest_id}')

    req_flag = binary_29bit[-16]
    print(f'request_or_response:{req_flag}')

    type_bits = binary_29bit[-24:-16]
    print(f'service_type_id_bits:{type_bits}')
    type_id = int(type_bits, 2)
    print(f'service_type_id_decimal:{type_id}')

    return dest_id, req_flag, type_id

In [88]:
def extract_non_service_message_info(binary_29bit, source_node_id_decimal):
    """Decode the message-frame fields from a 29-bit CAN ID string.

    A source node ID of 0 selects the anonymous layout: a 14-bit
    discriminator (positions -24 to -11) followed by the two low bits of the
    message type (positions -10 and -9). Any other source node ID selects the
    regular layout, where positions -24 to -9 hold the 16-bit message type.

    Returns:
        A dict with the keys ``discriminator``, ``lower_message_bit`` and
        ``message_type_decimal``; fields that do not apply to the layout
        are ``None``.
    """
    if source_node_id_decimal != 0:
        # Regular message frame: the whole 16-bit field is the message type.
        return {
            "discriminator": None,
            "lower_message_bit": None,
            "message_type_decimal": int(binary_29bit[-24:-8], 2),
        }

    # Anonymous message frame.
    return {
        "discriminator": int(binary_29bit[-24:-10], 2),
        "lower_message_bit": int(binary_29bit[-10:-8], 2),
        "message_type_decimal": None,
    }

In [89]:
folder_path = output_folder
xlsx_files = glob.glob(os.path.join(folder_path, '*.xlsx'))
xlsx_files

['converted_bin_to_xlsx\\type10_label.xlsx',
 'converted_bin_to_xlsx\\type1_label.xlsx',
 'converted_bin_to_xlsx\\type2_label.xlsx',
 'converted_bin_to_xlsx\\type3_label.xlsx',
 'converted_bin_to_xlsx\\type4_label.xlsx',
 'converted_bin_to_xlsx\\type5_label.xlsx',
 'converted_bin_to_xlsx\\type6_label.xlsx',
 'converted_bin_to_xlsx\\type7_label.xlsx',
 'converted_bin_to_xlsx\\type8_label.xlsx',
 'converted_bin_to_xlsx\\type9_label.xlsx']

In [90]:
# xlsx_files[8:]

In [91]:
canid_decode_folder = "stage1_xlsx_file_can_id_decode/v2"
os.makedirs(canid_decode_folder, exist_ok=True)

In [92]:
# Decode the CAN ID of every frame in each workbook and save the enriched
# table. Each distinct ID is decoded once and merged back onto the frames.
for file_path in xlsx_files:
    df = pd.read_excel(file_path)
    label_key = os.path.basename(file_path).split('.')[0]

    unique_elements = df['can_id'].unique()
    new_df = pd.DataFrame({'can_id': unique_elements})
    # Rows that failed to parse have no CAN ID; they stay undecoded.
    new_df = new_df.dropna()
    new_df['can_id_32_bit'] = new_df['can_id'].apply(hex_to_32bit_binary)
    new_df['can_id_29_bit'] = new_df['can_id_32_bit'].apply(lambda x: x[-29:])

    basic_info = new_df['can_id_29_bit'].apply(extract_basic_info)
    new_df['source_node_id_decimal'] = basic_info.apply(lambda x: x[0])
    new_df['service_flag'] = basic_info.apply(lambda x: x[1])
    new_df['priority'] = basic_info.apply(lambda x: x[2])

    # Message (non-service) frames. The decoded dicts are collected into a
    # frame with fixed columns. The original looked up the keys through the
    # index label 0, which fails when that row is a service frame or was
    # dropped, and DataFrame.apply(axis=1) misbehaves on an empty selection.
    non_service_messages = new_df[new_df['service_flag'] == '0']
    non_service_message_info = pd.DataFrame(
        [
            extract_non_service_message_info(bits, source_node_id)
            for bits, source_node_id in zip(
                non_service_messages['can_id_29_bit'],
                non_service_messages['source_node_id_decimal'],
            )
        ],
        index=non_service_messages.index,
        columns=['discriminator', 'lower_message_bit', 'message_type_decimal'],
    )
    for key in non_service_message_info.columns:
        # Index-aligned assignment: service rows are left as NaN.
        new_df[key] = non_service_message_info[key]

    # Service frames, handled the same way so the output always has the
    # same columns even when a file contains no service traffic.
    service_messages = new_df[new_df['service_flag'] == '1']
    service_message_info = pd.DataFrame(
        [
            extract_service_message_info(bits)
            for bits in service_messages['can_id_29_bit']
        ],
        index=service_messages.index,
        columns=[
            'destination_node_id_decimal',
            'request_or_response',
            'service_type_id_decimal',
        ],
    )
    for key in service_message_info.columns:
        new_df[key] = service_message_info[key]

    # The bit strings were only needed for decoding.
    new_df.drop(columns=['can_id_32_bit', 'can_id_29_bit'], inplace=True)

    merged_df = pd.merge(df, new_df, on='can_id', how='left')
    merged_output_file_path = os.path.join(canid_decode_folder, f"{label_key}_merged.xlsx")
    merged_df.to_excel(merged_output_file_path, index=False)

    print(f"Merged data saved to {merged_output_file_path}")



--------------------------------------------------
Binary_29_bit: 00101000001000000011000000001
source_node_id: 0000001
source_node_id_decimal:1
service_message: 0
priority_bits: 00101
priority_decimal: 5
--------------------------------------------------
Binary_29_bit: 11111000001000011100100000001
source_node_id: 0000001
source_node_id_decimal:1
service_message: 0
priority_bits: 11111
priority_decimal: 31
--------------------------------------------------
Binary_29_bit: 10000000000010101010100000001
source_node_id: 0000001
source_node_id_decimal:1
service_message: 0
priority_bits: 10000
priority_decimal: 16
--------------------------------------------------
Binary_29_bit: 00001000000000000010000000001
source_node_id: 0000001
source_node_id_decimal:1
service_message: 0
priority_bits: 00001
priority_decimal: 1
--------------------------------------------------
Binary_29_bit: 10000000000010101010101111011
source_node_id: 1111011
source_node_id_decimal:123
service_message: 0
priority_bit

#  Data Decoding

In [93]:
folder_path = canid_decode_folder
xlsx_files = glob.glob(os.path.join(folder_path, '*.xlsx'))
xlsx_files

['stage1_xlsx_file_can_id_decode/v2\\type10_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type1_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type2_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type3_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type4_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type5_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type6_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type7_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type8_label_merged.xlsx',
 'stage1_xlsx_file_can_id_decode/v2\\type9_label_merged.xlsx']

In [2]:
data_decode_folder = "stage2_data_decode/v2"
os.makedirs(data_decode_folder, exist_ok=True)

In [95]:

def process_tail_byte(hex_data):
    """Return the last byte of a space-separated hex payload.

    Args:
        hex_data: Payload such as ``"A6 35 00 94"``. ``None`` and NaN (what a
            missing cell becomes after a spreadsheet round trip) are treated
            as an empty payload.

    Returns:
        ``(tail_byte_hex, tail_byte_bin)``: the last token as written and its
        value as an 8-character zero-padded binary string. Both are ``""``
        when the payload is empty or missing.

    Raises:
        ValueError: If the last token is not valid hexadecimal.
    """
    # NaN is the only value that is not equal to itself.
    if hex_data is None or hex_data != hex_data:
        return "", ""

    hex_bytes = str(hex_data).split()
    if not hex_bytes:
        return "", ""

    tail_byte_hex = hex_bytes[-1]
    tail_byte_bin = bin(int(tail_byte_hex, 16))[2:].zfill(8)
    return tail_byte_hex, tail_byte_bin

def determine_message_type(tail_byte_bin):
    """Read the start and end flags from a tail byte's binary string.

    Args:
        tail_byte_bin: Binary text of the tail byte; it is only interpreted
            when it is exactly 8 characters long.

    Returns:
        ``(start_of_message, end_of_message)`` as booleans, taken from the
        first and second character. Both are ``False`` for any other length.
    """
    if len(tail_byte_bin) != 8:
        return False, False
    start_flag = tail_byte_bin[0] == '1'
    end_flag = tail_byte_bin[1] == '1'
    return start_flag, end_flag

def extract_transfer_id(tail_byte_bin):
    """Return the transfer ID held in the last five bits of a tail byte.

    Args:
        tail_byte_bin: Binary text of the tail byte.

    Returns:
        The integer value of characters 3 to 7, or ``None`` when the string
        is not exactly 8 characters long.
    """
    if len(tail_byte_bin) != 8:
        return None
    return int(tail_byte_bin[3:], 2)

def get_effective_data(row):
    """Return a frame's payload bytes without transport overhead.

    Args:
        row: Mapping (for example a DataFrame row) with the keys ``data``
            (space-separated hex bytes), ``single_message_frame`` and
            ``start_of_message``.

    Returns:
        The payload as space-separated hex bytes. The trailing tail byte is
        always removed. For the first frame of a multi-frame message the two
        leading bytes are removed as well (presumably the transfer CRC of the
        DroneCAN transport; confirm against the spec). A missing ``data``
        value gives ``''``.
    """
    data = row['data']
    # Missing payloads show up as None or NaN (NaN != NaN).
    if data is None or data != data:
        return ''

    hex_bytes = str(data).split()

    if row['start_of_message'] and not row['single_message_frame']:
        # Slice up to the tail byte instead of a fixed [2:7]. That slice kept
        # the tail byte on frames shorter than 8 bytes and cut off payload on
        # longer ones.
        return ' '.join(hex_bytes[2:-1])

    # Single-frame messages and continuation frames only carry a tail byte.
    return ' '.join(hex_bytes[:-1])

In [96]:
# For every CAN-ID-decoded workbook: derive the tail-byte columns, the
# start/end/single-frame flags, the transfer ID and the effective payload,
# then write the result to `data_decode_folder`.
for file_path in xlsx_files:
    data = pd.read_excel(file_path)
    label_key = os.path.basename(file_path).split('.')[0]

    
    # Each helper returns a 2-tuple; wrapping it in pd.Series expands it
    # into the two target columns.
    data[['tail_byte_hex', 'tail_byte_bin']] = data['data'].apply(lambda x: pd.Series(process_tail_byte(x)))
    data[['start_of_message', 'end_of_message']] = data['tail_byte_bin'].apply(lambda x: pd.Series(determine_message_type(x)))
    # A frame that both starts and ends a message is a single-frame message.
    data['single_message_frame'] = data.apply(
        lambda row: True if row['start_of_message'] and row['end_of_message'] else False, axis=1
    )
    data['transfer_ID'] = data['tail_byte_bin'].apply(extract_transfer_id)
    # Must run after the flag columns exist: get_effective_data reads them.
    data['effective_data'] = data.apply(get_effective_data, axis=1)

    output_file_path = os.path.join(data_decode_folder, f"{label_key}_final_decoded.xlsx")
    # print(output_file_path)
    # break
    data.to_excel(output_file_path, index=False)

    print(f"Data decoded saved to {output_file_path}")


Data decoded saved to stage2_data_decode/v2\type10_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type1_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type2_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type3_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type4_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type5_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type6_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type7_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type8_label_merged_final_decoded.xlsx
Data decoded saved to stage2_data_decode/v2\type9_label_merged_final_decoded.xlsx


In [54]:
os.getcwd()

'C:\\Users\\Reek\\Documents\\uavcan_V2'

# Data cleaning

In [3]:
data_clean_folder = "stage3_data_cleaning/v2"
os.makedirs(data_clean_folder, exist_ok=True)

In [4]:
folder_path = data_decode_folder
xlsx_files = glob.glob(os.path.join(folder_path, '*.xlsx'))
xlsx_files

['stage2_data_decode/v2\\type10_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type1_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type2_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type3_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type4_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type5_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type6_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type7_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type8_label_merged_final_decoded.xlsx',
 'stage2_data_decode/v2\\type9_label_merged_final_decoded.xlsx']

In [6]:
def split_and_convert(effective_data):
    """Convert a space-separated hex payload into a list of integers.

    Args:
        effective_data: Payload such as ``"00 0A FF"``. Non-string values are
            converted with ``str`` first. ``None`` and NaN (an empty
            spreadsheet cell) are treated as an empty payload instead of
            being parsed as the text ``"None"`` / ``"nan"``.

    Returns:
        One integer per hex token; an empty list for an empty or missing
        payload.

    Raises:
        ValueError: If a token is not valid hexadecimal.
    """
    # NaN is the only value that is not equal to itself.
    if effective_data is None or effective_data != effective_data:
        return []
    return [int(hex_val, 16) for hex_val in str(effective_data).split()]

In [7]:
# Turn every decoded workbook into a purely numeric table: drop the raw text
# columns, encode the label, cast flags to int and expand the payload into
# one column per byte.
for file_path in xlsx_files:
    data = pd.read_excel(file_path)
    label_key = os.path.basename(file_path).split('.')[0]

    data = data.drop(columns=['interface','data', 'tail_byte_hex', 'tail_byte_bin','discriminator','lower_message_bit'])

    # Parse the payload BEFORE the -99 fill. Filling first turned an empty
    # payload into the text "-99", which was then read as hex and produced a
    # bogus byte value of -153. An empty string parses to an empty list.
    effective_data_split = data['effective_data'].fillna('').apply(split_and_convert)

    # -99 marks fields that do not apply to a frame or are missing.
    data = data.fillna(-99)
    data['label'] = data['label'].apply(lambda x: 0 if x == 'Normal' else 1)

    data = data.astype({col: int for col in data.select_dtypes(include=['bool']).columns})

    # One column per payload byte; shorter payloads are padded with NaN.
    effective_data_split_df = pd.DataFrame(effective_data_split.tolist(), index=data.index)
    effective_data_split_df.columns = [f'effective_data_{i}' for i in range(effective_data_split_df.shape[1])]
    data = pd.concat([data, effective_data_split_df], axis=1)
    # -199 marks payload positions beyond the end of a frame's data.
    data = data.fillna(-199)
    data = data.drop(columns=['effective_data'])
    output_file_path = os.path.join(data_clean_folder, f"{label_key}_clean3.xlsx")
    data.to_excel(output_file_path, index=False)

    print(f"Data decoded saved to {output_file_path}")

Data decoded saved to stage3_data_cleaning/v2\type10_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type1_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type2_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type3_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type4_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type5_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type6_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type7_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type8_label_merged_final_decoded_clean3.xlsx
Data decoded saved to stage3_data_cleaning/v2\type9_label_merged_final_decoded_clean3.xlsx


In [99]:
file_path = xlsx_files[0]
df = pd.read_excel(file_path)
df.shape

(207380, 22)

In [100]:
df.head()

Unnamed: 0,label,timestamp,interface,can_id,data_length,data,source_node_id_decimal,service_flag,priority,discriminator,...,destination_node_id_decimal,request_or_response,service_type_id_decimal,tail_byte_hex,tail_byte_bin,start_of_message,end_of_message,single_message_frame,transfer_ID,effective_data
0,Normal,0.0,can0,5040601,8,A6 35 00 00 00 00 00 94,1,0,5,,...,,,,94,10010100,True,False,False,20,00 00 00 00 00
1,Normal,0.000139,can0,5040601,7,00 00 00 00 00 00 74,1,0,5,,...,,,,74,1110100,False,True,False,20,00 00 00 00 00 00
2,Normal,0.004482,can0,5040601,8,A6 35 00 00 00 00 00 95,1,0,5,,...,,,,95,10010101,True,False,False,21,00 00 00 00 00
3,Normal,0.004641,can0,5040601,7,00 00 00 00 00 00 75,1,0,5,,...,,,,75,1110101,False,True,False,21,00 00 00 00 00 00
4,Normal,0.00933,can0,5040601,8,A6 35 00 00 00 00 00 96,1,0,5,,...,,,,96,10010110,True,False,False,22,00 00 00 00 00


In [101]:
df.columns 

Index(['label', 'timestamp', 'interface', 'can_id', 'data_length', 'data',
       'source_node_id_decimal', 'service_flag', 'priority', 'discriminator',
       'lower_message_bit', 'message_type_decimal',
       'destination_node_id_decimal', 'request_or_response',
       'service_type_id_decimal', 'tail_byte_hex', 'tail_byte_bin',
       'start_of_message', 'end_of_message', 'single_message_frame',
       'transfer_ID', 'effective_data'],
      dtype='object')

In [134]:
data =df

In [135]:
data.shape

(207380, 22)

In [136]:
# data.head()

In [138]:
# Single-file trial of the cleaning step.
# Drop the raw text columns (this trial also drops 'can_id').
data = data.drop(columns=['interface', 'can_id','data', 'tail_byte_hex', 'tail_byte_bin','discriminator','lower_message_bit'])

# Binary label: 0 for 'Normal', 1 for anything else.
data['label'] = data['label'].apply(lambda x: 0 if x == 'Normal' else 1)

# Cast boolean flag columns to 0/1 integers.
data = data.astype({col: int for col in data.select_dtypes(include=['bool']).columns})

# Expand the hex payload into one integer column per byte.
effective_data_split = data['effective_data'].apply(split_and_convert)
effective_data_split_df = effective_data_split.apply(pd.Series)
effective_data_split_df.columns = [f'effective_data_{i}' for i in range(effective_data_split_df.shape[1])]
data = pd.concat([data, effective_data_split_df], axis=1)
# Here a single -99 sentinel fills both missing fields and payload padding.
data = data.fillna(-99)
data = data.drop(columns=['effective_data'])
data.shape

(207380, 21)

In [140]:
data.head()

Unnamed: 0,label,timestamp,data_length,source_node_id_decimal,service_flag,priority,message_type_decimal,destination_node_id_decimal,request_or_response,service_type_id_decimal,...,end_of_message,single_message_frame,transfer_ID,effective_data_0,effective_data_1,effective_data_2,effective_data_3,effective_data_4,effective_data_5,effective_data_6
0,0,0.0,8,1,0,5,1030,-99.0,-99.0,-99.0,...,0,0,20,0.0,0.0,0.0,0.0,0.0,-99.0,-99.0
1,0,0.000139,7,1,0,5,1030,-99.0,-99.0,-99.0,...,1,0,20,0.0,0.0,0.0,0.0,0.0,0.0,-99.0
2,0,0.004482,8,1,0,5,1030,-99.0,-99.0,-99.0,...,0,0,21,0.0,0.0,0.0,0.0,0.0,-99.0,-99.0
3,0,0.004641,7,1,0,5,1030,-99.0,-99.0,-99.0,...,1,0,21,0.0,0.0,0.0,0.0,0.0,0.0,-99.0
4,0,0.00933,8,1,0,5,1030,-99.0,-99.0,-99.0,...,0,0,22,0.0,0.0,0.0,0.0,0.0,-99.0,-99.0


In [115]:
data['label'] = data['label'].apply(lambda x: 0 if x == 'Normal' else 1)

In [117]:
data_filled = data.astype({col: int for col in data.select_dtypes(include=['bool']).columns})

In [118]:
data_filled.head()

Unnamed: 0,label,timestamp,data_length,source_node_id_decimal,service_flag,priority,message_type_decimal,destination_node_id_decimal,request_or_response,service_type_id_decimal,start_of_message,end_of_message,single_message_frame,transfer_ID,effective_data
0,0,0.0,8,1,0,5,1030,-99.0,-99.0,-99.0,1,0,0,20,00 00 00 00 00
1,0,0.000139,7,1,0,5,1030,-99.0,-99.0,-99.0,0,1,0,20,00 00 00 00 00 00
2,0,0.004482,8,1,0,5,1030,-99.0,-99.0,-99.0,1,0,0,21,00 00 00 00 00
3,0,0.004641,7,1,0,5,1030,-99.0,-99.0,-99.0,0,1,0,21,00 00 00 00 00 00
4,0,0.00933,8,1,0,5,1030,-99.0,-99.0,-99.0,1,0,0,22,00 00 00 00 00


# Data decoding (trial)

In [34]:
file_path = xlsx_files[0]
data = pd.read_excel(file_path)
data.shape

(207380, 16)

In [39]:
data2=data

In [67]:
data=data2

In [68]:
# Single-file trial of the tail-byte decoding step.
# Each helper returns a 2-tuple that pd.Series expands into two columns.
data[['tail_byte_hex', 'tail_byte_bin']] = data['data'].apply(lambda x: pd.Series(process_tail_byte(x)))
data[['start_of_message', 'end_of_message']] = data['tail_byte_bin'].apply(lambda x: pd.Series(determine_message_type(x)))
# A frame that both starts and ends a message is a single-frame message.
data['single_message_frame'] = data.apply(
    lambda row: True if row['start_of_message'] and row['end_of_message'] else False, axis=1
)
data['transfer_ID'] = data['tail_byte_bin'].apply(extract_transfer_id)
# Reads the flag columns created above, so it has to come last.
data['effective_data'] = data.apply(get_effective_data, axis=1)
data.shape

(207380, 23)

In [69]:
data.head()

Unnamed: 0,label,timestamp,interface,can_id,data_length,data,source_node_id_decimal,service_flag,priority,discriminator,...,destination_node_id_decimal,request_or_response,service_type_id_decimal,tail_byte_hex,tail_byte_bin,start_of_message,end_of_message,single_message_frame,transfer_ID,effective_data
0,Normal,0.0,can0,5040601,8,A6 35 00 00 00 00 00 94,1,0,5,,...,,,,94,10010100,True,False,False,20,00 00 00 00 00
1,Normal,0.000139,can0,5040601,7,00 00 00 00 00 00 74,1,0,5,,...,,,,74,1110100,False,True,False,20,00 00 00 00 00 00
2,Normal,0.004482,can0,5040601,8,A6 35 00 00 00 00 00 95,1,0,5,,...,,,,95,10010101,True,False,False,21,00 00 00 00 00
3,Normal,0.004641,can0,5040601,7,00 00 00 00 00 00 75,1,0,5,,...,,,,75,1110101,False,True,False,21,00 00 00 00 00 00
4,Normal,0.00933,can0,5040601,8,A6 35 00 00 00 00 00 96,1,0,5,,...,,,,96,10010110,True,False,False,22,00 00 00 00 00


# CAN ID decoding (trial code)

In [None]:
file_path = xlsx_files[0]
# file_path
df = pd.read_excel(file_path)
df.shape

In [None]:
df.head()

In [None]:
df.columns, df['can_id'].unique(), type

In [None]:
# new_df

In [None]:
# Single-file trial of the CAN ID decoding: decode each distinct ID once.
unique_elements = df['can_id'].unique()
new_df = pd.DataFrame({'can_id': unique_elements})
new_df = new_df.dropna()
new_df['can_id_32_bit'] = new_df['can_id'].apply(hex_to_32bit_binary)
new_df['can_id_29_bit'] = new_df['can_id_32_bit'].apply(lambda x: x[-29:])

basic_info = new_df['can_id_29_bit'].apply(extract_basic_info)
new_df['source_node_id_decimal'] = basic_info.apply(lambda x: x[0])
new_df['service_flag'] = basic_info.apply(lambda x: x[1])
new_df['priority'] = basic_info.apply(lambda x: x[2])

# Message (non-service) frames. Collect the decoded dicts into a frame with
# fixed columns instead of reading the keys through index label 0, which
# fails when that row is not a message frame.
non_service_messages = new_df[new_df['service_flag'] == '0']
non_service_message_info = pd.DataFrame(
    [
        extract_non_service_message_info(bits, source_node_id)
        for bits, source_node_id in zip(
            non_service_messages['can_id_29_bit'],
            non_service_messages['source_node_id_decimal'],
        )
    ],
    index=non_service_messages.index,
    columns=['discriminator', 'lower_message_bit', 'message_type_decimal'],
)
for key in non_service_message_info.columns:
    # Index-aligned assignment: service rows are left as NaN.
    new_df[key] = non_service_message_info[key]

# Service frames. extract_service_message_info returns the 3-tuple
# (destination node ID, request/response flag, service type ID); the
# original indexed it with 1, 2 and 4, shifting the fields and raising
# IndexError on the last one.
service_messages = new_df[new_df['service_flag'] == '1']
service_message_info = pd.DataFrame(
    [
        extract_service_message_info(bits)
        for bits in service_messages['can_id_29_bit']
    ],
    index=service_messages.index,
    columns=[
        'destination_node_id_decimal',
        'request_or_response',
        'service_type_id_decimal',
    ],
)
for key in service_message_info.columns:
    new_df[key] = service_message_info[key]

# The bit strings were only needed for decoding.
new_df.drop(columns=['can_id_32_bit', 'can_id_29_bit'], inplace=True)

new_df

In [None]:
new_df

In [None]:
merged_df = pd.merge(parsed_df, new_df, on='can_id', how='left')
merged_df.shape

In [None]:
parsed_df.shape

In [None]:
new_df.loc[2],type(new_df.loc[2])

In [None]:
# Use a single .loc[row, column] call: the chained form
# new_df.loc[2]['service_flag'] assigns into a temporary row copy and
# leaves new_df unchanged.
new_df.loc[2, 'service_flag'] = '1'

In [None]:
new_df

In [None]:
# Extract service messages information
service_messages = new_df[new_df['service_flag'] == '1']
service_message_info = service_messages.apply(
    lambda row: extract_service_message_info(row['can_id_29_bit']),
    axis=1
)

# Add service message info to new_df.
# extract_service_message_info returns the 3-tuple
# (destination node ID, request/response flag, service type ID), so the
# valid positions are 0, 1 and 2. The previous indices 1, 2 and 4 shifted
# the fields and raised IndexError.
new_df.loc[service_messages.index, 'destination_node_id_decimal'] = service_message_info.apply(lambda x: x[0])
new_df.loc[service_messages.index, 'request_or_response'] = service_message_info.apply(lambda x: x[1])
new_df.loc[service_messages.index, 'service_type_id_decimal'] = service_message_info.apply(lambda x: x[2])


new_df

In [None]:
list(non_service_message_info[0].keys())

In [None]:
# Trial: expand the per-row dicts for message (non-service) frames into
# columns and left-join them onto new_df by index.
non_service_messages = new_df[new_df['service_flag'] == '0']
# Bare expression: only the last expression of a cell is displayed.
non_service_messages
non_service_message_info = non_service_messages.apply(
    lambda row: extract_non_service_message_info(row['can_id_29_bit'], row['source_node_id_decimal']),
    axis=1
    )
# One column per dict key, keeping the original row index for the join.
non_service_message_info_df = pd.DataFrame(non_service_message_info.tolist(), index=non_service_messages.index)
non_service_message_info_df
merged_df = new_df.merge(non_service_message_info_df, left_index=True, right_index=True, how='left')
# merged_df
# NOTE(review): the join result is in merged_df; new_df itself is unchanged.
new_df