In [10]:
import os
import glob
import json
import struct
from datetime import datetime, timezone
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
pd.set_option('display.max_rows', None)

directory_path_original = './logs/dut0_original/logs_client'
directory_path_ecc_bypass = './logs/dut0_ecc_bypass/logs_client'
directory_path_iv_tee = './logs/dut0_iv_tee/logs_client'
file_pattern = 'Client_dut*'

# Use glob to find all files matching the pattern
file_paths_original = glob.glob(os.path.join(directory_path_original, file_pattern))
file_paths_ecc_bypass = glob.glob(os.path.join(directory_path_ecc_bypass, file_pattern))
file_paths_iv_tee = glob.glob(os.path.join(directory_path_iv_tee, file_pattern))

## Create the frame_id_formatting

Create the dictionary that holds the frame formatting for a given frame ID. This will be custom to your specific frame. Find the documentation for each field in payload_parser.

In [11]:

frame_id_formatting = {
    "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB": 0,  # test frame
    "IIIIIIIIIIIIIII": 1,  # fini
    "IIIIII": 16,  # exception
}

keys = ['timestamp', 'total_errors', 'mcycle', 'minstret', 'imem_se', 'imem_de', 'dmem_se', 'dmem_de', 'regfile_se', 'regfile_de', 'iv', 'jump', 'branch', 'dsp_t', 'trap', 'illegal']
# keys = ['timestamp', 'cy_c', 'tm_c', 'ir_c', 'wait_ii', 'wait_if', 'wait_mc', 'load', 'store', 'wait_ls', 'branch', 'tbranch', 'imem_ecc', 'dmem_ecc', 'regfile', 'iv']


Recursively checking all the log files and sorting on Unix timestamp

In [12]:
def parse_files(file_paths):
    frames = []
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            for json_line in file:
                # Parse frame
                try:
                    frame = json.loads(json_line)
                except Exception as error:
                    print(f"Failed decoding JSON for file {file_path}: {error}")
                frames.append(frame)
    return frames

Constructing frames

In [13]:
frames_original = sorted(parse_files(file_paths_original), key=lambda x: x["timestamp"])
frames_ecc_bypass = sorted(parse_files(file_paths_ecc_bypass), key=lambda x: x["timestamp"])
frames_iv_tee = sorted(parse_files(file_paths_iv_tee), key=lambda x: x["timestamp"])
df_original = pd.DataFrame(frames_original)
df_ecc_bypass = pd.DataFrame(frames_ecc_bypass)
df_iv_tee = pd.DataFrame(frames_iv_tee)

Frame helper functions

Converting Unix timestamp to UTC

In [14]:
# def convert_unix_to_utc(pd):
#     crc_errors = 0
#     frame_parsing_errors = 0
#     for frame in frames:
#         frame['timestamp'] = str(datetime.utcfromtimestamp(float(frame['timestamp'])).replace(tzinfo=timezone.utc).strftime('%Y-%m-%d %H:%M:%S.%f UTC'))
#         if 'data' in frame:
#             try:
#                 decoded_frame = decode_frame(bytes.fromhex(frame['data']))
#                 #CRC check failed
#                 if decoded_frame == None:
#                     converted_frames.append({'timestamp': frame['timestamp'], 'error': "CRC check error"})
#                     crc_errors += 1
#                 # Exception
#                 elif len(decoded_frame) < 15:
#                     converted_frames.append({'timestamp': frame['timestamp'], 'error': f"Frame parsing error. Data field too small: {decode_frame}"})
#                 # Data correct
#                 else:
#                     frame['data'] = decoded_frame
#                     converted_frames.append(frame)
#             except Exception as error:
#                 frame_parsing_errors += 1
#                 converted_frames.append({'timestamp': frame['timestamp'], 'error': "Frame parsing error (possibly due to comm failure)"})
#                 #print(f"Parsing error! {error}")
#         elif 'event' in frame:
#             converted_frames.append(frame)
#     return converted_frames, crc_errors, frame_parsing_errors

df_original['timestamp'] = pd.to_datetime(df_original['timestamp'], unit='s').dt.tz_localize('UTC')
df_ecc_bypass['timestamp'] = pd.to_datetime(df_ecc_bypass['timestamp'], unit='s').dt.tz_localize('UTC')
df_iv_tee['timestamp'] = pd.to_datetime(df_iv_tee['timestamp'], unit='s').dt.tz_localize('UTC')

Filtering out duplicate entries

In [15]:
df_original = df_original.drop_duplicates()
df_ecc_bypass = df_ecc_bypass.drop_duplicates()
df_iv_tee = df_iv_tee.drop_duplicates()

Parsing the fields based on frame type

In [16]:
from payload_parser import decode_frame 

def data_dict(data_hex):
    try:
        data_tup = decode_frame(bytes.fromhex(data_hex), frame_id_formatting)
        return data_tup
    except Exception as error:
        print(f"Parsing error! {error}")
        return data_hex 
    
def parse_data(df):
    df_copy = df.copy()

    # Apply the function only to rows where 'data' is not empty
    mask = df_copy['data'].notnull() & (df_copy['data'] != '')

    df_copy.loc[mask, 'data'] = df_copy.loc[mask, 'data'].apply(data_dict)

    return df_copy

df_parsed = parse_data(df_original)

Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0
Parsing error! non-hexadecimal number found in fromhex() arg at position 0


Parsing the data tuple into readable fields

In [28]:
data_frame = []

for index, row in df_parsed.iterrows():
    if 'data' in row and isinstance(row['data'], tuple):  # Fix: Check if row['data'] is a tuple
        parsed_payload = (row['timestamp'],) + tuple(row['data'])
        mapping = {num: key for num, key in zip(keys, parsed_payload)}
        data_frame.append(mapping)

df = pd.DataFrame(data_frame)

# DSP timeouts

In [29]:
filtered = df[df['dsp_t'] > 0]
filtered

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal
2376,2023-12-06 23:21:52.076274688+00:00,0,284115935,387857734,31016,0,0,0.0,0.0,0.0,0.0,1062528.0,2449585.0,1.0,3.0,0.0


# IV detections

In [31]:
filtered = df[df['iv'] > 0]
filtered

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal


# IV FPR higher than 2%

In [None]:
# scaler = MinMaxScaler()
# df['imem_se'] = scaler.fit_transform(df[['imem_se']])
# df['iv'] = scaler.fit_transform(df[['iv']])
# df_no_duplicates = df.drop_duplicates(subset='iv', keep='first')
# fpr_violation_condition = df_no_duplicates['iv'] < (0.98*df_no_duplicates['imem_se'])
# fpr_violation_condition

# Double bit errors

In [32]:
df_no_duplicates = df.drop_duplicates(subset='imem_de', keep='first')
df_no_duplicates

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal
0,2023-12-06 17:40:11.329293056+00:00,0,284115938,1782107734,0,0,0,0.0,0.0,0.0,0.0,1062528.0,2449585.0,0.0,3.0,0.0
6342,2023-12-07 05:39:14.818258432+00:00,6272,2,255,2002,4276371331,0,,,,,,,,,


In [None]:
fpr_violation_condition = df['iv'] > (0.98*df['imem_se'])
# df[fpr_violation_condition]

# Filtering based on SmartFusion2 reset. Excluding DMEM and Register file because of scrubbing and overwriting

In [None]:
mask = (df['imem_se'] < df['imem_se'].shift()-10) | (df['imem_de'] < df['imem_de'].shift()-10)
resets = df[mask | mask.shift(-1)]
pd.concat([df.head(1), resets])

# Multiple explanations for differences between IMEM and instruction validator detections
## Significant less detections by IV compared to IMEM

## Significant more detections by IV compared to IMEM