In [25]:
import os
import glob
import json
import pandas as pd
# pd.set_option('display.max_rows', None)

directory_path = './logs/dut0_original/logs_client'
file_pattern = 'Client_dut*'

# Use glob to find all files matching the pattern
file_paths = glob.glob(os.path.join(directory_path, file_pattern))

def parse_files(file_paths):
    frames = []
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            for json_line in file:
                # Parse frame
                try:
                    frame = json.loads(json_line)
                except Exception as error:
                    print(f"Failed decoding JSON for file {file_path}: {error}")
                frames.append(frame)
    return frames

frames_original = sorted(parse_files(file_paths), key=lambda x: x["timestamp"])
df_original = pd.DataFrame(frames_original)


### Converting Unix timestamp to UTC

And droping any duplicates.... Why do we do this though?????

In [26]:
df_original['timestamp'] = pd.to_datetime(df_original['timestamp'], unit='s').dt.tz_localize('UTC')
df_original = df_original.drop_duplicates()

## Custom frame decoding

Create the dictionary that holds the frame formatting for a given frame ID. This will be custom to your specific frame. Find the documentation for each field in payload_parser. 

Also define the keys for each element.

In [27]:

frame_id_formatting = {
    "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB": 0,  # test frame
    "IIIIIIIIIIIIIII": 1,  # fini
    "IIIIII": 16,  # exception
}

keys = ['timestamp', 'total_errors', 'mcycle', 'minstret', 'imem_se', 'imem_de', 'dmem_se', 'dmem_de', 'regfile_se', 'regfile_de', 'iv', 'jump', 'branch', 'dsp_t', 'trap', 'illegal']
# keys = ['timestamp', 'cy_c', 'tm_c', 'ir_c', 'wait_ii', 'wait_if', 'wait_mc', 'load', 'store', 'wait_ls', 'branch', 'tbranch', 'imem_ecc', 'dmem_ecc', 'regfile', 'iv']


Parsing the fields based on frame type and then parsing the data tuple into readable fields

In [28]:
# from payload_parser import decode_frame 


# def parse_data(df):
#     data_frame = []
#     for index, row in df.iterrows():
#         if 'data' in row and isinstance(row['data'], str) and row['data'] != '':
#             try:
#                 data_tup = decode_frame(bytes.fromhex(row['data']), frame_id_formatting)
#                 parsed_payload = (row['timestamp'],) + tuple(map(int, data_tup))  # Convert elements to int
#                 mapping = {num: key for num, key in zip(keys, parsed_payload)}
#                 data_frame.append(mapping)
#             except Exception as error:
#                 print(f"Parsing error! {error}")
#                 data_frame.append(row.to_dict())
#         else:
#             data_frame.append(row.to_dict())

#     return pd.DataFrame(data_frame)

# df_parsed = parse_data(df_original)

# for column in df_parsed.columns:
#     if column != 'timestamp':  # Ignore the 'timestamp' column
#         # Convert columns to numeric, coercing errors to NaN
#         df_parsed[column] = pd.to_numeric(df_parsed[column], errors='coerce')
        
#         # Option 1: Fill NaN values with 0 and convert to int
#         df_parsed[column] = df_parsed[column].fillna(0).astype(int)


In [29]:
from payload_parser import decode_frame 

def event_row(row, data_frame, error_code=0):
    mapping = {num: 0 for num in keys}
    # print(row['timestamp'])
    mapping['timestamp'] = row['timestamp']
    # print(mapping['timestamp'])
    if error_code != 0:
        mapping['event'] = -1 ## COMMUNICATION ERROR..  or something
    else:
        mapping['event'] = row['event']

    return mapping
    

def parse_data(df):
    data_frame = []
    for index, row in df.iterrows():
        mapping = None
        if 'id' in row and row['id'] == 20:
            if 'data' in row and isinstance(row['data'], str) and row['data'] != '':
                try:
                    error_code, data_tup = decode_frame(bytes.fromhex(row['data']), frame_id_formatting)
                    
                    if error_code != 0:
                        # print(f"Error decoding frame: {error_code}")
                        mapping = event_row(row, data_frame, error_code)
                    else:
                        parsed_payload = (row['timestamp'],) + tuple(map(int, data_tup)) + (row['event'],)
                        mapping = {num: key for num, key in zip(keys, parsed_payload)}
                        # data_frame.append(mapping)
                except Exception as error:
                    # print(f"Parsing error! {error}")
                    mapping = event_row(row, data_frame)
            else:
                mapping = event_row(row, data_frame)
        else:
            mapping = event_row(row, data_frame)

        data_frame.append(mapping)

    data_frame = pd.DataFrame(data_frame)

    for column in data_frame.columns:
        if column != 'timestamp' or column != 'event':  # Ignore the 'timestamp' column
            # Convert columns to numeric, coercing errors to NaN
            data_frame[column] = pd.to_numeric(data_frame[column], errors='coerce')
            
            # Option 1: Fill NaN values with 0 and convert to int
            data_frame[column] = data_frame[column].fillna(0).astype(int)

    return data_frame

df_parsed = parse_data(df_original)
try:
    df_parsed['timestamp'] = pd.to_datetime(df_parsed['timestamp']).dt.tz_localize('UTC')
except Exception as error:
    print(f"Error converting timestamp to datetime: {error}")


In [36]:
df_original['id']

Unnamed: 0,message,type,id,timestamp,data,event
0,,Serial dut0_monitor,20,2023-12-06 17:40:11.329293056+00:00,aa013c00000000e243ef1056ce386a0000000000000000...,
1,,Serial dut0_monitor,20,2023-12-06 17:40:17.023052032+00:00,aa013c00000000d543ef10c8de396c0000000000000000...,
2,,Serial dut0_monitor,20,2023-12-06 17:40:22.718929664+00:00,aa013c00000000de43ef103aef3a6e0000000000000000...,
3,,Serial dut0_monitor,20,2023-12-06 17:40:28.420879360+00:00,aa013c00000000d043ef10acff3b700000000000000000...,
4,,Serial dut0_monitor,20,2023-12-06 17:40:34.115907840+00:00,aa013c00000000df43ef101e103d720000000000000000...,
5,,Serial dut0_monitor,20,2023-12-06 17:40:39.810676992+00:00,aa013c00000000f927f01025363e740000000000000000...,
6,,Serial dut0_monitor,20,2023-12-06 17:40:45.513253376+00:00,aa013c00000000cf43ef1097463f760000000000000000...,
7,,Serial dut0_monitor,20,2023-12-06 17:40:51.207973376+00:00,aa013c00000000d143ef10095740780000000000000000...,
8,,Serial dut0_monitor,20,2023-12-06 17:40:56.902738176+00:00,aa013c00000000d943ef107b67417a0000000000000000...,
9,,Serial dut0_monitor,20,2023-12-06 17:41:02.598188032+00:00,aa013c00000000e043ef10ed77427c0000000000000000...,


-------------

# Data Analysis

Using ```df_parsed``` as input

### DSP timeouts

In [31]:
filtered = df_parsed[df_parsed['dsp_t'] > 0]
filtered

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal,event
2380,2023-12-06 23:21:52.076274688+00:00,0,284115935,387857734,31016,0,0,0,0,0,0,1062528,2449585,1,3,0,0


### IV detections

In [32]:
filtered = df_parsed[df_parsed['iv'] > 0]
filtered

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal,event


#### IV FPR higher than 2%

In [33]:
# scaler = MinMaxScaler()
# df['imem_se'] = scaler.fit_transform(df[['imem_se']])
# df['iv'] = scaler.fit_transform(df[['iv']])
# df_no_duplicates = df.drop_duplicates(subset='iv', keep='first')
# fpr_violation_condition = df_no_duplicates['iv'] < (0.98*df_no_duplicates['imem_se'])
# fpr_violation_condition

### Double bit errors

In [34]:
df_no_duplicates = df_parsed.drop_duplicates(subset='imem_de', keep='first')
df_no_duplicates

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal,event
0,2023-12-06 17:40:11.329293056+00:00,0,284115938,1782107734,0,0,0,0,0,0,0,1062528,2449585,0,3,0,0
6350,2023-12-07 05:39:14.818258432+00:00,6272,2,255,2002,4276371331,0,0,0,0,0,0,0,0,0,0,0


In [35]:
fpr_violation_condition = df_parsed['iv'] > (0.98*df['imem_se'])
# df[fpr_violation_condition]

NameError: name 'df' is not defined

### Filtering based on SmartFusion2 reset. Excluding DMEM and Register file because of scrubbing and overwriting

In [None]:
mask = (df_parsed['imem_se'] < df_parsed['imem_se'].shift()-10) | (df_parsed['imem_de'] < df_parsed['imem_de'].shift()-10)
resets = df_parsed[mask | mask.shift(-1)]
pd.concat([df_parsed.head(1), resets])

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,...,jump,branch,dsp_t,trap,illegal,message,type,id,data,event
0,2023-12-06 17:40:11.329293056+00:00,0,284115938,1782107734,0,0,0,0,0,0,...,1062528,2449585,0,3,0,0,0,0,0,0
2109,2023-12-06 22:55:34.892076544+00:00,0,284115940,1814223254,313862,0,0,0,0,0,...,1062528,2449585,0,3,0,0,0,0,0,0
2110,2023-12-06 22:55:46.892874240+00:00,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,19,0,0
4235,2023-12-07 02:17:53.791093760+00:00,0,283940681,2605629230,440060,0,0,0,0,0,...,1062483,2444233,0,0,0,0,0,0,0,0
4236,2023-12-07 02:18:05.792368896+00:00,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,19,0,0
6349,2023-12-07 05:39:09.316381440+00:00,0,284115941,2228648044,469169,0,0,0,59284,0,...,1062528,2449585,0,3,0,0,0,0,0,0
6350,2023-12-07 05:39:14.818258432+00:00,6272,2,255,2002,4276371331,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6351,2023-12-07 05:39:26.819175680+00:00,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,19,0,0
6919,2023-12-07 06:33:50.395201280+00:00,0,284115954,1818088411,239166,0,0,0,0,0,...,1062528,2449585,0,3,0,0,0,0,0,0
6920,2023-12-07 08:42:16.060073216+00:00,0,284115933,235373446,0,0,0,0,0,0,...,1062528,2449585,0,3,0,0,0,0,0,0


### Multiple explanations for differences between IMEM and instruction validator detections
#### Significant less detections by IV compared to IMEM

#### Significant more detections by IV compared to IMEM