In [29]:
import os
import glob
import json
import pandas as pd

# Folder holding the client-side logs, and the filename wildcard that selects
# which files inside it are loaded.
directory_path = './logs/dut1_ecc_bypass/logs_client'
file_pattern = 'Client_dut*'

# Expand the wildcard into the concrete list of log files to parse.
file_paths = glob.glob(
    os.path.join(directory_path, file_pattern)
)

def parse_files(file_paths):
    """Read newline-delimited JSON frames from every file in ``file_paths``.

    Args:
        file_paths: Iterable of paths; each file is read line by line and
            every line is decoded as one JSON document.

    Returns:
        list: The decoded frames, in file order and then line order. Blank
        lines are skipped silently; lines that fail to decode are reported
        on stdout and skipped.
    """
    frames = []
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            for json_line in file:
                # A blank line carries no frame; do not report it as an error.
                if not json_line.strip():
                    continue
                # Parse frame
                try:
                    frame = json.loads(json_line)
                except Exception as error:
                    print(f"Failed decoding JSON for file {file_path}: {error}")
                    # Skip the bad line. Falling through to append() would
                    # either raise NameError (no frame decoded yet) or add
                    # the previously decoded frame a second time.
                    continue
                frames.append(frame)
    return frames

# Gather every frame from the matched log files, order them by their
# "timestamp" field, and build the raw DataFrame from the ordered list.
frames_original = parse_files(file_paths)
frames_original.sort(key=lambda frame: frame["timestamp"])
df_original = pd.DataFrame(frames_original)

### Converting Unix timestamp to UTC

Also drop any duplicate rows. **TODO:** explain why duplicate frames can appear in the logs and confirm that dropping them is safe.

In [30]:
# Interpret the raw "timestamp" values as Unix seconds and make them UTC-aware.
# utc=True is used instead of a separate .dt.tz_localize('UTC') so the cell can
# be re-run: tz_localize raises TypeError once the column is already tz-aware.
df_original['timestamp'] = pd.to_datetime(df_original['timestamp'], unit='s', utc=True)
# Remove rows that are exact duplicates across all columns.
df_original = df_original.drop_duplicates()
df_original

Unnamed: 0,message,type,id,timestamp,data,event
0,,Serial dut1_monitor,20,2023-12-06 15:33:10.100897280+00:00,aa013c000000000c28f01085d2294e1e00000000000000...,
1,,Serial dut1_monitor,20,2023-12-06 15:33:15.795449600+00:00,aa013c00000000d043ef10f7e22a501e00000000000000...,
2,,Serial dut1_monitor,20,2023-12-06 15:33:21.490476544+00:00,aa013c00000000d843ef1069f32b521e00000000000000...,
3,,Serial dut1_monitor,20,2023-12-06 15:33:27.192160768+00:00,aa013c00000000df43ef10db032d541e00000000000000...,
4,,Serial dut1_monitor,20,2023-12-06 15:33:32.887571968+00:00,aa013c00000000e243ef104d142e561e00000000000000...,
...,...,...,...,...,...,...
7891,,Serial dut1_monitor,20,2023-12-07 06:33:32.024371456+00:00,aa013c00000000ec43ef1026d8edd134040b0000000000...,
7892,,Serial dut1_monitor,20,2023-12-07 06:33:37.719296256+00:00,aa013c00000000e243ef1098e8eed333040b0000000000...,
7893,,Serial dut1_monitor,20,2023-12-07 06:33:43.413967616+00:00,aa013c00000000d943ef100af9efd533040b0000000000...,
7894,,Serial dut1_monitor,20,2023-12-07 06:33:49.115754752+00:00,aa013c00000000f543ef107c09f1d733040b0000000000...,


## Custom frame decoding

Create the dictionary that holds the frame formatting for a given frame ID. This will be custom to your specific frame. Find the documentation for each field in payload_parser. 

Also define the keys (column names) assigned, in order, to each element of a decoded frame.

In [31]:

# Frame layouts handed to decode_frame: each key is a field-format string and
# each value is the frame ID associated with it.
# NOTE(review): the format letters look like struct-style type codes; confirm
# against the payload_parser documentation.
frame_id_formatting = {
    "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB": 0,  # test frame
    "IIIIIIIIIIIIIII": 1,  # fini
    "IIIIII": 16,  # exception
}

# Column names given, in order, to (row timestamp, decoded fields..., row event).
keys = [
    'timestamp',
    'total_errors',
    'mcycle',
    'minstret',
    'imem_se',
    'imem_de',
    'dmem_se',
    'dmem_de',
    'regfile_se',
    'regfile_de',
    'iv',
    'jump',
    'branch',
    'dsp_t',
    'trap',
    'illegal',
    'event',
]
# Alternative key layout, kept for reference:
# keys = ['timestamp', 'total_errors', 'mcycle', 'minstret', 'ir_c', 'wait_ii', 'wait_if', 'wait_mc', 'load', 'store', 'wait_ls', 'branch', 'tbranch', 'imem_ecc', 'dmem_ecc', 'regfile', 'iv', 'event']


Parse the fields of each frame according to its frame type, then map the decoded data tuple onto readable, named fields.

In [32]:
from payload_parser import decode_frame 

def event_row(row, data_frame, error_code=0):
    """Build a placeholder record for a row that yielded no decoded payload.

    Every name in the module-level ``keys`` list starts at 0; only the
    ``timestamp`` and ``event`` entries are then filled in.

    Args:
        row: Mapping-like row providing ``'timestamp'`` and, when
            ``error_code`` is 0, ``'event'``.
        data_frame: Not used by this function.
        error_code: Decode status; any non-zero value marks the row as failed.

    Returns:
        dict: The record, with ``event`` set to -1 when ``error_code`` is
        non-zero and to ``row['event']`` otherwise.
    """
    record = dict.fromkeys(keys, 0)
    record['timestamp'] = row['timestamp']
    # -1 is the marker for a failed decode (communication error or similar).
    record['event'] = row['event'] if error_code == 0 else -1
    return record
    

def parse_data(df):
    """Decode every row of ``df`` into one record of named fields.

    Rows with ``id == 20`` and a non-empty hex string in ``data`` are passed
    to ``decode_frame``. A zero error code yields a record built from the row
    timestamp, the decoded integers and the row event, paired in order with
    the module-level ``keys``. Every other row (different id, no payload,
    non-zero error code, or an exception while decoding) becomes a
    placeholder record from ``event_row``.

    Args:
        df: DataFrame whose rows provide at least ``timestamp`` and ``event``.

    Returns:
        pandas.DataFrame: One row per input row. ``timestamp`` is passed
        through unchanged; every other column is coerced to ``int``, with
        values that cannot be parsed as numbers replaced by 0.
    """
    records = []
    for _, row in df.iterrows():
        mapping = None
        if 'id' in row and row['id'] == 20:
            if 'data' in row and isinstance(row['data'], str) and row['data'] != '':
                try:
                    error_code, data_tup = decode_frame(bytes.fromhex(row['data']), frame_id_formatting)

                    if error_code != 0:
                        # Decoder reported a failure: event_row flags it with event == -1.
                        mapping = event_row(row, records, error_code)
                    else:
                        parsed_payload = (row['timestamp'],) + tuple(map(int, data_tup)) + (row['event'],)
                        # NOTE(review): zip() stops at the shorter sequence, so a
                        # payload whose length differs from len(keys) - 2 ends up
                        # truncated or with missing keys. Confirm that only frames
                        # matching `keys` reach this branch.
                        mapping = dict(zip(keys, parsed_payload))
                except Exception:
                    # Best effort: a row that cannot be decoded is kept as a
                    # zero-filled placeholder carrying the row's own event.
                    mapping = event_row(row, records)
            else:
                mapping = event_row(row, records)
        else:
            mapping = event_row(row, records)

        records.append(mapping)

    parsed = pd.DataFrame(records)

    for column in parsed.columns:
        # Leave the timestamp alone so it stays a datetime. A guard of the form
        # `!= 'timestamp' or != 'event'` is always true and would turn the
        # timestamps into integer nanoseconds.
        if column == 'timestamp':
            continue
        # 'event' is deliberately coerced as well: the analysis below compares
        # it with 0, so missing or non-numeric events must become 0.
        parsed[column] = pd.to_numeric(parsed[column], errors='coerce').fillna(0).astype(int)

    return parsed

df_parsed = parse_data(df_original)
try:
    # utc=True accepts both tz-aware datetimes and raw integer nanoseconds,
    # whereas .dt.tz_localize('UTC') raises on an already tz-aware column.
    df_parsed['timestamp'] = pd.to_datetime(df_parsed['timestamp'], utc=True)
except Exception as error:
    print(f"Error converting timestamp to datetime: {error}")


In [33]:
# filtered = df_parsed[df_parsed['event'] != 0]
# filtered2 = filtered[filtered['event'] != -1]
# print(filtered2.loc[:, ['timestamp', 'event']])
# df_parsed

-------------

# Data Analysis

Using `df_parsed` as input.

### total_errors

Filter for total errors and event transitions. This should give us a good idea of state transitions signifying SEEs.

In [34]:
# Drop clean runs: keep rows that report errors or carry a non-zero event.
filtered = df_parsed.loc[
    lambda frame: frame['total_errors'].gt(0) | frame['event'].ne(0)
]

# Drop a row when total_errors, mcycle, minstret and event all repeat the
# values of the row directly before it.
filtered = filtered.loc[
    lambda frame: ~(
        frame['total_errors'].eq(frame['total_errors'].shift())
        & frame['mcycle'].eq(frame['mcycle'].shift())
        & frame['minstret'].eq(frame['minstret'].shift())
        & frame['event'].eq(frame['event'].shift())
    )
]

# Of those, keep only rows whose total_errors differs from the previous row.
filtered2 = filtered.loc[
    lambda frame: frame['total_errors'].ne(frame['total_errors'].shift())
]

# Finally keep only the rows that report at least one error.
filtered3 = filtered2.loc[lambda frame: frame['total_errors'].gt(0)]
filtered3

Unnamed: 0,timestamp,total_errors,mcycle,minstret,imem_se,imem_de,dmem_se,dmem_de,regfile_se,regfile_de,iv,jump,branch,dsp_t,trap,illegal,event
