## Imports

### Python modules

In [2]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

## Data Processing

### Dataframe Creation

Read the target CSV into a DataFrame, skipping any malformed lines. pandas reports which lines were skipped so they can be cleaned up manually. This only seems to happen when the boards were rebooted - no data is lost and it is easy to fix.

In [3]:
# Load the raw experiment log. Malformed lines are skipped and reported as warnings
# so they can be cleaned up by hand afterwards.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in pandas 2.0;
# `on_bad_lines='warn'` is its replacement (skip the line, emit a warning).
data = pd.read_csv('../../SRAM3/1/half/nvRAM_data_02-06-2021-1720.csv', on_bad_lines='warn')
# Number of log rows that were successfully parsed.
log_length = len(data)

### Identifying Runs

Create a list of every instance of M,A,WRCHCKBRD. This log line indicates the start of every new experiment run. The marker should probably become a user input to this notebook so that different experiment setups are easily accounted for.

Create a list of every instance of M,A,INIT. Indicates start of board power cycle.

This method is the easiest, but will label all INIT cycles as a separate experimental run. No loss of data occurs so this is ok.

In [4]:
# Rows logged with Board == 'M' and Mikroe_socket == 'A'; both marker statuses
# are looked up among these rows only.
board_m_socket_a = (data['Board'] == 'M') & (data['Mikroe_socket'] == 'A')

# Row labels of every WRCHCKBRD marker, paired with the status name.
wrchckbrd_locs = data[board_m_socket_a & (data['Status'] == 'WRCHCKBRD')].index.tolist()
wrchckbrd_index = list(zip(wrchckbrd_locs, ['WRCHCKBRD'] * len(wrchckbrd_locs)))

# Row labels of every INIT marker, paired with the status name.
init_locs = data[board_m_socket_a & (data['Status'] == 'INIT')].index.tolist()
init_index = list(zip(init_locs, ['INIT'] * len(init_locs)))

Combine both lists, sort them by log line number, and append a final END entry holding the total number of log lines.

In [5]:
# All (row, status) markers in log order, closed by an END sentinel placed one
# past the last row so the final marker also has a "next" boundary.
run_index = sorted(wrchckbrd_index + init_index) + [(log_length, 'END')]

For each element h in run_index, if the status run_index[h][1] is WRCHCKBRD, label everything between its index run_index[h][0] and the next index run_index[h+1][0] with the same run ID. If the status is INIT, no labels are applied. This ensures that only actual test sequences are associated with runs.

In [6]:
# One entry per log row: '' for rows outside any run, otherwise the run number.
run_col = [''] * log_length
run_id = 1
# Walk consecutive marker pairs; the second marker of each pair supplies the
# (exclusive) end row of the span opened by the first.
for (start, status), (end, next_status) in zip(run_index, run_index[1:]):
    if status != 'WRCHCKBRD':
        # INIT spans stay unlabelled.
        continue
    run_col[start:end] = [run_id] * (end - start)
    run_id += 1

In [7]:
# Attach the run labels as the first column of the log.
# A bare `insert` raises ValueError if this cell is executed a second time
# (the column already exists), so overwrite in that case to keep the cell re-runnable.
if 'Run_ID' in data.columns:
    data['Run_ID'] = run_col
else:
    data.insert(loc=0, column='Run_ID', value=run_col)

### Neutron Logs

Get first/last log timestamps into datetime objects

In [8]:
# strptime layout of the 'Timestamp' column in the experiment log.
exp_tformat = '%Y-%m-%d_%H-%M-%S-%f'

# Parse the timestamps of the first and the last log row.
log_timestamps = data['Timestamp']
exp_firststamp = datetime.strptime(log_timestamps.iloc[0], exp_tformat)
exp_laststamp = datetime.strptime(log_timestamps.iloc[log_length - 1], exp_tformat)

Get all neutron logs for the days that the experiment was running.

In [9]:
# Neutron count logs are stored one file per calendar day, named countlog-YYYY-MM-DD.txt.
beamlog_fname_tformat = '%Y-%m-%d'
beamlog_firstday = datetime.strftime(exp_firststamp, beamlog_fname_tformat)
beamlog_lastday = datetime.strftime(exp_laststamp, beamlog_fname_tformat)

# Number of day boundaries crossed by the experiment. Subtracting calendar dates
# (rather than day-of-month numbers) stays correct across month and year ends.
beamlog_day_dif = (exp_laststamp.date() - exp_firststamp.date()).days

# Read every day from the first to the last (inclusive). The offset grows with
# the loop index so each iteration loads a different day's file.
beamlog_frames = []
for i in range(max(beamlog_day_dif, 0) + 1):
    beamlog_nextday = datetime.strftime(exp_firststamp + timedelta(days=i), beamlog_fname_tformat)
    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True.
    beamlog_frames.append(
        pd.read_csv(f"../../../neutrons/countlog-{beamlog_nextday}.txt", sep=r'\s+', header=None, skiprows=1)
    )

# Concatenate once, after the loop, and renumber rows 0..n-1 across all days.
beamlog = pd.concat(beamlog_frames, ignore_index=True)

Remake neutron log column names

In [10]:
# The count logs are read without a header row, so name the columns by position.
beamlog_new_colnames = [
    "Date",
    "HMS_time",
    "Millisecs",
    "Count1",
    "Count2",
    "Count3",
    "Count4",
    "protonCharge",
    "Beam_current",
]
beamlog.columns = beamlog_new_colnames

Create list of datetime objects of all timestamps in beamlog

In [11]:
# Glue date, clock time and the fractional-seconds column into one string per row.
beamlog_tstamps_concat = (beamlog['Date'] + beamlog['HMS_time'] + beamlog['Millisecs'].apply(str)).tolist()

# Keep at most 26 characters so that the fractional part handed to %f is never
# longer than strptime accepts. Slicing leaves shorter strings untouched, so no
# explicit length test is required.
b_tstamps_concat_cln = [stamp[:26] for stamp in beamlog_tstamps_concat]

# One datetime per beam log row, in row order.
b_tstamps_concat_cln = [
    datetime.strptime(stamp, '%d/%m/%Y%H:%M:%S0.%f') for stamp in b_tstamps_concat_cln
]

Function to check if beam was on between two experiment log times. Returns 1 if beam was on, 0 if not.

In [12]:
def beam_on(firsttime, lasttime):
    """Report whether the neutron beam was on between two experiment log times.

    The beam log rows that bracket the interval are located (the latest row at
    or before ``firsttime`` and the earliest row at or after ``lasttime``) and
    the growth of ``Count4`` between them is averaged over the number of rows
    separating them.

    Relies on the notebook-level ``exp_tformat``, ``beamlog`` and
    ``b_tstamps_concat_cln``. ``b_tstamps_concat_cln`` is built row by row from
    ``beamlog``, so entry ``i`` is taken to describe beam log row ``i``; the
    rows are therefore looked up by position instead of re-formatting the
    timestamps and string-matching the Date/HMS_time columns.

    Parameters
    ----------
    firsttime, lasttime : str
        Experiment log timestamps in ``exp_tformat``.

    Returns
    -------
    int
        1 if the average ``Count4`` increase per beam log row exceeds 1,
        otherwise 0 (also 0 when both times resolve to the same row).

    Raises
    ------
    ValueError
        If the beam log has no entry at or before ``firsttime`` or none at or
        after ``lasttime``.
    """
    firsttime_obj = datetime.strptime(firsttime, exp_tformat)
    lasttime_obj = datetime.strptime(lasttime, exp_tformat)

    # Latest beam log entry that is not after the start of the interval.
    first_pos = max(
        (pos for pos, stamp in enumerate(b_tstamps_concat_cln) if stamp <= firsttime_obj),
        key=b_tstamps_concat_cln.__getitem__,
        default=None,
    )
    # Earliest beam log entry that is not before the end of the interval.
    last_pos = min(
        (pos for pos, stamp in enumerate(b_tstamps_concat_cln) if stamp >= lasttime_obj),
        key=b_tstamps_concat_cln.__getitem__,
        default=None,
    )
    if first_pos is None or last_pos is None:
        raise ValueError(
            f"beam log does not cover the interval {firsttime} .. {lasttime}"
        )

    num_rows = last_pos - first_pos
    if num_rows == 0:
        # Both times fall on the same beam log row: no count growth can be
        # measured, so report "off" instead of dividing zero by zero.
        return 0

    count4 = beamlog['Count4']
    count4_dif = count4.iloc[last_pos] - count4.iloc[first_pos]
    cps = count4_dif / num_rows
    return 1 if cps > 1 else 0

#beam_on('2021-06-02_17-35-03-568', '2021-06-02_17-35-24-588')

0

In [35]:
# Scratch check on run 1: locate the rows around the delay step.
run1 = data[data['Run_ID'] == 1]

# Row labels of run 1's socket 'D' STORE_OK lines.
before_delay = run1[(run1['Mikroe_socket'] == 'D') & (run1['Status'] == 'STORE_OK')].index
# Row directly after run 1's first socket 'A' VERIF line.
after_delay_index = run1[(run1['Mikroe_socket'] == 'A') & (run1['Status'] == 'VERIF')].index[0] + 1

data['Timestamp'].iloc[after_delay_index]

'2021-06-02_17-20-16-004319'

### Creating run objects

In [None]:
class Run:
    """One experiment run: the log rows whose ``Run_ID`` equals the run number.

    Parameters
    ----------
    name : str
        Run label holding the run number as an underscore-separated token,
        e.g. ``"run_12"``.
    log : pandas.DataFrame, optional
        Log to slice. Defaults to the notebook-level ``data`` frame.

    Attributes
    ----------
    name : the label passed in.
    num : the run number parsed from ``name``.
    df : the rows of the log belonging to this run.
    """

    def __init__(self, name, log=None):
        self.name = name
        # The first all-digit token of the label is the run number ("run_12" -> 12).
        self.num = [int(s) for s in name.split('_') if s.isdigit()][0]
        if log is None:
            log = data
        self.df = log.loc[log['Run_ID'] == self.num]

    def delay(self):
        """Return the nominal delay of this run in seconds, or None.

        The elapsed time is measured from the run's first socket 'D' STORE_OK
        row to the row that directly follows the run's first socket 'A' VERIF
        row. It is then rounded down to a decade, following the original note
        in this method: under 1 s -> 0.1, under 10 s -> 1, under 100 s -> 10,
        and so on.

        Rows are addressed by position inside ``self.df``: the frame is a
        filtered slice, so its index labels are not valid ``iloc`` positions.

        Returns None when either marker row is missing or when the VERIF row
        is the last row of the run.
        """
        socket = self.df['Mikroe_socket']
        status = self.df['Status']
        # Each comparison is parenthesised: `&` binds tighter than `==`.
        store_pos = np.flatnonzero(((socket == 'D') & (status == 'STORE_OK')).to_numpy())
        verif_pos = np.flatnonzero(((socket == 'A') & (status == 'VERIF')).to_numpy())
        if len(store_pos) == 0 or len(verif_pos) == 0:
            return None

        before_delay_i = store_pos[0]
        after_delay_i = verif_pos[0] + 1
        if after_delay_i >= len(self.df):
            return None

        stamps = self.df['Timestamp']
        before_delay_dt = datetime.strptime(stamps.iloc[before_delay_i], exp_tformat)
        after_delay_dt = datetime.strptime(stamps.iloc[after_delay_i], exp_tformat)
        elapsed = (after_delay_dt - before_delay_dt).total_seconds()

        # Round down to the decade: 0.1, 1, 10, 100, ...
        nominal = 0.1
        while elapsed >= nominal * 10:
            nominal *= 10
        return nominal

    def error_tot(self):
        """Count the rows of this run whose Status is 'SDC'.

        The count is also stored on ``self.error_num``. A run without any SDC
        row yields 0 (indexing ``value_counts()`` by 'SDC' would raise KeyError).
        """
        self.error_num = int((self.df['Status'] == 'SDC').sum())
        return self.error_num

In [None]:
# Highest run number that was actually assigned. Rows outside any run hold ''
# in run_col, so the last element is not guaranteed to be a number (for example
# when the log ends inside an INIT section); scan for the integer labels instead.
last_run_id = max((rid for rid in run_col if isinstance(rid, int)), default=0)

# One label per run: run_1 .. run_<last_run_id>.
run_names = [f"run_{i}" for i in range(1, last_run_id + 1)]

In [None]:
# Map each run label to its Run object.
runs = {run_name: Run(run_name) for run_name in run_names}

In [None]:
# Example lookup: count of 'SDC' status rows recorded for run 79.
runs["run_79"].error_tot()

### How many bits tend to get flipped?

Get occurrences of each type of error

In [None]:
# Row labels of every log line whose status is SDC.
num_see = data[data['Status'] == 'SDC'].index

# Frequency of each distinct SDC_val; the tally is computed once and then
# split into its values and their counts.
see = data['SDC_val']
see_counts = see.value_counts()
see_types_hex = see_counts.index.tolist()
see_freqs = see_counts.tolist()
# (value, count) pairs, in value_counts order.
see_bins = list(zip(see_types_hex, see_freqs))

Use this for Hamming distance between errors and golden value:

In [None]:
# Two sample bytes with complementary bit patterns (0b01010101 and 0b10101010).
xortest1 = 0x55
xortest2 = 0xAA

# Hamming distance: XOR leaves a 1 wherever the operands differ; count those bits.
format(xortest1 ^ xortest2, 'b').count('1')

### Where do errors tend to happen?

In [None]:
# Frequency of each distinct SDC_loc; the tally is computed once and then
# split into its values and their counts.
see_loc = data['SDC_loc']
see_loc_counts = see_loc.value_counts()
see_locs_hex = see_loc_counts.index.tolist()
see_loc_freqs = see_loc_counts.tolist()
# (location, count) pairs, in value_counts order.
see_locs_bins = list(zip(see_locs_hex, see_loc_freqs))