In [10]:
#import ipdb # alternative to pdb that works in jupyter notebook (pip3 install ipdb)
import os, subprocess, sys, re, time
from pathlib import Path
from pynq import Overlay
#from pynq import GPIO
from pynq import allocate
import math
import pandas as pd

from dma_receiver import DmaReceiver
from bram_loader import Bram_Loader
from continuous_monitoring_system_controller import ContinuousMonitoringSystemController
from riscv_instruction_decoder import get_riscv_instruction_name

# Directory holding the bitstream and the RISC-V program binaries used below.
BASE_DIR = Path('/home/xilinx/design_files')
# Directory where the collected traces are written as CSV files.
OUTPUT_DIR = Path('/home/xilinx/output_files')
# Create the output directory (and any missing parents) in a single call.
# Unlike an exists()/mkdir() pair, this does not fail if the directory
# already exists or appears between the check and the creation.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Alias of BASE_DIR used by the cells that load program binaries.
PATH = BASE_DIR

# Load the overlay from the bitstream file; the IP blocks of the design are
# reached below as attributes of `base`.
base = Overlay(str(BASE_DIR / 'imported_design.bit'))

# Loader object used by later cells (bram_loader.load(...)) to load program binaries.
bram_loader = Bram_Loader(base.bram_loader.axi_gpio_2)

# the long name is because of using hierarchy in Vivado block design
cms_ctrl_axi_gpio = base.continuous_monitoring_system_blocks.axi_gpio_to_cms_ctrl_interface.axi_gpio_cms_ctrl.channel1    
# Controller for the continuous monitoring system (CMS), driven through the AXI GPIO channel above.
cms_ctrl = ContinuousMonitoringSystemController(cms_ctrl_axi_gpio)



# Size in bytes of one buffer element (the buffers below use dtype 'u8').
ITEM_BYTE_SIZE = 8
# Upper limit applied to the buffer length below.
# NOTE(review): presumably the depth of the hardware trace FIFO - confirm units against the design.
FIFO_SIZE = 32768
# +4 because DMA seems to have its own buffer it fills before dma.recvchannel.transfer is even called
# (the "+ 4" is currently disabled, see the trailing comment on the next line)
buffer_length = min( base.continuous_monitoring_system_blocks.axi_dma_0.buffer_max_size // ITEM_BYTE_SIZE, FIFO_SIZE)# + 4) 

print('buffer_length =', buffer_length)
# Buffer that receives the DMA transfers (passed to get_dma_transfer in later cells).
input_buffer = allocate(shape=(buffer_length,), dtype='u8')
# Second buffer of the same shape; it is not referenced by any of the visible cells.
input_buffer_2 = allocate(shape=(buffer_length,), dtype='u8')

# Receive channel of the DMA block; used for all transfers in this notebook.
dma_rec = base.continuous_monitoring_system_blocks.axi_dma_0.recvchannel

# AXI GPIO handles. API reference:
# https://pynq.readthedocs.io/en/v2.7.0/_modules/pynq/lib/axigpio.html
# Bits 0..2 of axi_gpio_0 channel 1 are reset lines; the helper functions below
# drive them to 0 and back to 1 (reset_cpu documents its reset as active-low).
gpio_rst_n_out = base.axi_gpio_0.channel1[0]
gpio_rst_n_console_input = base.axi_gpio_0.channel1[1]
gpio_rst_n_console_output = base.axi_gpio_0.channel1[2]
#gpio_en_cpu_reset_server_request_put_out = base.axi_gpio_0.channel1[1]
# Only referenced by the commented-out set_pc_stream_tlast_interval helper further down.
gpio_pc_stream_m_axis_tlast_interval = base.axi_gpio_1.channel1

# Two 16-bit fields of axi_gpio_0 channel 2, printed by print_fifo_data_counts().
gpio_fifo_wr_count = base.axi_gpio_0.channel2[0:16]
gpio_fifo_rd_count = base.axi_gpio_0.channel2[16:32]

# CSV file listing the selected performance events (one header line, then one event per line).
PERFORMANCE_EVENTS_FNAME = 'performance_event_names_selected.csv'
with open(PERFORMANCE_EVENTS_FNAME) as f:    
    # Number of events = number of lines minus the header line.
    # Blank lines in the file would be counted as events as well.
    PERFORMANCE_EVENTS_COUNT = len(f.readlines()) - 1
print(PERFORMANCE_EVENTS_COUNT)
# Bit widths of the fields packed into a single trace item (see parse_fifo_item).
PERFORMANCE_COUNTER_WIDTH = 7
# The overflow map is as wide as the number of events.
PERFORMANCE_COUNTERS_OVERFLOW_MAP_WIDTH = PERFORMANCE_EVENTS_COUNT
PC_WIDTH = 64
INSTR_WIDTH = 32
CLK_COUNTER_WIDTH = 64
# Width in bits of one trace item as it arrives in the DMA buffer.
AXI_DATA_WIDTH = 512

def print_dma_channel_status(channel):
    ''' Prints the running / idle / error flags of a DMA channel, followed by
    the value (in hex) read from the register 4 bytes past the channel offset. '''
    for flag_name in ('running', 'idle', 'error'):
        print(f'dma.{flag_name} =', getattr(channel, flag_name))
    status_register = channel._mmio.read(channel._offset + 4)
    print('status =', hex(status_register))
    
def reset_console_input(delay=0.001):
    ''' Pulses the console-input reset line: drives it to 0, waits `delay`
    seconds, then drives it back to 1. '''
    rst_n = gpio_rst_n_console_input
    rst_n.write(0)
    time.sleep(delay)
    rst_n.write(1)

def reset_console_output(delay=0.001):
    ''' Pulses the console-output reset line: drives it to 0, waits `delay`
    seconds, then drives it back to 1. '''
    rst_n = gpio_rst_n_console_output
    rst_n.write(0)
    time.sleep(delay)
    rst_n.write(1)
    
def reset_cpu(delay=0.001):
    ''' AXI GPIO controlled CPU reset, active-low.
    Resets the console output first, then holds the CPU reset line at 0 for
    `delay` seconds and releases it, waiting another `delay` seconds afterwards. '''
    #gpio_en_cpu_reset_server_request_put_out.write(0)
    reset_console_output()
    cpu_rst_n = gpio_rst_n_out
    # level 0 asserts the reset, level 1 releases it; each write is followed by a pause
    for level in (0, 1):
        cpu_rst_n.write(level)
        time.sleep(delay)
    #gpio_en_cpu_reset_server_request_put_out.write(1)
    #time.sleep(delay)
    #gpio_en_cpu_reset_server_request_put_out.write(0)
    #time.sleep(delay)
    

def print_fifo_data_counts():
    ''' Prints the values currently read from the FIFO write-count and
    read-count GPIO fields. '''
    labelled_counters = (
        ('gpio_fifo_wr_count', gpio_fifo_wr_count),
        ('gpio_fifo_rd_count', gpio_fifo_rd_count),
    )
    for label, counter in labelled_counters:
        print(f'{label} =', counter.read())
    
# def set_pc_stream_tlast_interval(items_count):
#     ''' Sets 32-bit value specifying how many items can 
#     arrive by a single dma.recvchannel.tranfer(ib) call. '''
#     gpio_pc_stream_m_axis_tlast_interval[0:32].write(items_count)

def console_send(s, end_byte=None):
    ''' Sends the characters of `s` to the console input, one byte at a time,
    using AXI GPIO and hardware FIFOs.
    If `end_byte` is given (an integer), it is sent after the last character. '''
    gpio_channel = base.console_io.axi_gpio_3.channel1
    data_bits = gpio_channel[0:8]
    # using "edge_detector" to avoid continuous writing
    write_strobe = gpio_channel[8]

    def push_byte(value):
        # put the byte on the data bits, then pulse the write-enable bit
        data_bits.write(value)
        write_strobe.on()
        write_strobe.off()

    write_strobe.off()
    for character in s:
        push_byte(ord(character))
    if end_byte is not None:
        push_byte(end_byte)
    
def console_data_available():
    ''' Returns True when the console-output "empty" flag (bit 8 of GPIO
    channel 2) reads 0, i.e. there is data left to read. '''
    empty_flag_value = base.console_io.axi_gpio_3.channel2[8].read()
    return empty_flag_value == 0

def console_read():
    ''' Reads all pending console output and returns it as a string.
    Uses AXI GPIO and hardware FIFOs. '''
    data_bits = base.console_io.axi_gpio_3.channel2[0:8]
    # using "edge_detector" to avoid continuous reading
    read_strobe = base.console_io.axi_gpio_3.channel1[9]
    read_strobe.off()
    received = []
    while console_data_available():
        received.append(chr(data_bits.read()))
        # pulse the read-enable bit to advance to the next byte
        read_strobe.on()
        read_strobe.off()
    return ''.join(received)

def instr_to_strings(instructions_integers):
    ''' Disassembles instruction words with the "riscv-machinsn-decode"
    command-line tool and returns its output lines.

    Requires riscv-python-model installed.
    If network connection is available: "python3 -m pip install riscv-model".
    If not, then on a separate machine with internet:
        python3 -m pip download riscv-model -d .
    Then copy the downloaded .whl file to pynq and install with:
        python3 -m pip install <file.whl> -f ./ --no-index

    Args:
        instructions_integers: iterable of instruction words given as integers.

    Returns:
        List of the lines the tool printed to stdout (one line per instruction
        is expected). An empty list is returned for empty input, in which case
        the tool is not started at all.

    Raises:
        FileNotFoundError: if "riscv-machinsn-decode" cannot be found.

    Usage:
        instr_to_strings([0xB60006F, 0xFE0791E3])
    '''
    # Every word gets the same "0x" prefix (previously the first one had none).
    hex_words = [f'0x{ii:08X}' for ii in instructions_integers]
    if not hex_words:
        return []
    # Each word is passed as its own argument and no shell is involved, so a long
    # trace is not squeezed into one huge command string.
    completed = subprocess.run(
        ['riscv-machinsn-decode', 'hexstring', *hex_words],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    return completed.stdout.strip().split('\n')


####################################################################
# 

# def read_performance_event_names(f_name='performance_event_names.txt'):
#     ''' Reads events names from file, these were collected from CHERI-Flute source code by using this script:
#     https://github.com/michalmonday/Flute/blob/continuous_monitoring/builds/RV64ACDFIMSUxCHERI_Flute_verilator/vcd/read_vcd.py
#     '''
#     with open(f_name) as f:
#         return [line.strip() for line in f.readlines()]

def read_performance_event_names(f_name):
    ''' Returns the performance event names stored in the CSV file `f_name`.
    The first line (header) is skipped; for every following line the third
    comma-separated field is taken as the event name.
    The names were collected from CHERI-Flute source code by using this script:
    https://github.com/michalmonday/Flute/blob/continuous_monitoring/builds/RV64ACDFIMSUxCHERI_Flute_verilator/vcd/read_vcd.py
    '''
    event_names_found = []
    with open(f_name) as csv_file:
        next(csv_file, None)  # skip the header line (if there is one)
        for row in csv_file:
            fields = row.strip().split(',')
            event_names_found.append(fields[2])
    return event_names_found


def pop_n_bits_value(val, n):
    ''' Splits off the `n` least significant bits of `val`.
    Returns a tuple (val without those bits, value of those bits), e.g.
    pop_n_bits_value(0xFFFF, 4) returns: (0xFFF, 0xF) '''
    low_bits_mask = (1 << n) - 1
    remaining = val >> n
    popped = val & low_bits_mask
    return remaining, popped

def parse_fifo_item(fifo_item):
    ''' Parses the numerical value of a single fifo item.
        Fields are extracted starting from the least significant bits, in this order:
            - PERFORMANCE_EVENTS_COUNT performance counters, PERFORMANCE_COUNTER_WIDTH bits each
            - performance counters overflow map (PERFORMANCE_COUNTERS_OVERFLOW_MAP_WIDTH bits)
            - pc (PC_WIDTH bits)
            - clk_counter (CLK_COUNTER_WIDTH bits)
            - instr (INSTR_WIDTH bits)
        Any bits above "instr" are not used.
        Returns: (perf_counters list, perf_counters_overflow_map, pc, clk_counter, instr) '''
    remaining_bits = fifo_item

    def take(width):
        # removes `width` low bits from the remaining value and returns them
        nonlocal remaining_bits
        remaining_bits, field = pop_n_bits_value(remaining_bits, width)
        return field

    perf_counters = [take(PERFORMANCE_COUNTER_WIDTH) for _ in range(PERFORMANCE_EVENTS_COUNT)]
    perf_counters_overflow_map = take(PERFORMANCE_COUNTERS_OVERFLOW_MAP_WIDTH)
    pc = take(PC_WIDTH)
    clk_counter = take(CLK_COUNTER_WIDTH)
    instr = take(INSTR_WIDTH)
    return perf_counters, perf_counters_overflow_map, pc, clk_counter, instr

def get_dma_transfer(input_buffer, dma_rec, dont_wait=False):
    ''' Starts a DMA transfer into `input_buffer` and returns the number of
    transferred items, each being AXI_DATA_WIDTH bits wide.

    With dont_wait=True the count is returned right after starting the
    transfer, without waiting for it to complete.
    Otherwise the function waits for the transfer to finish and repeats it
    for as long as exactly one item was received. '''

    def count_items():
        # NOTE(review): assumes dma_rec.transferred is a byte count - confirm against the PYNQ DMA documentation
        return math.floor(dma_rec.transferred * 64 / AXI_DATA_WIDTH / 8)

    # workaround is used because 2 WFI instructions are sent in a row
    # both having tlast=HIGH, so if the whole transfer equals to a single
    # WFI instruction then it's ignored.
    while True:
        dma_rec.transfer(input_buffer)
        if dont_wait:
            return count_items()
        dma_rec.wait() # depends on tlast
        items_transferred = count_items()
        if items_transferred != 1:
            break
        print('Ignoring a single received instruction (most likely WFI)')
    print(f'items_transferred = {items_transferred}')
    return items_transferred

def parse_last_dma_transfer(input_buffer, items_transferred):
    ''' Parses the first `items_transferred` items stored in `input_buffer`.

    Every item occupies AXI_DATA_WIDTH/64 consecutive buffer elements, which are
    joined (little-endian) into one integer and split with parse_fifo_item.

    Returns a tuple of lists, one entry per item:
        (events, events_overflows, pcs, clk_counters, instrs, instr_names, instr_strings) '''
    words_per_item = math.ceil(AXI_DATA_WIDTH/64)
    events, events_overflows = [], []
    pcs, clk_counters = [], []
    instrs, instr_names = [], []
    for item_index in range(items_transferred):
        first_word = item_index * words_per_item
        raw_words = input_buffer[first_word:first_word + words_per_item]
        fifo_item = int.from_bytes(bytes(raw_words), byteorder='little')
        counters, overflow_map, pc, clk_counter, instr = parse_fifo_item(fifo_item)
        events.append(counters)
        events_overflows.append(overflow_map)
        pcs.append(pc)
        clk_counters.append(clk_counter)
        instrs.append(instr)
        instr_names.append(get_riscv_instruction_name(instr))
    # all instructions are disassembled with a single call after the loop
    instr_strings = instr_to_strings(instrs)
    return events, events_overflows, pcs, clk_counters, instrs, instr_names, instr_strings


def setup_cms(cms_ctrl):
    ''' Configures the continuous monitoring system controller: sets the trace
    trigger addresses and the monitored address range (all four of them are
    left disabled) and clears the "wfi wait" state. '''
    # Triggering (exact address must match to start/stop trace)
    trace_start_address = 0x1000
    trace_end_address = 0x80000106
    cms_ctrl.set_trigger_trace_start_address(trace_start_address)
    cms_ctrl.set_trigger_trace_end_address(trace_end_address)
    cms_ctrl.set_trigger_trace_start_address_enabled(False)
    cms_ctrl.set_trigger_trace_end_address_enabled(False)

    # Filtering (any address between lower bound and upper bound will be collected)
    monitored_lower_bound = 0x0FFF     #(0x80000000)
    monitored_upper_bound = 0x800000FF
    cms_ctrl.set_monitored_address_range_lower_bound(monitored_lower_bound)
    cms_ctrl.set_monitored_address_range_upper_bound(monitored_upper_bound)
    cms_ctrl.set_monitored_address_range_lower_bound_enabled(False)
    cms_ctrl.set_monitored_address_range_upper_bound_enabled(False)

    # Allow further trace collection if last traced program used "wfi"
    # (wait for interrupt) instruction which stops the trace.
    cms_ctrl.reset_wfi_wait()

    
def run_and_collect(stdin):
    ''' Runs the loaded program once with `stdin` as its console input and
    collects the resulting trace.

    Args:
        stdin: string sent to the console input before the CPU is started;
            a newline byte is appended automatically.

    Returns:
        (df, stdout) where df is a pandas DataFrame with one row per collected
        item (columns: pc, clk_counter, instr, instr_names, instr_strings,
        followed by one column per name in `event_names`) and stdout is the
        text read from the console output after the run.
        The "pc" and "instr" columns hold upper-case hex strings, zero-padded
        to at least 8 digits.
    '''
    # set CPU into inactive state (active-low reset is set LOW)
    gpio_rst_n_out.write(0)

    # activate continous_monitoring_system in case if it's stopped by previously
    # encountered "wait for interrupt" (WFI) instruction
    cms_ctrl.reset_wfi_wait()

    # send standard input into a buffer, this way it will be ready
    # immediately after CPU starts running the program
    reset_console_input()
    console_send(stdin, end_byte=ord('\n')) # '\n' is hardcoded here specifically for "stack-mission.c" program

    #print(f'Fifo items count before {gpio_fifo_wr_count.read()}')
    reset_cpu()
    #print(f'Fifo items count after {gpio_fifo_wr_count.read()}')

    # transfer all collected data
    items_transferred = get_dma_transfer(input_buffer, dma_rec)#, dont_wait=True)

    # parse received data and turn it into pandas DataFrame
    events, events_overflows, pcs, clk_counters, instrs, instr_names, instr_strings = parse_last_dma_transfer(input_buffer, items_transferred)
    df = pd.DataFrame(zip(pcs,clk_counters,instrs,instr_names,instr_strings), columns=['pc','clk_counter','instr', 'instr_names', 'instr_strings'])
    # Zero-padded like the "instr" column below (it used to be padded with spaces,
    # which left leading blanks in the values written to CSV).
    df['pc'] = df['pc'].apply(lambda x: f'{x:08X}')
    df['instr'] = df['instr'].apply(lambda x: f'{x:08X}')
    # `event_names` is a notebook-level list defined after this function
    df_events = pd.DataFrame(events, columns=event_names)
    df = pd.concat([df, df_events], axis=1)
    stdout = console_read()
    return df, stdout

# Column names for the performance counters (used by run_and_collect and later cells).
event_names = read_performance_event_names(PERFORMANCE_EVENTS_FNAME)

# Show the state of the DMA receive channel and of the FIFO counters before anything runs.
print_dma_channel_status(dma_rec)
print_fifo_data_counts()

# Apply the trigger / address-range configuration of the monitoring system.
setup_cms(cms_ctrl)

# Keep the CPU in reset (active-low line driven to 0) until a program is started.
gpio_rst_n_out.write(0)


buffer_length = 32768
39
dma.running = True
dma.idle = False
dma.error = False
status = 0x0
gpio_fifo_wr_count = 0
gpio_fifo_rd_count = 0


In [11]:
# Load the program binary before running it.
bram_loader.load(PATH / 'riscv-stack-mission.bin')

# Console inputs to run the program with (one run per string).
program_inputs = [
    "==AA==AA==-=-AA====-",
    "=",
    "AA=-==-AAAA-=AA",
    "=-=----AA=AA==AAAAAAAA",
    "--=AA==-AA-==AA-=",
    "AA-=AA=--",
    "AA=",
    "AAAAAA=",
    "-=-=",
    "AA--"
]

# NOTE: only the first two inputs are used here (slice [:2]).
# Each run's trace is saved as normal_<i>.csv in OUTPUT_DIR; `df` and `stdout`
# are overwritten on every iteration and `stdout` is not used in this cell.
for i, stdin in enumerate(program_inputs[:2]):
    print(f'Running program with input: "{stdin}"')
    df, stdout = run_and_collect(stdin)
    print(df.shape)
    df.to_csv(str(OUTPUT_DIR / f'normal_{i}.csv'), index=False)
    print()

Running program with input: "==AA==AA==-=-AA====-"
items_transferred = 1692


NameError: name 'opcode' is not defined

In [6]:
df

Unnamed: 0,pc,clk_counter,instr,instr_names,instr_strings,Core__TRAP,Core__BRANCH,Core__JAL,Core__JALR,Core__AUIPC,...,AXI4_Slave__AR_FLIT,AXI4_Slave__R_FLIT,AXI4_Slave__R_FLIT_FINAL,AXI4_Master__AW_FLIT,AXI4_Master__W_FLIT,AXI4_Master__W_FLIT_FINAL,AXI4_Master__B_FLIT,AXI4_Master__AR_FLIT,AXI4_Master__R_FLIT,AXI4_Master__R_FLIT_FINAL
0,00001010,316,00028067,JALR,"jalr x0, x5, 0",0,0,0,0,1,...,1,8,1,0,0,0,0,1,8,1
1,80000000,35,F14022F3,UNKNOWN,"Cannot decode f14022f3, invalid instruction",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,80000004,1,02029063,BNE,"bne x5, x0, .+32",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,80000008,1,00001117,AUIPC,"auipc x2, 1",0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,80000010,2,5DC0006F,JAL,"jal x0, .+1500",0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1047,800002F0,1,00000013,ADDI,"addi x0, x0, 0",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1048,80000300,4,00008067,JALR,"jalr x0, x1, 0",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1049,800005D8,1,00000013,ADDI,"addi x0, x0, 0",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1050,800005E8,4,00008067,JALR,"jalr x0, x1, 0",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
df.head()

Unnamed: 0,pc,clk_counter,instr,instr_names,instr_strings,Core__TRAP,Core__BRANCH,Core__JAL,Core__JALR,Core__AUIPC,...,AXI4_Slave__AR_FLIT,AXI4_Slave__R_FLIT,AXI4_Slave__R_FLIT_FINAL,AXI4_Master__AW_FLIT,AXI4_Master__W_FLIT,AXI4_Master__W_FLIT_FINAL,AXI4_Master__B_FLIT,AXI4_Master__AR_FLIT,AXI4_Master__R_FLIT,AXI4_Master__R_FLIT_FINAL
0,4112,1092717776,163943,JALR,"jalr x0, x5, 0",0,0,0,0,1,...,1,8,1,0,0,0,0,1,8,1
1,2147483648,35,4047512307,UNKNOWN,"Cannot decode f14022f3, invalid instruction",0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,2147483652,1,33722467,BNE,"bne x5, x0, .+32",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2147483656,1,4375,AUIPC,"auipc x2, 1",0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2147483664,2,1572864111,JAL,"jal x0, .+1500",0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [2]:
gpio_rst_n_out.write(0)

In [3]:
#bram_loader.load(PATH / 'riscv-example-baremetal-short.bin')
bram_loader.load(PATH / 'riscv-stack-mission.bin')

In [4]:
# program input needs to be available immediately, for that reason a separate reset line is used for processor
# and the console input fifo
reset_console_input()
console_send('AA', end_byte=ord('\n'))

In [5]:
print_fifo_data_counts()
reset_cpu()

gpio_fifo_wr_count = 0
gpio_fifo_rd_count = 0


In [6]:
print_fifo_data_counts()

gpio_fifo_wr_count = 1150
gpio_fifo_rd_count = 0


In [7]:
items_transferred = get_dma_transfer(input_buffer, dma_rec)#, dont_wait=True)

items_transferred = 1154


In [8]:
# Parse the items received by the last DMA transfer into per-item lists.
events, events_overflows, pcs, clk_counters, instrs, instr_names, instr_strings = parse_last_dma_transfer(input_buffer, items_transferred)

# Print one line per collected item: index, clock delta, program counter (hex),
# instruction word (hex), instruction name and the disassembled instruction.
for i, (pc, instr, instr_str, clk_counter, instr_name) in enumerate(zip(pcs, instrs, instr_strings, clk_counters, instr_names)):
    print(f'{i:<4} CLK_DELTA={clk_counter:<14}PC={pc:>8X}    INSTR={instr:>08X}    INSTR_NAME={instr_name:<6}    {instr_str}')

0    CLK_DELTA=340136626     PC=    1010    INSTR=00028067    INSTR_NAME=JALR      jalr x0, x5, 0
1    CLK_DELTA=35            PC=80000000    INSTR=F14022F3    INSTR_NAME=UNKNOWN    Cannot decode f14022f3, invalid instruction
2    CLK_DELTA=1             PC=80000004    INSTR=02029063    INSTR_NAME=BNE       bne x5, x0, .+32
3    CLK_DELTA=1             PC=80000008    INSTR=00001117    INSTR_NAME=AUIPC     auipc x2, 1
4    CLK_DELTA=2             PC=80000010    INSTR=5DC0006F    INSTR_NAME=JAL       jal x0, .+1500
5    CLK_DELTA=35            PC=800005EC    INSTR=FF010113    INSTR_NAME=ADDI      addi x2, x2, -16
6    CLK_DELTA=35            PC=800005FC    INSTR=D29FF0EF    INSTR_NAME=JAL       jal x1, .-728
7    CLK_DELTA=48            PC=80000324    INSTR=F8010113    INSTR_NAME=ADDI      addi x2, x2, -128
8    CLK_DELTA=1             PC=80000328    INSTR=06113C23    INSTR_NAME=UNKNOWN    Cannot decode 06113c23, invalid instruction
9    CLK_DELTA=41            PC=80000338    INSTR=02800

178  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
179  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
180  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
181  CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
182  CLK_DELTA=1             PC=80000134    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
183  CLK_DELTA=4             PC=8000013C    INSTR=FE0790E3    INSTR_NAME=BNE       bne x15, x0, .-32
184  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
185  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
186  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
187  CLK_DELTA=21      

365  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
366  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
367  CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
368  CLK_DELTA=1             PC=80000134    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
369  CLK_DELTA=4             PC=8000013C    INSTR=FE0790E3    INSTR_NAME=BNE       bne x15, x0, .-32
370  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
371  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
372  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
373  CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
374  CLK_DELTA=1             PC=80000134    INSTR=FE8

556  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
557  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
558  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
559  CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
560  CLK_DELTA=1             PC=80000134    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
561  CLK_DELTA=4             PC=8000013C    INSTR=FE0790E3    INSTR_NAME=BNE       bne x15, x0, .-32
562  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
563  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
564  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
565  CLK_DELTA=21      

732  CLK_DELTA=1             PC=80000134    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
733  CLK_DELTA=4             PC=8000013C    INSTR=FE0790E3    INSTR_NAME=BNE       bne x15, x0, .-32
734  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
735  CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
736  CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
737  CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
738  CLK_DELTA=1             PC=80000134    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
739  CLK_DELTA=4             PC=8000013C    INSTR=FE0790E3    INSTR_NAME=BNE       bne x15, x0, .-32
740  CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid ins

925  CLK_DELTA=2             PC=8000028C    INSTR=0080006F    INSTR_NAME=JAL       jal x0, .+8
926  CLK_DELTA=4             PC=80000294    INSTR=00078513    INSTR_NAME=ADDI      addi x10, x15, 0
927  CLK_DELTA=3             PC=800002A0    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
928  CLK_DELTA=1             PC=80000484    INSTR=00050793    INSTR_NAME=ADDI      addi x15, x10, 0
929  CLK_DELTA=4             PC=80000490    INSTR=C25FF0EF    INSTR_NAME=JAL       jal x1, .-988
930  CLK_DELTA=4             PC=800000B4    INSTR=FF010113    INSTR_NAME=ADDI      addi x2, x2, -16
931  CLK_DELTA=7             PC=800000C8    INSTR=FB1FF0EF    INSTR_NAME=JAL       jal x1, .-80
932  CLK_DELTA=1             PC=80000078    INSTR=FF010113    INSTR_NAME=ADDI      addi x2, x2, -16
933  CLK_DELTA=42            PC=800000B0    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
934  CLK_DELTA=1             PC=800000CC    INSTR=00050793    INSTR_NAME=ADDI      addi x15, x10, 0
935  CLK_DELTA=1

1108 CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
1109 CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
1110 CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
1111 CLK_DELTA=1             PC=80000134    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
1112 CLK_DELTA=4             PC=8000013C    INSTR=FE0790E3    INSTR_NAME=BNE       bne x15, x0, .-32
1113 CLK_DELTA=1             PC=8000011C    INSTR=FE843783    INSTR_NAME=UNKNOWN    Cannot decode fe843783, invalid instruction
1114 CLK_DELTA=8             PC=80000130    INSTR=F09FF0EF    INSTR_NAME=JAL       jal x1, .-248
1115 CLK_DELTA=1             PC=80000038    INSTR=FE010113    INSTR_NAME=ADDI      addi x2, x2, -32
1116 CLK_DELTA=21            PC=80000074    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
1117 CLK_DELTA=1             PC=80000134    INSTR=FE8

In [9]:
# Build a DataFrame from the parsed lists (performance counters are not included here).
df = pd.DataFrame(zip(pcs,clk_counters,instrs,instr_names,instr_strings), columns=['pc','clk_counter','instr', 'instr_names', 'instr_strings'])
# Column 0 is "pc": replace the integers with zero-padded, upper-case hex strings.
df.iloc[:,0] = df.iloc[:,0].apply(lambda x: f'{x:08X}')
# Display the first 20 rows.
df[:20]

Unnamed: 0,pc,clk_counter,instr,instr_names,instr_strings
0,00001010,340136626,163943,JALR,"jalr x0, x5, 0"
1,80000000,35,4047512307,UNKNOWN,"Cannot decode f14022f3, invalid instruction"
2,80000004,1,33722467,BNE,"bne x5, x0, .+32"
3,80000008,1,4375,AUIPC,"auipc x2, 1"
4,80000010,2,1572864111,JAL,"jal x0, .+1500"
5,800005EC,35,4278255891,ADDI,"addi x2, x2, -16"
6,800005FC,35,3533697263,JAL,"jal x1, .-728"
7,80000324,48,4160815379,ADDI,"addi x2, x2, -128"
8,80000328,1,101792803,UNKNOWN,"Cannot decode 06113c23, invalid instruction"
9,80000338,41,41943151,JAL,"jal x0, .+40"


In [29]:
list(zip([1,2],[3,4])) + [(1,2)]

[(1, 3), (2, 4), (1, 2)]

[(4112,
  340136626,
  163943,
  'JALR',
  'jalr x0, x5, 0',
  [0,
   0,
   0,
   0,
   1,
   1,
   0,
   0,
   21,
   0,
   21,
   24,
   0,
   0,
   9,
   1,
   21,
   9,
   1,
   1,
   21,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   8,
   1,
   0,
   0,
   0,
   0,
   1,
   8,
   1]),
 (2147483648,
  35,
  4047512307,
  'UNKNOWN',
  'Cannot decode f14022f3, invalid instruction',
  [0,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   27,
   0,
   0,
   0,
   6,
   1,
   27,
   5,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0]),
 (2147483652,
  1,
  33722467,
  'BNE',
  'bne x5, x0, .+32',
  [0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   1,
   0,
   0,
   1,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0,
   0]),
 (2147483656,
  1,
  4375,
  'AUIPC',
  'auipc x2, 1',
  [0,
   1,

In [10]:
for instr in instrs: 
    name = get_riscv_instruction_name(instr)
    print(name)

JALR
UNKNOWN
BNE
AUIPC
JAL
ADDI
JAL
ADDI
UNKNOWN
JAL
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
ADDI
JAL
ADDI
JALR
LW
BGEU
AUIPC
JALR
JAL
ADDI
UNKNOWN
JAL
ADDI
JAL
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
JALR
UNKNOWN
BNE
UNKNOWN
JAL
ADDI
J

In [11]:
# show performance counters for the first 20 datapoints/instructions from data above
# NOTE: this rebinds `df`, replacing the pc/instr DataFrame built in an earlier cell;
# the cells below operate on this events-only DataFrame.
df = pd.DataFrame(events, columns=event_names)
df.iloc[:20]

Unnamed: 0,Core__TRAP,Core__BRANCH,Core__JAL,Core__JALR,Core__AUIPC,Core__LOAD,Core__STORE,Core__SERIAL_SHIFT,Core__LOAD_WAIT,Core__STORE_WAIT,...,AXI4_Slave__AR_FLIT,AXI4_Slave__R_FLIT,AXI4_Slave__R_FLIT_FINAL,AXI4_Master__AW_FLIT,AXI4_Master__W_FLIT,AXI4_Master__W_FLIT_FINAL,AXI4_Master__B_FLIT,AXI4_Master__AR_FLIT,AXI4_Master__R_FLIT,AXI4_Master__R_FLIT_FINAL
0,0,0,0,0,1,1,0,0,21,0,...,1,8,1,0,0,0,0,1,8,1
1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,2,0,0,2,...,0,0,0,1,1,1,0,1,0,0
7,0,0,1,0,0,0,0,0,0,0,...,0,0,0,1,1,1,1,0,4,1
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,3,0,0,3,...,0,0,0,3,3,3,2,0,0,0


In [12]:
# Print every non-zero overflow map; no output means that none of the collected
# items had a bit set in its performance counters overflow map.
for eo in events_overflows:
    if eo: 
        print(eo)

In [13]:
event_names

['Core__TRAP',
 'Core__BRANCH',
 'Core__JAL',
 'Core__JALR',
 'Core__AUIPC',
 'Core__LOAD',
 'Core__STORE',
 'Core__SERIAL_SHIFT',
 'Core__LOAD_WAIT',
 'Core__STORE_WAIT',
 'Core__F_BUSY_NO_CONSUME',
 'Core__1_BUSY_NO_CONSUME',
 'Core__2_BUSY_NO_CONSUME',
 'Core__INTERRUPT',
 'L1I__LD',
 'L1I__LD_MISS',
 'L1I__LD_MISS_LAT',
 'L1I__TLB',
 'L1D__LD',
 'L1D__LD_MISS',
 'L1D__LD_MISS_LAT',
 'L1D__ST',
 'L1D__TLB',
 'TGC__READ',
 'TGC__READ_MISS',
 'AXI4_Slave__AW_FLIT',
 'AXI4_Slave__W_FLIT',
 'AXI4_Slave__W_FLIT_FINAL',
 'AXI4_Slave__B_FLIT',
 'AXI4_Slave__AR_FLIT',
 'AXI4_Slave__R_FLIT',
 'AXI4_Slave__R_FLIT_FINAL',
 'AXI4_Master__AW_FLIT',
 'AXI4_Master__W_FLIT',
 'AXI4_Master__W_FLIT_FINAL',
 'AXI4_Master__B_FLIT',
 'AXI4_Master__AR_FLIT',
 'AXI4_Master__R_FLIT',
 'AXI4_Master__R_FLIT_FINAL']

In [14]:
# event name format is "CATEGORY__NAME"
df.mean(axis=0).sort_values(ascending=False)[:-1]

L1I__TLB                     3.943674
L1I__LD                      3.943674
L1D__TLB                     1.585789
Core__1_BUSY_NO_CONSUME      1.224437
L1D__LD                      0.996534
Core__LOAD                   0.996534
Core__2_BUSY_NO_CONSUME      0.751300
Core__F_BUSY_NO_CONSUME      0.738302
L1I__LD_MISS_LAT             0.738302
Core__STORE_WAIT             0.710572
AXI4_Slave__W_FLIT           0.589255
AXI4_Slave__W_FLIT_FINAL     0.589255
AXI4_Slave__B_FLIT           0.589255
AXI4_Master__AW_FLIT         0.589255
AXI4_Master__W_FLIT          0.589255
L1D__ST                      0.589255
AXI4_Slave__AW_FLIT          0.589255
AXI4_Master__W_FLIT_FINAL    0.589255
AXI4_Master__B_FLIT          0.589255
Core__STORE                  0.589255
TGC__READ                    0.474003
Core__LOAD_WAIT              0.441075
L1D__LD_MISS_LAT             0.370017
Core__BRANCH                 0.173310
Core__JAL                    0.169844
Core__JALR                   0.160312
Core__AUIPC 

In [15]:
def select_performance_counters(df):
    ''' Returns the names of the columns of `df` whose maximum value is greater
    than zero, in their original column order. '''
    #hardcoded_ones = ['Core__TRAP', 'Core__INTERRUPT']
    column_maxima = df.max(axis=0)
    ever_above_zero = column_maxima > 0
    return column_maxima.loc[ever_above_zero].index.tolist()

select_performance_counters(df)

['Core__BRANCH',
 'Core__JAL',
 'Core__JALR',
 'Core__AUIPC',
 'Core__LOAD',
 'Core__STORE',
 'Core__SERIAL_SHIFT',
 'Core__LOAD_WAIT',
 'Core__STORE_WAIT',
 'Core__F_BUSY_NO_CONSUME',
 'Core__1_BUSY_NO_CONSUME',
 'Core__2_BUSY_NO_CONSUME',
 'L1I__LD',
 'L1I__LD_MISS',
 'L1I__LD_MISS_LAT',
 'L1I__TLB',
 'L1D__LD',
 'L1D__LD_MISS',
 'L1D__LD_MISS_LAT',
 'L1D__ST',
 'L1D__TLB',
 'TGC__READ',
 'TGC__READ_MISS',
 'AXI4_Slave__AW_FLIT',
 'AXI4_Slave__W_FLIT',
 'AXI4_Slave__W_FLIT_FINAL',
 'AXI4_Slave__B_FLIT',
 'AXI4_Slave__AR_FLIT',
 'AXI4_Slave__R_FLIT',
 'AXI4_Slave__R_FLIT_FINAL',
 'AXI4_Master__AW_FLIT',
 'AXI4_Master__W_FLIT',
 'AXI4_Master__W_FLIT_FINAL',
 'AXI4_Master__B_FLIT',
 'AXI4_Master__AR_FLIT',
 'AXI4_Master__R_FLIT',
 'AXI4_Master__R_FLIT_FINAL']

In [16]:
# NOTE: only the value of the last expression of a cell is displayed, so the
# result of this first line (per-column maxima, sorted descending, last entry
# dropped) is computed and then discarded.
df.max(axis=0).sort_values(ascending=False)[:-1]
# The 30 largest Core__LOAD_WAIT values (sorted ascending, so the largest come last).
df['Core__LOAD_WAIT'].sort_values()[-30:]

371       0
422       0
385       0
384       0
383       0
382       0
381       0
380       0
374       0
379       0
421       0
377       0
376       0
375       0
378       0
416       0
983       7
937       7
881       7
877      14
0        21
979      22
933      25
775      36
11       40
97       42
15       50
387      52
1074     70
93      116
Name: Core__LOAD_WAIT, dtype: int64

In [17]:
console_data_available()

True

In [18]:
console_read()

"Cookie monster is hungry, provide some cookies!\n'-' skips to the next character\nXX as two hex digits stores a single cookie\n> \nNo cookies??\n"

In [19]:
#console_send('AA', end_byte=ord('\n'))

In [20]:
#console_send('AAAA')

In [21]:
# import matplotlib.pyplot as plt
# plt.plot(pcs)
# plt.show()