In [1]:
#import ipdb # alternative to pdb that works in jupyter notebook (pip3 install ipdb)
import os, subprocess, sys, re, time
from pathlib import Path
from pynq import Overlay
#from pynq import GPIO
from pynq import allocate
import math
import pandas as pd

from dma_receiver import DmaReceiver
from bram_loader import Bram_Loader
from continuous_monitoring_system_controller import ContinuousMonitoringSystemController
from riscv_instruction_decoder import get_riscv_instruction_name

BASE_DIR = Path('/home/xilinx/design_files')
PATH = BASE_DIR 

base = Overlay(str(BASE_DIR / 'imported_design.bit'))

ITEM_BYTE_SIZE = 8
FIFO_SIZE = 32768
# +4 because DMA seems to have it's own buffer it fills before dma.recvchannel.transfer is even called
buffer_length = min( base.continuous_monitoring_system_blocks.axi_dma_0.buffer_max_size // ITEM_BYTE_SIZE, FIFO_SIZE)# + 4) 

print('buffer_length =', buffer_length)
input_buffer = allocate(shape=(buffer_length,), dtype='u8')
input_buffer_2 = allocate(shape=(buffer_length,), dtype='u8')

dma_rec = base.continuous_monitoring_system_blocks.axi_dma_0.recvchannel

# https://pynq.readthedocs.io/en/v2.7.0/_modules/pynq/lib/axigpio.html
gpio_rst_n_out = base.axi_gpio_0.channel1[0]
gpio_rst_n_console_input = base.axi_gpio_0.channel1[1]
gpio_rst_n_console_output = base.axi_gpio_0.channel1[2]
#gpio_en_cpu_reset_server_request_put_out = base.axi_gpio_0.channel1[1]
gpio_pc_stream_m_axis_tlast_interval = base.axi_gpio_1.channel1

gpio_fifo_wr_count = base.axi_gpio_0.channel2[0:16]
gpio_fifo_rd_count = base.axi_gpio_0.channel2[16:32]

def print_dma_channel_status(channel):
    print('dma.running =', channel.running)
    print('dma.idle =', channel.idle)
    print('dma.error =', channel.error)
    print('status =', hex(channel._mmio.read(channel._offset + 4)))
    
def reset_console_input(delay=0.001):
    gpio_rst_n_console_input.write(0)
    time.sleep(delay)
    gpio_rst_n_console_input.write(1)

def reset_console_output(delay=0.001):
    gpio_rst_n_console_output.write(0)
    time.sleep(delay)
    gpio_rst_n_console_output.write(1)
    
def reset_cpu(delay=0.001):
    ''' AXI GPIO controlled reset, active-low. '''
    #gpio_en_cpu_reset_server_request_put_out.write(0)
    reset_console_output()
    gpio_rst_n_out.write(0)
    time.sleep(delay)
    gpio_rst_n_out.write(1)
    time.sleep(delay)
    #gpio_en_cpu_reset_server_request_put_out.write(1)
    #time.sleep(delay)
    #gpio_en_cpu_reset_server_request_put_out.write(0)
    #time.sleep(delay)
    

def print_fifo_data_counts():
    print('gpio_fifo_wr_count =', gpio_fifo_wr_count.read())
    print('gpio_fifo_rd_count =', gpio_fifo_rd_count.read())
    
# def set_pc_stream_tlast_interval(items_count):
#     ''' Sets 32-bit value specifying how many items can 
#     arrive by a single dma.recvchannel.tranfer(ib) call. '''
#     gpio_pc_stream_m_axis_tlast_interval[0:32].write(items_count)

def console_send(s, end_byte=None):
    ''' Uses AXI GPIO and hardware FIFOs. '''
    console_input = base.console_io.axi_gpio_3.channel1[0:8]
    console_write_enable = base.console_io.axi_gpio_3.channel1[8] # using "edge_detector" to avoid continuous writing
    console_write_enable.off()
    for c in s:
        console_input.write(ord(c))
        console_write_enable.on()
        console_write_enable.off()
    if end_byte is not None:
        console_input.write(end_byte)
        console_write_enable.on()
        console_write_enable.off()
    
def console_data_available():
    console_output_empty = base.console_io.axi_gpio_3.channel2[8]
    return console_output_empty.read() == 0

def console_read():
    ''' Uses AXI GPIO and hardware FIFOs. '''
    s = ''
    console_output = base.console_io.axi_gpio_3.channel2[0:8]
    console_read_enable = base.console_io.axi_gpio_3.channel1[9] # using "edge_detector" to avoid continuous reading
    console_read_enable.off()
    while console_data_available():
        s += chr(console_output.read())
        console_read_enable.on()
        console_read_enable.off()
    return s

def instr_to_strings(instructions_integers):
    ''' Requires riscv-python-model installed.
    If network connection is available, "python3 -m pip install riscv-model.
    If not, then on separate machine with internet:
        python3 -m pip download riscv-model -d .  
    Then copy the downloaded .whl file to pynq and install with:
        python3 -m pip install <file.whl> -f ./ --no-index   
    Usage:
        instr_to_string([0xB60006F, 0xFE0791E3])
        '''
    instructions_string = ' 0x'.join(f'{ii:08X}' for ii in instructions_integers)
    return os.popen(f'riscv-machinsn-decode hexstring {instructions_string}').read().strip().split('\n')


####################################################################
# 

def read_performance_event_names(f_name='performance_event_names.txt'):
    ''' Reads events names from file, these were collected from CHERI-Flute source code by using this script:
    https://github.com/michalmonday/Flute/blob/continuous_monitoring/builds/RV64ACDFIMSUxCHERI_Flute_verilator/vcd/read_vcd.py
    '''
    with open(f_name) as f:
        return [line.strip() for line in f.readlines()]

def pop_n_bits_value(val, n):
    ''' pop_n_bits_value(0xFFFF, 4) returns: (0xFFF, 0xF) '''
    bits_value = val & ((1<<n)-1)
    return val >> n, bits_value

def parse_fifo_item(fifo_item):
    ''' Parses a single fifo item (e.g. 1024 bits) numerical value. 
        Single fifo item = {59bits padding, performance_counters805(7bits*115counters), instr32, clk_counter_delta64, pc64}
        Padding is used because only power of 2s can be used as size in fifo generator block (or axi in general?)'''
    perf_counters = []
    for i in range(PERFORMANCE_EVENTS_COUNT):
        fifo_item, perf_counter = pop_n_bits_value(fifo_item, 7)
        perf_counters.append(perf_counter)
    fifo_item, pc = pop_n_bits_value(fifo_item, 64)
    fifo_item, clk_counter = pop_n_bits_value(fifo_item, 64)
    fifo_item, instr = pop_n_bits_value(fifo_item, 32)
    return perf_counters, pc, clk_counter, instr

def get_dma_transfer(input_buffer, dma_rec):
    ''' Returns the number of transferred items, each having 1024 bits. '''
    dma_rec.transfer(input_buffer)
    dma_rec.wait() # depends on tlast
    items_transferred = math.floor(dma_rec.transferred * 64 / 1024 / 8)
    print(f'items_transferred = {items_transferred}')
    return items_transferred

def parse_last_dma_transfer(input_buffer, items_transferred):
    pcs = []
    instrs = []
    instr_names = []
    clk_counters = []
    events = []
    for i in range(items_transferred):
        chunks_per_item = math.ceil(1024/64)
        start = chunks_per_item * i
        end = start + chunks_per_item
        fifo_item = int.from_bytes(bytes(input_buffer[start:end]), byteorder='little')
        perf_counters, pc, clk_counter, instr = parse_fifo_item(fifo_item)
        events.append(perf_counters)
        pcs.append(pc)
        clk_counters.append(clk_counter)
        instrs.append(instr)
        
        instr_names.append( get_riscv_instruction_name(instr) )
    instr_strings = instr_to_strings(instrs)
    return events, pcs, clk_counters, instrs, instr_names, instr_strings

def decode_riscv_instruction(instruction):
    opcode = instruction & 0x7F
    funct3 = (instruction >> 12) & 0x7
    funct7 = (instruction >> 25) & 0x7F
    return opcode, funct3, funct7

def get_riscv_instruction_name(instruction):
    opcode, funct3, funct7 = decode_riscv_instruction(instruction)
    if opcode == 0x33:
        if funct3 == 0x0:
            if funct7 == 0x0:
                return 'ADD'
            elif funct7 == 0x20:
                return 'SUB'
        elif funct3 == 0x1:
            return 'SLL'
        elif funct3 == 0x2:
            return 'SLT'
        elif funct3 == 0x3:
            return 'SLTU'
        elif funct3 == 0x4:
            return 'XOR'
        elif funct3 == 0x5:
            if funct7 == 0x0:
                return 'SRL'
            elif funct7 == 0x20:
                return 'SRA'
        elif funct3 == 0x6:
            return 'OR'
        elif funct3 == 0x7:
            return 'AND'
    elif opcode == 0x13:
        if funct3 == 0x0:
            return 'ADDI'
        elif funct3 == 0x1:
            return 'SLLI'
        elif funct3 == 0x2:
            return 'SLTI'
        elif funct3 == 0x3:
            return 'SLTIU'
        elif funct3 == 0x4:
            return 'XORI'
        elif funct3 == 0x5:
            if funct7 == 0x0:
                return 'SRLI'
            elif funct7 == 0x20:
                return 'SRAI'
        elif funct3 == 0x6:
            return 'ORI'
        elif funct3 == 0x7:
            return 'ANDI'
    elif opcode == 0x3:
        if funct3 == 0x0:
            return 'LB'
        elif funct3 == 0x1:
            return 'LH'
        elif funct3 == 0x2:
            return 'LW'
        elif funct3 == 0x4:
            return 'LBU'
        elif funct3 == 0x5:
            return 'LHU'
    elif opcode == 0x23:
        if funct3 == 0x0:
            return 'SB'
        elif funct3 == 0x1:
            return 'SH'
        elif funct3 == 0x2:
            return 'SW'
    elif opcode == 0x37:
        return 'LUI'
    elif opcode == 0x17:
        return 'AUIPC'
    elif opcode == 0x6F:
        return 'JAL'
    elif opcode == 0x67:
        return 'JALR'
    elif opcode == 0x63:
        if funct3 == 0x0:
            return 'BEQ'
        elif funct3 == 0x1:
            return 'BNE'
        elif funct3 == 0x4:
            return 'BLT'
        elif funct3 == 0x5:
            return 'BGE'
        elif funct3 == 0x6:
            return 'BLTU'
        elif funct3 == 0x7:
            return 'BGEU'
    elif opcode == 0x73:
        if funct3 == 0x0:
            if funct7 == 0x0:
                return 'ECALL'
            elif funct7 == 0x1:
                return 'EBREAK'
        elif funct3 == 0x1:
            if funct7 == 0x0:
                return 'CSRRW'
            elif funct7 == 0x1:
                return 'CSRRS'
            elif funct7 == 0x2:
                return 'CSRRC'
            elif funct7 == 0x5:
                return 'CSRRWI'
            elif funct7 == 0x6:
                return 'CSRRCI'
    return 'UNKNOWN'

event_names = read_performance_event_names()
PERFORMANCE_EVENTS_COUNT = 115

print_dma_channel_status(dma_rec)
print_fifo_data_counts()

# set_pc_stream_tlast_interval(1000)


buffer_length = 32768
dma.running = True
dma.idle = False
dma.error = False
status = 0x0
gpio_fifo_wr_count = 0
gpio_fifo_rd_count = 0


In [2]:
def setup_cms(cms_ctrl):
    # Triggerring (exact address must match to start/stop trace)
    cms_ctrl.set_trigger_trace_start_address(0x1000)
    cms_ctrl.set_trigger_trace_end_address(0x80000106)  
    cms_ctrl.set_trigger_trace_start_address_enabled(False)
    cms_ctrl.set_trigger_trace_end_address_enabled(False)

    # Filtering (any address between lower bound and upper bound will be collected)
    cms_ctrl.set_monitored_address_range_lower_bound(0x0FFF)     #(0x80000000)
    cms_ctrl.set_monitored_address_range_upper_bound(0x800000FF)
    cms_ctrl.set_monitored_address_range_lower_bound_enabled(False)
    cms_ctrl.set_monitored_address_range_upper_bound_enabled(False)
    
    # Allow further trace collection if last traced program used "wfi"
    # (wait for interrupt) instruction which stops the trace.
    cms_ctrl.reset_wfi_wait()

# the long name is because of using hierarchy in Vivado block design
cms_ctrl_axi_gpio = base.continuous_monitoring_system_blocks.axi_gpio_to_cms_ctrl_interface.axi_gpio_cms_ctrl.channel1    
cms_ctrl = ContinuousMonitoringSystemController(cms_ctrl_axi_gpio)
setup_cms(cms_ctrl)

In [3]:
gpio_rst_n_out.write(0)

In [4]:
bram_loader = Bram_Loader(base.bram_loader.axi_gpio_2)
#bram_loader.load(PATH / 'riscv-example-baremetal-short.bin')
bram_loader.load(PATH / 'riscv-stack-mission.bin')

In [5]:
# program input needs to be available immediately, for that reason a separate reset line is used for processor
# and the console input fifo
reset_console_input()
console_send('AAAA', end_byte=-1)

In [6]:
print_fifo_data_counts()
reset_cpu()

gpio_fifo_wr_count = 0
gpio_fifo_rd_count = 0


In [7]:
print_fifo_data_counts()

gpio_fifo_wr_count = 1026
gpio_fifo_rd_count = 0


In [8]:
items_transferred = get_dma_transfer(input_buffer, dma_rec)

KeyboardInterrupt: 

In [9]:
items_transferred = 1024

In [10]:
events, pcs, clk_counters, instrs, instr_names, instr_strings = parse_last_dma_transfer(input_buffer, items_transferred)

for pc, instr, instr_str, clk_counter, instr_name in zip(pcs, instrs, instr_strings, clk_counters, instr_names):
    print(f'CLK_DELTA={clk_counter:<14}PC={pc:>8X}    INSTR={instr:>08X}    INSTR_NAME={instr_name:<6}    {instr_str}')

CLK_DELTA=5718212254    PC=    1010    INSTR=00028067    INSTR_NAME=JALR      jalr x0, x5, 0
CLK_DELTA=34            PC=80000004    INSTR=00029C63    INSTR_NAME=BNE       bne x5, x0, .+24
CLK_DELTA=36            PC=8000000C    INSTR=D4810113    INSTR_NAME=ADDI      addi x2, x2, -696
CLK_DELTA=1             PC=80000010    INSTR=42C0006F    INSTR_NAME=JAL       jal x0, .+1068
CLK_DELTA=34            PC=8000043E    INSTR=00113423    INSTR_NAME=UNKNOWN    Cannot decode 00113423, invalid instruction
CLK_DELTA=33            PC=80000444    INSTR=DFFFF0EF    INSTR_NAME=JAL       jal x1, .-514
CLK_DELTA=42            PC=80000244    INSTR=06113C23    INSTR_NAME=UNKNOWN    Cannot decode 06113c23, invalid instruction
CLK_DELTA=5             PC=8000024E    INSTR=01E0006F    INSTR_NAME=JAL       jal x0, .+30
CLK_DELTA=55            PC=80000270    INSTR=00F00733    INSTR_NAME=ADD       add x14, x0, x15
CLK_DELTA=57            PC=80000274    INSTR=FCE7FEE3    INSTR_NAME=BGEU      bgeu x15, x14, .-36
C

CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=800000C2    INSTR=F67FF0EF    INSTR_NAME=JAL       jal x1, .-154
CLK_DELTA=6             PC=8000002A    INSTR=00813C23    INSTR_NAME=UNKNOWN    Cannot decode 00813c23, invalid instruction
CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=800000C2    INSTR=F67FF0EF    INSTR_NAME=JAL       jal x1, .-154
CLK_DELTA=6             PC=8000002A    INSTR=00813C23    INSTR_NAME=UNKNOWN    Cannot decode 00813c23, invalid instruction
CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_

CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=800000C2    INSTR=F67FF0EF    INSTR_NAME=JAL       jal x1, .-154
CLK_DELTA=6             PC=8000002A    INSTR=00813C23    INSTR_NAME=UNKNOWN    Cannot decode 00813c23, invalid instruction
CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=80

CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=800000C2    INSTR=F67FF0EF    INSTR_NAME=JAL       jal x1, .-154
CLK_DELTA=6             PC=8000002A    INSTR=00813C23    INSTR_NAME=UNKNOWN    Cannot decode 00813c23, invalid instruction
CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=800000C2    INSTR=F67FF0EF    INSTR_NAME=JAL       jal x1, .-154
CLK_DELTA=6             PC=800

CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=800000C2    INSTR=F67FF0EF    INSTR_NAME=JAL       jal x1, .-154
CLK_DELTA=6             PC=8000002A    INSTR=00813C23    INSTR_NAME=UNKNOWN    Cannot decode 00813c23, invalid instruction
CLK_DELTA=18            PC=8000004E    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=800000CA    INSTR=0007C783    INSTR_NAME=LBU       lbu x15, 0(x15)
CLK_DELTA=4             PC=800000CE    INSTR=FE0791E3    INSTR_NAME=BNE       bne x15, x0, .-30
CLK_DELTA=1             PC=800000B4    INSTR=00178713    INSTR_NAME=ADDI      addi x14, x15, 1
CLK_DELTA=4             PC=80

CLK_DELTA=1             PC=80000140    INSTR=0007871B    INSTR_NAME=UNKNOWN    Cannot decode 0007871b, invalid instruction
CLK_DELTA=3             PC=80000148    INSTR=00E7C463    INSTR_NAME=BLT       blt x15, x14, .+8
CLK_DELTA=2             PC=8000014E    INSTR=0040006F    INSTR_NAME=JAL       jal x0, .+4
CLK_DELTA=2             PC=80000154    INSTR=01813403    INSTR_NAME=UNKNOWN    Cannot decode 01813403, invalid instruction
CLK_DELTA=2             PC=80000158    INSTR=00008067    INSTR_NAME=JALR      jalr x0, x1, 0
CLK_DELTA=2             PC=80000392    INSTR=00079963    INSTR_NAME=BNE       bne x15, x0, .+18
CLK_DELTA=4             PC=800003A8    INSTR=00F00533    INSTR_NAME=ADD       add x10, x0, x15
CLK_DELTA=5             PC=800003AA    INSTR=DB1FF0EF    INSTR_NAME=JAL       jal x1, .-592
CLK_DELTA=3             PC=8000015C    INSTR=00813C23    INSTR_NAME=UNKNOWN    Cannot decode 00813c23, invalid instruction
CLK_DELTA=8             PC=80000172    INSTR=02E7D063    INSTR_NAME=B

In [None]:
df = pd.DataFrame(zip(pcs,clk_counters,instrs,instr_names,instr_strings), columns=['pc','clk_counter','instr', 'instr_names', 'instr_strings'])
df.iloc[:,0] = df.iloc[:,0].apply(lambda x: f'{x:08X}')
df.head()

In [None]:
for instr in instrs: 
    name = get_riscv_instruction_name(instr)
    print(name)

In [None]:
# print performance counters for the first 10 datapoints/instructions from data above
df = pd.DataFrame(events, columns=event_names)
df.iloc[:10]

In [None]:
event_names

In [None]:
# event name format is "CATEGORY__NAME"
df.mean(axis=0).sort_values(ascending=False)[:-1]

In [8]:
console_data_available()

True

In [9]:
console_read()

"Cookie monster is hungry, provide some cookies!\n'-' skips to the next character\nXX as two hex digits stores a single cookie\n> \nMalformed cookie\n"

In [5]:
console_send('AAAA')

In [16]:
# import matplotlib.pyplot as plt
# plt.plot(pcs)
# plt.show()