From df13124810813630cb3965900f73ff2d053a7812 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sat, 9 Mar 2024 22:54:11 +0100 Subject: [PATCH 1/9] [coremark] minor printf cleanup --- sw/example/coremark/core_portme.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sw/example/coremark/core_portme.c b/sw/example/coremark/core_portme.c index 94af997d3..4e08e3479 100644 --- a/sw/example/coremark/core_portme.c +++ b/sw/example/coremark/core_portme.c @@ -16,7 +16,7 @@ limitations under the License. Original Author: Shay Gal-on */ -/* Adjusted for the NEORV32 RISC-V Processor by Stephan Nolting */ +/* Ported to the NEORV32 RISC-V Processor by Stephan Nolting, 2024 */ #include "coremark.h" #include "core_portme.h" @@ -194,14 +194,14 @@ void portable_fini(core_portable *p) { neorv32_uart0_printf(" > Active clock cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MCYCLE)); neorv32_uart0_printf(" > Retired instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MINSTRET)); if (num_hpm_cnts_global == 0) {neorv32_uart0_printf("no HPMs available\n"); } - if (num_hpm_cnts_global > 0) {neorv32_uart0_printf(" > Compressed instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } - if (num_hpm_cnts_global > 1) {neorv32_uart0_printf(" > Instr. 
dispatch wait cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } - if (num_hpm_cnts_global > 2) {neorv32_uart0_printf(" > ALU wait cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } - if (num_hpm_cnts_global > 3) {neorv32_uart0_printf(" > Branch instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } - if (num_hpm_cnts_global > 4) {neorv32_uart0_printf(" > Control flow transfers) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } - if (num_hpm_cnts_global > 5) {neorv32_uart0_printf(" > Load instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } - if (num_hpm_cnts_global > 6) {neorv32_uart0_printf(" > Store instructions) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } - if (num_hpm_cnts_global > 7) {neorv32_uart0_printf(" > Load/store wait cycles) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } - if (num_hpm_cnts_global > 8) {neorv32_uart0_printf(" > Entered traps) : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } + if (num_hpm_cnts_global > 0) {neorv32_uart0_printf(" > Compressed instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER3)); } + if (num_hpm_cnts_global > 1) {neorv32_uart0_printf(" > Instr. 
dispatch wait cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER4)); } + if (num_hpm_cnts_global > 2) {neorv32_uart0_printf(" > ALU wait cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER5)); } + if (num_hpm_cnts_global > 3) {neorv32_uart0_printf(" > Branch instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER6)); } + if (num_hpm_cnts_global > 4) {neorv32_uart0_printf(" > Control flow transfers : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER7)); } + if (num_hpm_cnts_global > 5) {neorv32_uart0_printf(" > Load instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER8)); } + if (num_hpm_cnts_global > 6) {neorv32_uart0_printf(" > Store instructions : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER9)); } + if (num_hpm_cnts_global > 7) {neorv32_uart0_printf(" > Load/store wait cycles : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER10)); } + if (num_hpm_cnts_global > 8) {neorv32_uart0_printf(" > Entered traps : %u\n", (uint32_t)neorv32_cpu_csr_read(CSR_MHPMCOUNTER11)); } neorv32_uart0_printf("\n"); } From c0ab01bea5a878d0d93aca359b6c86b8a6184520 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sat, 9 Mar 2024 22:54:54 +0100 Subject: [PATCH 2/9] [cache] heavy cleanups and optimizations --- rtl/core/neorv32_cache.vhd | 172 +++++++++++++++++++------------------ 1 file changed, 90 insertions(+), 82 deletions(-) diff --git a/rtl/core/neorv32_cache.vhd b/rtl/core/neorv32_cache.vhd index 30fe848c3..a282fda92 100644 --- a/rtl/core/neorv32_cache.vhd +++ b/rtl/core/neorv32_cache.vhd @@ -196,7 +196,7 @@ begin -- Check if Direct/Uncached Access -------------------------------------------------------- -- ------------------------------------------------------------------------------------------- dir_acc_d <= '1' when (host_req_i.addr(31 downto 28) = UC_BEGIN) or -- uncached memory page - (host_req_i.rvso = '1') else '0'; -- atomic )reservation set) operation + 
(host_req_i.rvso = '1') else '0'; -- atomic (reservation set) operation -- request splitter: cached or direct access -- req_splitter: process(host_req_i, dir_acc_d) @@ -810,58 +810,71 @@ end neorv32_cache_bus; architecture neorv32_cache_bus_rtl of neorv32_cache_bus is -- cache layout -- - constant offset_size_c : natural := index_size_f(BLOCK_SIZE); + constant offset_size_c : natural := index_size_f(BLOCK_SIZE/4); -- WORD offset! constant index_size_c : natural := index_size_f(NUM_BLOCKS); - constant tag_lsb_c : natural := index_size_c + offset_size_c; + constant tag_size_c : natural := 32 - (offset_size_c + index_size_c + 2); -- host request buffer -- signal hreq : bus_req_t; - -- control engine -- - type ctrl_state_t is (S_IDLE, S_CHECK_PRE, S_CHECK, S_DOWNLOAD_REQ, S_DOWNLOAD_RSP, - S_UPLOAD_GET, S_UPLOAD_REQ, S_UPLOAD_RSP, S_FLUSH_0, S_FLUSH_1, S_FLUSH_2); - type ctrl_t is record - state, state_nxt : ctrl_state_t; -- FSM state - upret, upret_nxt : ctrl_state_t; -- upload-done return state - addr, addr_nxt : std_ulogic_vector(31 downto 0); -- address generator - bcnt, bcnt_nxt : std_ulogic_vector(index_size_c-1 downto 0); -- block counter + -- control fsm -- + type state_t is (S_IDLE, S_CHECK, S_DOWNLOAD_REQ, S_DOWNLOAD_RSP, S_UPLOAD_GET, S_UPLOAD_REQ, S_UPLOAD_RSP, S_FLUSH_START, S_FLUSH_READ, S_FLUSH_CHECK); + signal state, upret, state_nxt, upret_nxt: state_t; + + -- address generator -- + type addr_t is record + tag : std_ulogic_vector(tag_size_c-1 downto 0); + ind : std_ulogic_vector(index_size_c-1 downto 0); + off : std_ulogic_vector(offset_size_c-1 downto 0); -- WORD offset! 
end record; - signal ctrl : ctrl_t; + signal haddr, baddr, addr, addr_nxt : addr_t; begin + -- Address Decomposition ------------------------------------------------------------------ + -- ------------------------------------------------------------------------------------------- + -- base address of original host access -- + haddr.tag <= host_req_i.addr(31 downto (32-tag_size_c)); + haddr.ind <= host_req_i.addr((offset_size_c+2+index_size_c)-1 downto offset_size_c+2); + haddr.off <= (others => '0'); -- unused + + -- base address of indexed cache block -- + baddr.tag <= base_i(31 downto (32-tag_size_c)); + baddr.ind <= base_i((offset_size_c+2+index_size_c)-1 downto offset_size_c+2); + baddr.off <= (others => '0'); -- unused + + -- Control Engine FSM Sync ---------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- ctrl_engine_sync: process(rstn_i, clk_i) begin if (rstn_i = '0') then - ctrl.state <= S_IDLE; - ctrl.upret <= S_IDLE; - ctrl.addr <= (others => '0'); - ctrl.bcnt <= (others => '0'); - hreq <= req_terminate_c; + state <= S_IDLE; + upret <= S_IDLE; + addr.tag <= (others => '0'); + addr.ind <= (others => '0'); + addr.off <= (others => '0'); + hreq <= req_terminate_c; elsif rising_edge(clk_i) then - ctrl.state <= ctrl.state_nxt; - ctrl.upret <= ctrl.upret_nxt; - ctrl.addr <= ctrl.addr_nxt; - ctrl.bcnt <= ctrl.bcnt_nxt; - hreq <= host_req_i; + state <= state_nxt; + upret <= upret_nxt; + addr <= addr_nxt; + hreq <= host_req_i; end if; end process ctrl_engine_sync; -- Control Engine FSM Comb ---------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - ctrl_engine_comb: process(ctrl, hreq, host_req_i, bus_rsp_i, cmd_sync_i, cmd_miss_i, rdata_i, dirty_i, base_i) + ctrl_engine_comb: process(state, upret, addr, hreq, haddr, baddr, bus_rsp_i, cmd_sync_i, cmd_miss_i, 
rdata_i, dirty_i) begin - -- control defaults -- - ctrl.state_nxt <= ctrl.state; - ctrl.upret_nxt <= ctrl.upret; - ctrl.addr_nxt <= ctrl.addr; - ctrl.bcnt_nxt <= ctrl.bcnt; + -- control engine defaults -- + state_nxt <= state; + upret_nxt <= upret; + addr_nxt <= addr; -- cache defaults -- - addr_o <= ctrl.addr; + addr_o <= addr.tag & addr.ind & addr.off & "00"; -- always word-aligned we_o <= (others => '0'); swe_o <= '0'; wdata_o <= bus_rsp_i.data; @@ -873,46 +886,42 @@ begin -- bus interface defaults -- bus_req_o <= req_terminate_c; -- all-zero - bus_req_o.addr <= ctrl.addr(31 downto 2) & "00"; -- always word-aligned + bus_req_o.addr <= addr.tag & addr.ind & addr.off & "00"; -- always word-aligned bus_req_o.data <= rdata_i; bus_req_o.ben <= (others => '1'); -- full-word writes only bus_req_o.priv <= hreq.priv; -- keep original privilege level -- fsm -- - case ctrl.state is + case state is when S_IDLE => -- wait for request -- ------------------------------------------------------------ - ctrl.addr_nxt(offset_size_c-1 downto 0) <= (others => '0'); -- align block base address - ctrl.bcnt_nxt <= (others => '0'); -- reset block counter + addr_nxt.off <= (others => '0'); -- align block base address for upload/download (and flush) if (cmd_sync_i = '1') then -- cache sync - ctrl.state_nxt <= S_FLUSH_0; + state_nxt <= S_FLUSH_START; elsif (cmd_miss_i = '1') then -- cache miss - ctrl.addr_nxt(31 downto offset_size_c) <= host_req_i.addr(31 downto offset_size_c); -- buffer original tag + index for cache look-up - ctrl.state_nxt <= S_CHECK_PRE; + state_nxt <= S_CHECK; end if; - when S_CHECK_PRE => -- cache memory access latency + when S_CHECK => -- check if accessed block is dirty (cache address is still applied by host controller!) 
-- ------------------------------------------------------------ - ctrl.state_nxt <= S_CHECK; - - when S_CHECK => -- check if accessed block is dirty - -- ------------------------------------------------------------ - ctrl.upret_nxt <= S_DOWNLOAD_REQ; -- go straight to S_DOWNLOAD_REQ after S_UPLOAD_GET is completed (if executed) + upret_nxt <= S_DOWNLOAD_REQ; -- go straight to S_DOWNLOAD_REQ when S_UPLOAD_GET has completed (if executed) if (dirty_i = '1') then -- block is dirty, upload first - ctrl.addr_nxt(31 downto offset_size_c) <= base_i(31 downto offset_size_c); -- base address of accessed block - ctrl.state_nxt <= S_UPLOAD_GET; + addr_nxt.tag <= baddr.tag; -- base address (tag + index) of accessed block + addr_nxt.ind <= baddr.ind; + state_nxt <= S_UPLOAD_GET; else -- block is clean, download new block and override - ctrl.addr_nxt(31 downto offset_size_c) <= host_req_i.addr(31 downto offset_size_c); -- base address of requested block - ctrl.state_nxt <= S_DOWNLOAD_REQ; + addr_nxt.tag <= haddr.tag; -- base address (tag + index) of requested block + addr_nxt.ind <= haddr.ind; + state_nxt <= S_DOWNLOAD_REQ; end if; when S_DOWNLOAD_REQ => -- download new cache block: request new word -- ------------------------------------------------------------ - bus_req_o.rw <= '0'; -- read access - bus_req_o.stb <= '1'; -- request new transfer - ctrl.state_nxt <= S_DOWNLOAD_RSP; + bus_req_o.rw <= '0'; -- read access + bus_req_o.stb <= '1'; -- request new transfer + state_nxt <= S_DOWNLOAD_RSP; when S_DOWNLOAD_RSP => -- download new cache block: wait for bus response -- ------------------------------------------------------------ @@ -921,77 +930,76 @@ begin swe_o <= '1'; -- cache: write status bit (bus error response) new_o <= '1'; -- set new block (set tag, make valid, make clean) if (bus_rsp_i.ack = '1') or (bus_rsp_i.err = '1') then -- wait for response - ctrl.addr_nxt(offset_size_c-1 downto 2) <= std_ulogic_vector(unsigned(ctrl.addr(offset_size_c-1 downto 2)) + 1); - if 
(and_reduce_f(ctrl.addr(offset_size_c-1 downto 2)) = '1') then -- block completed? offset will be all-zero again after block completion - ctrl.state_nxt <= S_IDLE; + addr_nxt.off <= std_ulogic_vector(unsigned(addr.off) + 1); + if (and_reduce_f(addr.off) = '1') then -- block completed? offset will be all-zero again after block completion + state_nxt <= S_IDLE; else -- get next word - ctrl.state_nxt <= S_DOWNLOAD_REQ; + state_nxt <= S_DOWNLOAD_REQ; end if; end if; when S_UPLOAD_GET => -- upload dirty cache block: read word from cache -- ------------------------------------------------------------ - bus_req_o.rw <= '1'; -- write access - ctrl.state_nxt <= S_UPLOAD_REQ; + bus_req_o.rw <= '1'; -- write access + state_nxt <= S_UPLOAD_REQ; when S_UPLOAD_REQ => -- upload dirty cache block: request bus write -- ------------------------------------------------------------ - bus_req_o.rw <= '1'; -- write access - bus_req_o.stb <= '1'; -- request new transfer - ctrl.state_nxt <= S_UPLOAD_RSP; + bus_req_o.rw <= '1'; -- write access + bus_req_o.stb <= '1'; -- request new transfer + state_nxt <= S_UPLOAD_RSP; when S_UPLOAD_RSP => -- upload dirty cache block: wait for bus response -- ------------------------------------------------------------ bus_req_o.rw <= '1'; -- write access new_o <= '1'; -- set new block (set tag, make valid, make clean) if (bus_rsp_i.ack = '1') or (bus_rsp_i.err = '1') then -- wait for response - ctrl.addr_nxt(offset_size_c-1 downto 2) <= std_ulogic_vector(unsigned(ctrl.addr(offset_size_c-1 downto 2)) + 1); - if (and_reduce_f(ctrl.addr(offset_size_c-1 downto 2)) = '1') then -- block completed? offset will be all-zero again after block completion - ctrl.state_nxt <= ctrl.upret; -- go back to "upload-done return state" + addr_nxt.off <= std_ulogic_vector(unsigned(addr.off) + 1); + if (and_reduce_f(addr.off) = '1') then -- block completed? 
offset will be all-zero again after block completion + state_nxt <= upret; -- go back to "upload-done return state" else -- get next word - ctrl.state_nxt <= S_UPLOAD_GET; + state_nxt <= S_UPLOAD_GET; end if; end if; - when S_FLUSH_0 => -- cache access latency cycle + when S_FLUSH_START => -- start checking for dirty blocks -- ------------------------------------------------------------ - ctrl.addr_nxt(tag_lsb_c-1 downto offset_size_c) <= ctrl.bcnt; -- current block to check if dirty - ctrl.state_nxt <= S_FLUSH_1; + addr_nxt.ind <= (others => '0'); -- start with index 0 + upret_nxt <= S_FLUSH_CHECK; -- come back to S_FLUSH_CHECK after block upload + state_nxt <= S_FLUSH_READ; - when S_FLUSH_1 => -- sync. cache memory read latency cycle + when S_FLUSH_READ => -- cache read access latency cycle -- ------------------------------------------------------------ - ctrl.state_nxt <= S_FLUSH_2; + state_nxt <= S_FLUSH_CHECK; - when S_FLUSH_2 => -- check if currently indexed block is dirty + when S_FLUSH_CHECK => -- check if currently indexed block is dirty -- ------------------------------------------------------------ - ctrl.upret_nxt <= S_FLUSH_2; -- come back here after upload - inval_o <= '1'; -- invalidate currently checked block - ctrl.addr_nxt(31 downto offset_size_c) <= base_i(31 downto offset_size_c); -- tag + index of currently checked block - -- check if dirty / upload required -- - if (dirty_i = '1') then -- upload dirty block to main memory - ctrl.state_nxt <= S_UPLOAD_GET; + addr_nxt.tag <= baddr.tag; -- tag of currently indexed block + inval_o <= '1'; -- invalidate currently indexed block + if (dirty_i = '1') then -- block dirty? + state_nxt <= S_UPLOAD_GET; else -- move on to next block - ctrl.bcnt_nxt <= std_ulogic_vector(unsigned(ctrl.bcnt) + 1); - if (and_reduce_f(ctrl.bcnt) = '1') then -- all blocks done? 
- bus_req_o.fence <= '1'; -- forward fence (sync) to downstream memories - ctrl.state_nxt <= S_IDLE; + addr_nxt.ind <= std_ulogic_vector(unsigned(addr.ind) + 1); + if (and_reduce_f(addr.ind) = '1') then -- all blocks done? + bus_req_o.fence <= '1'; -- forward fence request to downstream memories + state_nxt <= S_IDLE; else -- go to next block - ctrl.state_nxt <= S_FLUSH_0; + state_nxt <= S_FLUSH_READ; end if; end if; when others => -- undefined -- ------------------------------------------------------------ - ctrl.state_nxt <= S_IDLE; + state_nxt <= S_IDLE; end case; end process ctrl_engine_comb; - -- bus arbiter operation in progress -- - cmd_busy_o <= '0' when (ctrl.state = S_IDLE) else '1'; + -- bus arbiter operation in progress (host keeps applying cache address while bus unit reports idle state) -- + cmd_busy_o <= '0' when (state = S_IDLE) or (state = S_CHECK) else '1'; end neorv32_cache_bus_rtl; From b1d3df234d2ff3978051fbea6a9f38cacf5020ef Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sat, 9 Mar 2024 22:55:21 +0100 Subject: [PATCH 3/9] [rtl] update to version 1.9.6.2 --- rtl/core/neorv32_package.vhd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index 2b0267ad1..00ca301f2 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -53,7 +53,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090601"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01090602"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width From f5887702bf5b0f70c1ab2cb02d6abbecaf2d1b6c Mon Sep 
17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sat, 9 Mar 2024 23:37:12 +0100 Subject: [PATCH 4/9] [cache] bug fixes - fix direct access address detection logic - use a real bus switch: buffer direct access requests in case a cache flush is in progress --- rtl/core/neorv32_cache.vhd | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/rtl/core/neorv32_cache.vhd b/rtl/core/neorv32_cache.vhd index a282fda92..8ac9d5ee6 100644 --- a/rtl/core/neorv32_cache.vhd +++ b/rtl/core/neorv32_cache.vhd @@ -195,7 +195,7 @@ begin -- Check if Direct/Uncached Access -------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - dir_acc_d <= '1' when (host_req_i.addr(31 downto 28) = UC_BEGIN) or -- uncached memory page + dir_acc_d <= '1' when (host_req_i.addr(31 downto 28) >= UC_BEGIN) or -- uncached memory page (host_req_i.rvso = '1') else '0'; -- atomic (reservation set) operation -- request splitter: cached or direct access -- @@ -294,7 +294,7 @@ begin cache_in <= cache_in_host when (cmd_busy = '0') else cache_in_bus; - -- Bus Access Arbiter (Handle Cache Miss and Flush/Reload)--------------------------------- + -- Bus Access Arbiter (Handle Cache Miss and Flush/Reload) -------------------------------- -- ------------------------------------------------------------------------------------------- neorv32_cache_bus_inst: neorv32_cache_bus generic map ( @@ -328,10 +328,26 @@ begin rdata_i => cache_out.rdata -- read data ); - -- simple bus multiplexer (as there won't be simultaneous access requests) -- - bus_req_o <= bus_req when (cmd_busy = '1') else dir_req_q; - dir_rsp_d <= bus_rsp_i; - bus_rsp <= bus_rsp_i; + + -- Bus Access Switch ---------------------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + -- Use a real 
switch here to buffer direct access requests + -- during out-of-band cache operation (fence / cache flush). + neorv32_cache_bus_switch: entity neorv32.neorv32_bus_switch + generic map ( + PORT_A_READ_ONLY => false, + PORT_B_READ_ONLY => false + ) + port map ( + clk_i => clk_i, + rstn_i => rstn_i, + a_req_i => bus_req, + a_rsp_o => bus_rsp, + b_req_i => dir_req_q, + b_rsp_o => dir_rsp_d, + x_req_o => bus_req_o, + x_rsp_i => bus_rsp_i + ); end neorv32_cache_rtl; From aa9164efe18b932933a16f92950a36249d17a2bd Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sat, 9 Mar 2024 23:37:29 +0100 Subject: [PATCH 5/9] [processor_check] minor fix --- sw/example/processor_check/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sw/example/processor_check/main.c b/sw/example/processor_check/main.c index 2f5ad1dd2..4f134dff2 100644 --- a/sw/example/processor_check/main.c +++ b/sw/example/processor_check/main.c @@ -990,7 +990,8 @@ int main() { neorv32_cpu_csr_write(CSR_MIE, 1 << CSR_MIE_MTIE); neorv32_mtime_set_timecmp(0); // force interrupt - int test_cnt = 0; + volatile int test_cnt = 0; + while(test_cnt < 3) { test_cnt++; } From 8b44e02cc895403c1c2995a124216849ebceadb2 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:36:20 +0100 Subject: [PATCH 6/9] [rtl] minor comment edits --- rtl/core/neorv32_dcache.vhd | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rtl/core/neorv32_dcache.vhd b/rtl/core/neorv32_dcache.vhd index 762e6637e..9d3381fe1 100644 --- a/rtl/core/neorv32_dcache.vhd +++ b/rtl/core/neorv32_dcache.vhd @@ -342,6 +342,7 @@ begin ctrl_rdata_o => cache.ctrl_rdata ); + end neorv32_dcache_rtl; @@ -460,11 +461,11 @@ begin -- ------------------------------------------------------------------------------------------- host_acc_addr.tag <= host_addr_i(31 downto 31-(cache_tag_size_c-1)); host_acc_addr.index <= 
host_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c); - host_acc_addr.offset <= host_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset + host_acc_addr.offset <= host_addr_i(2+(cache_offset_size_c-1) downto 2); -- word offset ctrl_acc_addr.tag <= ctrl_addr_i(31 downto 31-(cache_tag_size_c-1)); ctrl_acc_addr.index <= ctrl_addr_i(31-cache_tag_size_c downto 2+cache_offset_size_c); - ctrl_acc_addr.offset <= ctrl_addr_i(2+(cache_offset_size_c-1) downto 2); -- discard byte offset + ctrl_acc_addr.offset <= ctrl_addr_i(2+(cache_offset_size_c-1) downto 2); -- word offset -- Status Flag Memory --------------------------------------------------------------------- From 64a59a1492e6125c4b305c1b3349b5d30fdf6ec7 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:36:55 +0100 Subject: [PATCH 7/9] [rtl] fpu is now in "beta" state thanks to @mikaelsky --- rtl/core/neorv32_cpu_cp_fpu.vhd | 6 ------ 1 file changed, 6 deletions(-) diff --git a/rtl/core/neorv32_cpu_cp_fpu.vhd b/rtl/core/neorv32_cpu_cp_fpu.vhd index 755d03e26..8f64d05c3 100644 --- a/rtl/core/neorv32_cpu_cp_fpu.vhd +++ b/rtl/core/neorv32_cpu_cp_fpu.vhd @@ -287,12 +287,6 @@ architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is begin - -- Sanity Checks -------------------------------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - assert false report - "[NEORV32] The floating-point unit (Zfinx ISA extension) is still in experimental state." 
severity warning; - - -- **************************************************************************************************************************** -- Control -- **************************************************************************************************************************** From fd089b8b7c6221d502d945b3322289e997daf681 Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:38:13 +0100 Subject: [PATCH 8/9] [cpu] fix minor HW trigger timing issue fix a minor corner case bug: if the hw trigger fires at the same time as an interrupt the hw trigger should trigger the resulting trap --- rtl/core/neorv32_cpu_control.vhd | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index 05689173e..d54b7776c 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -368,9 +368,9 @@ begin when IF_REQUEST => -- request next 32-bit-aligned instruction word -- ------------------------------------------------------------ - if (ipb.free = "11") then -- wait for free IPB space + if (ipb.free = "11") then -- free IPB space? 
fetch_engine.state <= IF_PENDING; - elsif (fetch_engine.restart = '1') or (fetch_engine.reset = '1') then -- restart request due to branch + elsif (fetch_engine.restart = '1') or (fetch_engine.reset = '1') then -- restart because of branch fetch_engine.state <= IF_RESTART; end if; @@ -849,14 +849,14 @@ begin -- state machine -- case execute_engine.state is - when DISPATCH => -- Wait for ISSUE ENGINE to emit valid instruction word + when DISPATCH => -- Wait for ISSUE ENGINE to emit a valid instruction word -- ------------------------------------------------------------ if (trap_ctrl.env_pending = '1') or (trap_ctrl.exc_fire = '1') then -- pending trap or pending exception (fast) execute_engine.state_nxt <= TRAP_ENTER; elsif (CPU_EXTENSION_RISCV_Sdtrig = true) and (hw_trigger_match = '1') then -- hardware breakpoint execute_engine.pc_we <= '1'; -- pc <= next_pc trap_ctrl.hwtrig <= '1'; - execute_engine.state_nxt <= TRAP_ENTER; + execute_engine.state_nxt <= DISPATCH; -- stay here another round until trap_ctrl.hwtrig arrives in trap_ctrl.env_pending elsif (issue_engine.valid(0) = '1') or (issue_engine.valid(1) = '1') then -- new instruction word available issue_engine.ack <= '1'; trap_ctrl.instr_be <= issue_engine.data(32); -- access fault during instruction fetch @@ -1413,7 +1413,7 @@ begin -- break point -- if (CPU_EXTENSION_RISCV_Sdext = true) then trap_ctrl.exc_buf(exc_ebreak_c) <= (not trap_ctrl.env_enter) and (trap_ctrl.exc_buf(exc_ebreak_c) or - (trap_ctrl.hwtrig and (not csr.tdata1_action)) or -- trigger module fires and enter-debug is disabled + (trap_ctrl.hwtrig and (not csr.tdata1_action)) or -- trigger module fires and enter-debug-action is disabled (trap_ctrl.ebreak and ( csr.privilege) and (not csr.dcsr_ebreakm) and (not debug_ctrl.running)) or -- enter M-mode handler on ebreak in M-mode (trap_ctrl.ebreak and (not csr.privilege) and (not csr.dcsr_ebreaku) and (not debug_ctrl.running))); -- enter M-mode handler on ebreak in U-mode else @@ -1550,7 +1550,7 
@@ begin trap_ctrl.env_pending <= '0'; elsif rising_edge(clk_i) then if (trap_ctrl.env_pending = '0') then -- no pending trap environment yet - -- trigger IRQ only in EXECUTE states to *continue execution* even if there are permanent interrupt requests + -- trigger IRQ only in EXECUTE state -- if (trap_ctrl.exc_fire = '1') or ((trap_ctrl.irq_fire = '1') and (execute_engine.state = EXECUTE)) then trap_ctrl.env_pending <= '1'; -- now execute engine can start trap handling end if; From da79ac9e4fb8345ca323ebc50288d22f103ee9ef Mon Sep 17 00:00:00 2001 From: stnolting <22944758+stnolting@users.noreply.github.com> Date: Sun, 10 Mar 2024 12:50:01 +0100 Subject: [PATCH 9/9] [changelog] add v1.9.6.2 --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7f48f393..b9c4063f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Link | |:----:|:-------:|:--------|:----:| +| 10.03.2024 | 1.9.6.2 | minor rtl clean-ups, optimizations and fixes | [#843](https://github.com/stnolting/neorv32/pull/843) | | 09.03.2024 | 1.9.6.1 | add generic cache module (not used yet) | [#842](https://github.com/stnolting/neorv32/pull/842) | | 01.03.2024 | [**:rocket:1.9.6**](https://github.com/stnolting/neorv32/releases/tag/v1.9.6) | **New release** | | | 25.02.2024 | 1.9.5.10 | :bug: fix minor GPTMR threshold configuration issue | [#834](https://github.com/stnolting/neorv32/pull/834) |