Skip to content

Commit

Permalink
rtl cleanups and optimizations (#843)
Browse files Browse the repository at this point in the history
  • Loading branch information
stnolting committed Mar 10, 2024
2 parents a2bc846 + da79ac9 commit 46baf5a
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 114 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12

| Date | Version | Comment | Link |
|:----:|:-------:|:--------|:----:|
| 10.03.2024 | 1.9.6.2 | minor rtl clean-ups, optimizations and fixes | [#843](https://github.com/stnolting/neorv32/pull/843) |
| 09.03.2024 | 1.9.6.1 | add generic cache module (not used yet) | [#842](https://github.com/stnolting/neorv32/pull/842) |
| 01.03.2024 | [**:rocket:1.9.6**](https://github.com/stnolting/neorv32/releases/tag/v1.9.6) | **New release** | |
| 25.02.2024 | 1.9.5.10 | :bug: fix minor GPTMR threshold configuration issue | [#834](https://github.com/stnolting/neorv32/pull/834) |
Expand Down
200 changes: 112 additions & 88 deletions rtl/core/neorv32_cache.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ begin

-- Check if Direct/Uncached Access --------------------------------------------------------
-- -------------------------------------------------------------------------------------------
dir_acc_d <= '1' when (host_req_i.addr(31 downto 28) = UC_BEGIN) or -- uncached memory page
(host_req_i.rvso = '1') else '0'; -- atomic )reservation set) operation
dir_acc_d <= '1' when (host_req_i.addr(31 downto 28) >= UC_BEGIN) or -- uncached memory page
(host_req_i.rvso = '1') else '0'; -- atomic (reservation set) operation

-- request splitter: cached or direct access --
req_splitter: process(host_req_i, dir_acc_d)
Expand Down Expand Up @@ -294,7 +294,7 @@ begin
cache_in <= cache_in_host when (cmd_busy = '0') else cache_in_bus;


-- Bus Access Arbiter (Handle Cache Miss and Flush/Reload)---------------------------------
-- Bus Access Arbiter (Handle Cache Miss and Flush/Reload) --------------------------------
-- -------------------------------------------------------------------------------------------
neorv32_cache_bus_inst: neorv32_cache_bus
generic map (
Expand Down Expand Up @@ -328,10 +328,26 @@ begin
rdata_i => cache_out.rdata -- read data
);

-- simple bus multiplexer (as there won't be simultaneous access requests) --
bus_req_o <= bus_req when (cmd_busy = '1') else dir_req_q;
dir_rsp_d <= bus_rsp_i;
bus_rsp <= bus_rsp_i;

-- Bus Access Switch ----------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- Use a real switch here to buffer direct access requests
-- during out-of-band cache operation (fence / cache flush).
neorv32_cache_bus_switch: entity neorv32.neorv32_bus_switch
generic map (
PORT_A_READ_ONLY => false,
PORT_B_READ_ONLY => false
)
port map (
clk_i => clk_i,
rstn_i => rstn_i,
a_req_i => bus_req,
a_rsp_o => bus_rsp,
b_req_i => dir_req_q,
b_rsp_o => dir_rsp_d,
x_req_o => bus_req_o,
x_rsp_i => bus_rsp_i
);


end neorv32_cache_rtl;
Expand Down Expand Up @@ -810,58 +826,71 @@ end neorv32_cache_bus;
architecture neorv32_cache_bus_rtl of neorv32_cache_bus is

-- cache layout --
constant offset_size_c : natural := index_size_f(BLOCK_SIZE);
constant offset_size_c : natural := index_size_f(BLOCK_SIZE/4); -- WORD offset!
constant index_size_c : natural := index_size_f(NUM_BLOCKS);
constant tag_lsb_c : natural := index_size_c + offset_size_c;
constant tag_size_c : natural := 32 - (offset_size_c + index_size_c + 2);

-- host request buffer --
signal hreq : bus_req_t;

-- control engine --
type ctrl_state_t is (S_IDLE, S_CHECK_PRE, S_CHECK, S_DOWNLOAD_REQ, S_DOWNLOAD_RSP,
S_UPLOAD_GET, S_UPLOAD_REQ, S_UPLOAD_RSP, S_FLUSH_0, S_FLUSH_1, S_FLUSH_2);
type ctrl_t is record
state, state_nxt : ctrl_state_t; -- FSM state
upret, upret_nxt : ctrl_state_t; -- upload-done return state
addr, addr_nxt : std_ulogic_vector(31 downto 0); -- address generator
bcnt, bcnt_nxt : std_ulogic_vector(index_size_c-1 downto 0); -- block counter
-- control fsm --
type state_t is (S_IDLE, S_CHECK, S_DOWNLOAD_REQ, S_DOWNLOAD_RSP, S_UPLOAD_GET, S_UPLOAD_REQ, S_UPLOAD_RSP, S_FLUSH_START, S_FLUSH_READ, S_FLUSH_CHECK);
signal state, upret, state_nxt, upret_nxt: state_t;

-- address generator --
type addr_t is record
tag : std_ulogic_vector(tag_size_c-1 downto 0);
ind : std_ulogic_vector(index_size_c-1 downto 0);
off : std_ulogic_vector(offset_size_c-1 downto 0); -- WORD offset!
end record;
signal ctrl : ctrl_t;
signal haddr, baddr, addr, addr_nxt : addr_t;

begin

-- Address Decomposition ------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
-- base address of original host access --
haddr.tag <= host_req_i.addr(31 downto (32-tag_size_c));
haddr.ind <= host_req_i.addr((offset_size_c+2+index_size_c)-1 downto offset_size_c+2);
haddr.off <= (others => '0'); -- unused

-- base address of indexed cache block --
baddr.tag <= base_i(31 downto (32-tag_size_c));
baddr.ind <= base_i((offset_size_c+2+index_size_c)-1 downto offset_size_c+2);
baddr.off <= (others => '0'); -- unused


-- Control Engine FSM Sync ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
ctrl_engine_sync: process(rstn_i, clk_i)
begin
if (rstn_i = '0') then
ctrl.state <= S_IDLE;
ctrl.upret <= S_IDLE;
ctrl.addr <= (others => '0');
ctrl.bcnt <= (others => '0');
hreq <= req_terminate_c;
state <= S_IDLE;
upret <= S_IDLE;
addr.tag <= (others => '0');
addr.ind <= (others => '0');
addr.off <= (others => '0');
hreq <= req_terminate_c;
elsif rising_edge(clk_i) then
ctrl.state <= ctrl.state_nxt;
ctrl.upret <= ctrl.upret_nxt;
ctrl.addr <= ctrl.addr_nxt;
ctrl.bcnt <= ctrl.bcnt_nxt;
hreq <= host_req_i;
state <= state_nxt;
upret <= upret_nxt;
addr <= addr_nxt;
hreq <= host_req_i;
end if;
end process ctrl_engine_sync;


-- Control Engine FSM Comb ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
ctrl_engine_comb: process(ctrl, hreq, host_req_i, bus_rsp_i, cmd_sync_i, cmd_miss_i, rdata_i, dirty_i, base_i)
ctrl_engine_comb: process(state, upret, addr, hreq, haddr, baddr, bus_rsp_i, cmd_sync_i, cmd_miss_i, rdata_i, dirty_i)
begin
-- control defaults --
ctrl.state_nxt <= ctrl.state;
ctrl.upret_nxt <= ctrl.upret;
ctrl.addr_nxt <= ctrl.addr;
ctrl.bcnt_nxt <= ctrl.bcnt;
-- control engine defaults --
state_nxt <= state;
upret_nxt <= upret;
addr_nxt <= addr;

-- cache defaults --
addr_o <= ctrl.addr;
addr_o <= addr.tag & addr.ind & addr.off & "00"; -- always word-aligned
we_o <= (others => '0');
swe_o <= '0';
wdata_o <= bus_rsp_i.data;
Expand All @@ -873,46 +902,42 @@ begin

-- bus interface defaults --
bus_req_o <= req_terminate_c; -- all-zero
bus_req_o.addr <= ctrl.addr(31 downto 2) & "00"; -- always word-aligned
bus_req_o.addr <= addr.tag & addr.ind & addr.off & "00"; -- always word-aligned
bus_req_o.data <= rdata_i;
bus_req_o.ben <= (others => '1'); -- full-word writes only
bus_req_o.priv <= hreq.priv; -- keep original privilege level

-- fsm --
case ctrl.state is
case state is

when S_IDLE => -- wait for request
-- ------------------------------------------------------------
ctrl.addr_nxt(offset_size_c-1 downto 0) <= (others => '0'); -- align block base address
ctrl.bcnt_nxt <= (others => '0'); -- reset block counter
addr_nxt.off <= (others => '0'); -- align block base address for upload/download (and flush)
if (cmd_sync_i = '1') then -- cache sync
ctrl.state_nxt <= S_FLUSH_0;
state_nxt <= S_FLUSH_START;
elsif (cmd_miss_i = '1') then -- cache miss
ctrl.addr_nxt(31 downto offset_size_c) <= host_req_i.addr(31 downto offset_size_c); -- buffer original tag + index for cache look-up
ctrl.state_nxt <= S_CHECK_PRE;
state_nxt <= S_CHECK;
end if;

when S_CHECK_PRE => -- cache memory access latency
-- ------------------------------------------------------------
ctrl.state_nxt <= S_CHECK;

when S_CHECK => -- check if accessed block is dirty
when S_CHECK => -- check if accessed block is dirty (cache address is still applied by host controller!)
-- ------------------------------------------------------------
ctrl.upret_nxt <= S_DOWNLOAD_REQ; -- go straight to S_DOWNLOAD_REQ after S_UPLOAD_GET is completed (if executed)
upret_nxt <= S_DOWNLOAD_REQ; -- go straight to S_DOWNLOAD_REQ when S_UPLOAD_GET has completed (if executed)
if (dirty_i = '1') then -- block is dirty, upload first
ctrl.addr_nxt(31 downto offset_size_c) <= base_i(31 downto offset_size_c); -- base address of accessed block
ctrl.state_nxt <= S_UPLOAD_GET;
addr_nxt.tag <= baddr.tag; -- base address (tag + index) of accessed block
addr_nxt.ind <= baddr.ind;
state_nxt <= S_UPLOAD_GET;
else -- block is clean, download new block and override
ctrl.addr_nxt(31 downto offset_size_c) <= host_req_i.addr(31 downto offset_size_c); -- base address of requested block
ctrl.state_nxt <= S_DOWNLOAD_REQ;
addr_nxt.tag <= haddr.tag; -- base address (tag + index) of requested block
addr_nxt.ind <= haddr.ind;
state_nxt <= S_DOWNLOAD_REQ;
end if;


when S_DOWNLOAD_REQ => -- download new cache block: request new word
-- ------------------------------------------------------------
bus_req_o.rw <= '0'; -- read access
bus_req_o.stb <= '1'; -- request new transfer
ctrl.state_nxt <= S_DOWNLOAD_RSP;
bus_req_o.rw <= '0'; -- read access
bus_req_o.stb <= '1'; -- request new transfer
state_nxt <= S_DOWNLOAD_RSP;

when S_DOWNLOAD_RSP => -- download new cache block: wait for bus response
-- ------------------------------------------------------------
Expand All @@ -921,77 +946,76 @@ begin
swe_o <= '1'; -- cache: write status bit (bus error response)
new_o <= '1'; -- set new block (set tag, make valid, make clean)
if (bus_rsp_i.ack = '1') or (bus_rsp_i.err = '1') then -- wait for response
ctrl.addr_nxt(offset_size_c-1 downto 2) <= std_ulogic_vector(unsigned(ctrl.addr(offset_size_c-1 downto 2)) + 1);
if (and_reduce_f(ctrl.addr(offset_size_c-1 downto 2)) = '1') then -- block completed? offset will be all-zero again after block completion
ctrl.state_nxt <= S_IDLE;
addr_nxt.off <= std_ulogic_vector(unsigned(addr.off) + 1);
if (and_reduce_f(addr.off) = '1') then -- block completed? offset will be all-zero again after block completion
state_nxt <= S_IDLE;
else -- get next word
ctrl.state_nxt <= S_DOWNLOAD_REQ;
state_nxt <= S_DOWNLOAD_REQ;
end if;
end if;


when S_UPLOAD_GET => -- upload dirty cache block: read word from cache
-- ------------------------------------------------------------
bus_req_o.rw <= '1'; -- write access
ctrl.state_nxt <= S_UPLOAD_REQ;
bus_req_o.rw <= '1'; -- write access
state_nxt <= S_UPLOAD_REQ;

when S_UPLOAD_REQ => -- upload dirty cache block: request bus write
-- ------------------------------------------------------------
bus_req_o.rw <= '1'; -- write access
bus_req_o.stb <= '1'; -- request new transfer
ctrl.state_nxt <= S_UPLOAD_RSP;
bus_req_o.rw <= '1'; -- write access
bus_req_o.stb <= '1'; -- request new transfer
state_nxt <= S_UPLOAD_RSP;

when S_UPLOAD_RSP => -- upload dirty cache block: wait for bus response
-- ------------------------------------------------------------
bus_req_o.rw <= '1'; -- write access
new_o <= '1'; -- set new block (set tag, make valid, make clean)
if (bus_rsp_i.ack = '1') or (bus_rsp_i.err = '1') then -- wait for response
ctrl.addr_nxt(offset_size_c-1 downto 2) <= std_ulogic_vector(unsigned(ctrl.addr(offset_size_c-1 downto 2)) + 1);
if (and_reduce_f(ctrl.addr(offset_size_c-1 downto 2)) = '1') then -- block completed? offset will be all-zero again after block completion
ctrl.state_nxt <= ctrl.upret; -- go back to "upload-done return state"
addr_nxt.off <= std_ulogic_vector(unsigned(addr.off) + 1);
if (and_reduce_f(addr.off) = '1') then -- block completed? offset will be all-zero again after block completion
state_nxt <= upret; -- go back to "upload-done return state"
else -- get next word
ctrl.state_nxt <= S_UPLOAD_GET;
state_nxt <= S_UPLOAD_GET;
end if;
end if;


when S_FLUSH_0 => -- cache access latency cycle
when S_FLUSH_START => -- start checking for dirty blocks
-- ------------------------------------------------------------
ctrl.addr_nxt(tag_lsb_c-1 downto offset_size_c) <= ctrl.bcnt; -- current block to check if dirty
ctrl.state_nxt <= S_FLUSH_1;
addr_nxt.ind <= (others => '0'); -- start with index 0
upret_nxt <= S_FLUSH_CHECK; -- come back to S_FLUSH_CHECK after block upload
state_nxt <= S_FLUSH_READ;

when S_FLUSH_1 => -- sync. cache memory read latency cycle
when S_FLUSH_READ => -- cache read access latency cycle
-- ------------------------------------------------------------
ctrl.state_nxt <= S_FLUSH_2;
state_nxt <= S_FLUSH_CHECK;

when S_FLUSH_2 => -- check if currently indexed block is dirty
when S_FLUSH_CHECK => -- check if currently indexed block is dirty
-- ------------------------------------------------------------
ctrl.upret_nxt <= S_FLUSH_2; -- come back here after upload
inval_o <= '1'; -- invalidate currently checked block
ctrl.addr_nxt(31 downto offset_size_c) <= base_i(31 downto offset_size_c); -- tag + index of currently checked block
-- check if dirty / upload required --
if (dirty_i = '1') then -- upload dirty block to main memory
ctrl.state_nxt <= S_UPLOAD_GET;
addr_nxt.tag <= baddr.tag; -- tag of currently indexed block
inval_o <= '1'; -- invalidate currently indexed block
if (dirty_i = '1') then -- block dirty?
state_nxt <= S_UPLOAD_GET;
else -- move on to next block
ctrl.bcnt_nxt <= std_ulogic_vector(unsigned(ctrl.bcnt) + 1);
if (and_reduce_f(ctrl.bcnt) = '1') then -- all blocks done?
bus_req_o.fence <= '1'; -- forward fence (sync) to downstream memories
ctrl.state_nxt <= S_IDLE;
addr_nxt.ind <= std_ulogic_vector(unsigned(addr.ind) + 1);
if (and_reduce_f(addr.ind) = '1') then -- all blocks done?
bus_req_o.fence <= '1'; -- forward fence request to downstream memories
state_nxt <= S_IDLE;
else -- go to next block
ctrl.state_nxt <= S_FLUSH_0;
state_nxt <= S_FLUSH_READ;
end if;
end if;


when others => -- undefined
-- ------------------------------------------------------------
ctrl.state_nxt <= S_IDLE;
state_nxt <= S_IDLE;

end case;
end process ctrl_engine_comb;

-- bus arbiter operation in progress --
cmd_busy_o <= '0' when (ctrl.state = S_IDLE) else '1';
-- bus arbiter operation in progress (host keeps applying cache address while bus unit reports idle state) --
cmd_busy_o <= '0' when (state = S_IDLE) or (state = S_CHECK) else '1';


end neorv32_cache_bus_rtl;
12 changes: 6 additions & 6 deletions rtl/core/neorv32_cpu_control.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -368,9 +368,9 @@ begin

when IF_REQUEST => -- request next 32-bit-aligned instruction word
-- ------------------------------------------------------------
if (ipb.free = "11") then -- wait for free IPB space
if (ipb.free = "11") then -- free IPB space?
fetch_engine.state <= IF_PENDING;
elsif (fetch_engine.restart = '1') or (fetch_engine.reset = '1') then -- restart request due to branch
elsif (fetch_engine.restart = '1') or (fetch_engine.reset = '1') then -- restart because of branch
fetch_engine.state <= IF_RESTART;
end if;

Expand Down Expand Up @@ -849,14 +849,14 @@ begin
-- state machine --
case execute_engine.state is

when DISPATCH => -- Wait for ISSUE ENGINE to emit valid instruction word
when DISPATCH => -- Wait for ISSUE ENGINE to emit a valid instruction word
-- ------------------------------------------------------------
if (trap_ctrl.env_pending = '1') or (trap_ctrl.exc_fire = '1') then -- pending trap or pending exception (fast)
execute_engine.state_nxt <= TRAP_ENTER;
elsif (CPU_EXTENSION_RISCV_Sdtrig = true) and (hw_trigger_match = '1') then -- hardware breakpoint
execute_engine.pc_we <= '1'; -- pc <= next_pc
trap_ctrl.hwtrig <= '1';
execute_engine.state_nxt <= TRAP_ENTER;
execute_engine.state_nxt <= DISPATCH; -- stay here another round until trap_ctrl.hwtrig arrives in trap_ctrl.env_pending
elsif (issue_engine.valid(0) = '1') or (issue_engine.valid(1) = '1') then -- new instruction word available
issue_engine.ack <= '1';
trap_ctrl.instr_be <= issue_engine.data(32); -- access fault during instruction fetch
Expand Down Expand Up @@ -1413,7 +1413,7 @@ begin
-- break point --
if (CPU_EXTENSION_RISCV_Sdext = true) then
trap_ctrl.exc_buf(exc_ebreak_c) <= (not trap_ctrl.env_enter) and (trap_ctrl.exc_buf(exc_ebreak_c) or
(trap_ctrl.hwtrig and (not csr.tdata1_action)) or -- trigger module fires and enter-debug is disabled
(trap_ctrl.hwtrig and (not csr.tdata1_action)) or -- trigger module fires and enter-debug-action is disabled
(trap_ctrl.ebreak and ( csr.privilege) and (not csr.dcsr_ebreakm) and (not debug_ctrl.running)) or -- enter M-mode handler on ebreak in M-mode
(trap_ctrl.ebreak and (not csr.privilege) and (not csr.dcsr_ebreaku) and (not debug_ctrl.running))); -- enter M-mode handler on ebreak in U-mode
else
Expand Down Expand Up @@ -1550,7 +1550,7 @@ begin
trap_ctrl.env_pending <= '0';
elsif rising_edge(clk_i) then
if (trap_ctrl.env_pending = '0') then -- no pending trap environment yet
-- trigger IRQ only in EXECUTE states to *continue execution* even if there are permanent interrupt requests
-- trigger IRQ only in EXECUTE state --
if (trap_ctrl.exc_fire = '1') or ((trap_ctrl.irq_fire = '1') and (execute_engine.state = EXECUTE)) then
trap_ctrl.env_pending <= '1'; -- now execute engine can start trap handling
end if;
Expand Down
6 changes: 0 additions & 6 deletions rtl/core/neorv32_cpu_cp_fpu.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,6 @@ architecture neorv32_cpu_cp_fpu_rtl of neorv32_cpu_cp_fpu is

begin

-- Sanity Checks --------------------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
assert false report
"[NEORV32] The floating-point unit (Zfinx ISA extension) is still in experimental state." severity warning;


-- ****************************************************************************************************************************
-- Control
-- ****************************************************************************************************************************
Expand Down
Loading

0 comments on commit 46baf5a

Please sign in to comment.