Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement fence instruction #724

Merged
merged 3 commits into from Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
67 changes: 46 additions & 21 deletions rtl/cv32e40x_controller_fsm.sv
Expand Up @@ -170,6 +170,7 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
logic wfi_in_wb;
logic wfe_in_wb;
logic fencei_in_wb;
logic fence_in_wb;
logic mret_in_wb;
logic mret_ptr_in_wb; // CLIC pointer caused by mret is in WB
logic dret_in_wb;
Expand Down Expand Up @@ -212,8 +213,9 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;

logic [10:0] exc_cause; // id of taken interrupt. Max width, unused bits are tied off.

logic fencei_ready;
logic fencei_flush_req_set;
logic fence_req_set;
logic fence_req_clr;
logic fence_req_q;
logic fencei_req_and_ack_q;
logic fencei_ongoing;

Expand Down Expand Up @@ -254,8 +256,6 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;

assign id_stage_haltable = !(sequence_in_progress_id || clic_ptr_in_progress_id);

assign fencei_ready = !lsu_busy_i;

// Once the fencei handshake is initiated, it must complete and the instruction must retire.
// The instruction retires when fencei_req_and_ack_q = 1
assign fencei_ongoing = fencei_flush_req_o || fencei_req_and_ack_q;
Expand Down Expand Up @@ -354,6 +354,9 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
// fencei in wb
assign fencei_in_wb = ex_wb_pipe_i.sys_en && ex_wb_pipe_i.sys_fencei_insn && ex_wb_pipe_i.instr_valid;

// fence in wb
assign fence_in_wb = ex_wb_pipe_i.sys_en && ex_wb_pipe_i.sys_fence_insn && ex_wb_pipe_i.instr_valid;

// mret in wb - only valid when last_op_wb_i == 1 (which means only mret that did not cause a CLIC pointer fetch)
// Restricts CSR updates due to mret to not happen if the mret caused a CLIC pointer fetch, such CSR updates
// should only happen once the instruction fully completes (pointer arrives in WB).
Expand Down Expand Up @@ -452,7 +455,7 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
// or if an offloaded instruction is in WB.
// LSU will not be interruptible if the outstanding counter != 0, or
// a trans_valid has been clocked without ex_valid && wb_ready handshake.
// The cycle after fencei enters WB, the fencei handshake will be initiated. This must complete and the fencei instruction must retire before allowing external debug.
// When a fencei is present in WB and the LSU has completed all transfers, the fencei handshake will be initiated. This must complete and the fencei instruction must retire before allowing external debug.
// Any multi operation instruction (table jumps, push/pop and double moves) may not be interrupted once the first operation has completed its operation in WB.
// - This is guarded with using the sequence_interruptible, which tracks sequence progress through the WB stage.
// When a CLIC pointer is in the pipeline stages EX or WB, we must block debug.
Expand Down Expand Up @@ -515,7 +518,7 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
// Offloaded instructions in WB also block, as they cannot be killed after commit_kill=0 (EX stage)
// LSU instructions which were suppressed due to previous exceptions or trigger match
// will be interruptable as they were converted to NOP in ID stage.
// The cycle after fencei enters WB, the fencei handshake will be initiated. This must complete and the fencei instruction must retire before allowing interrupts.
// When a fencei is present in WB and the LSU has completed all transfers, the fencei handshake will be initiated. This must complete and the fencei instruction must retire before allowing interrupts.
// TODO:OK:low May allow interruption of Zce to idempotent memories
// Any multi operation instruction (table jumps, push/pop and double moves) may not be interrupted once the first operation has completed its operation in WB.
// - This is guarded with using the sequence_interruptible, which tracks sequence progress through the WB stage.
Expand Down Expand Up @@ -646,7 +649,8 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
// Ensure jumps and branches are taken only once
branch_taken_n = branch_taken_q;

fencei_flush_req_set = 1'b0;
fence_req_set = 1'b0;
fence_req_clr = 1'b1;

ctrl_fsm_o.pc_set_clicv = 1'b0;
ctrl_fsm_o.pc_set_tbljmp = 1'b0;
Expand Down Expand Up @@ -811,20 +815,36 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
ctrl_fsm_o.halt_wb = 1'b1;
ctrl_fsm_o.instr_req = 1'b0;
ctrl_fsm_ns = SLEEP;
end else if (fencei_in_wb) begin

end else if (fencei_in_wb || fence_in_wb) begin

// fence.i behavior:
//
// - Can be killed due to interrupts and debug at any time before fencei_flush_req_o is asserted (so even after initial cycle in WB).
// - Initially halt entire pipeline, making sure that possibly following loads/stores do not initiate transactions.
// - Wait until the LSU is ready (i.e. write buffer must also be empty and possible NMIs will have been raised).
// - Once the LSU is ready take the NMI if present or otherwise continue fence.i handling by initiating the fencei_flush_req_o handshake.
// - Once the fencei_flush_req_o handshake is complete flush the entire pipeline (branch to next instruction) and retire the fence.i.

// fence behavior:
//
// - Can be killed due to interrupts and debug at any time (so even after initial cycle in WB).
// - Initially halt entire pipeline, making sure that possibly following loads/stores do not initiate transactions.
// - Wait until the LSU is ready (i.e. write buffer must also be empty and possible NMIs will have been raised).
// - Once the LSU is ready take the NMI if present or otherwise continue fence handling.
// - Flush the entire pipeline (branch to next instruction) and retire the fence.

// Halt the pipeline
ctrl_fsm_o.halt_if = 1'b1;
ctrl_fsm_o.halt_id = 1'b1;
ctrl_fsm_o.halt_ex = 1'b1;
ctrl_fsm_o.halt_wb = 1'b1;

if (fencei_ready) begin
// Set fencei_flush_req_o in the next cycle
fencei_flush_req_set = 1'b1;
end
if (fencei_req_and_ack_q) begin
// fencei req and ack were set at in the same cycle, complete handshake and jump to PC_WB_PLUS4
// Set fence_req_q when the LSU is no longer busy
fence_req_set = !lsu_busy_i;
fence_req_clr = 1'b0;

if (fencei_in_wb ? fencei_req_and_ack_q : fence_req_q) begin

// Unhalt wb, kill if,id,ex
ctrl_fsm_o.kill_if = 1'b1;
Expand All @@ -844,7 +864,9 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
ctrl_fsm_o.pc_set = 1'b1;
ctrl_fsm_o.pc_mux = PC_WB_PLUS4;

fencei_flush_req_set = 1'b0;
// Clear fence_req_q
fence_req_set = 1'b0;
fence_req_clr = 1'b1;
end
end else if (dret_in_wb) begin
// dret takes jump from WB stage
Expand Down Expand Up @@ -1178,24 +1200,27 @@ module cv32e40x_controller_fsm import cv32e40x_pkg::*;
// Flops for fencei handshake request
always_ff @(posedge clk, negedge rst_n) begin
if (rst_n == 1'b0) begin
fencei_flush_req_o <= 1'b0;
fence_req_q <= 1'b0;
fencei_req_and_ack_q <= 1'b0;
end else begin

// Flop fencei_flush_ack_i to break timing paths
// fencei_flush_ack_i must be qualified with fencei_flush_req_o
fencei_req_and_ack_q <= fencei_flush_req_o && fencei_flush_ack_i;

// Set fencei_flush_req_o based on FSM output. Clear upon req&&ack.
if (fencei_flush_req_o && fencei_flush_ack_i) begin
fencei_flush_req_o <= 1'b0;
// Set and clear fence_req_q based on FSM output. Also clear when fence.i handshake is completed
if (fence_req_clr || (fencei_flush_req_o && fencei_flush_ack_i)) begin
fence_req_q <= 1'b0;
end
else if (fencei_flush_req_set) begin
fencei_flush_req_o <= 1'b1;
else if (fence_req_set) begin
fence_req_q <= 1'b1;
end
end
end

// Set flush request if we have a fence.i
assign fencei_flush_req_o = fencei_in_wb ? fence_req_q : 1'b0;

// minstret event
always_ff @(posedge clk, negedge rst_n) begin
if (rst_n == 1'b0) begin
Expand Down
2 changes: 2 additions & 0 deletions rtl/cv32e40x_decoder.sv
Expand Up @@ -47,6 +47,7 @@ module cv32e40x_decoder import cv32e40x_pkg::*;
output logic sys_ecall_insn_o, // Environment call (syscall) instruction encountered
output logic sys_wfi_insn_o,
output logic sys_wfe_insn_o,
output logic sys_fence_insn_o, // fence instruction
output logic sys_fencei_insn_o, // fence.i instruction

// from IF/ID pipeline
Expand Down Expand Up @@ -281,6 +282,7 @@ module cv32e40x_decoder import cv32e40x_pkg::*;
assign sys_ecall_insn_o = decoder_i_ctrl.sys_ecall_insn; // Only I decoder handles ECALL
assign sys_wfi_insn_o = decoder_i_ctrl.sys_wfi_insn; // Only I decoder handles WFI
assign sys_wfe_insn_o = decoder_i_ctrl.sys_wfe_insn; // Only I decoder handles WFE
assign sys_fence_insn_o = decoder_i_ctrl.sys_fence_insn; // Only I decoder handles FENCE
assign sys_fencei_insn_o = decoder_i_ctrl.sys_fencei_insn; // Only I decoder handles FENCE.I

// Suppress control signals
Expand Down
2 changes: 2 additions & 0 deletions rtl/cv32e40x_ex_stage.sv
Expand Up @@ -341,6 +341,7 @@ module cv32e40x_ex_stage import cv32e40x_pkg::*;
ex_wb_pipe_o.sys_dret_insn <= 1'b0;
ex_wb_pipe_o.sys_ebrk_insn <= 1'b0;
ex_wb_pipe_o.sys_ecall_insn <= 1'b0;
ex_wb_pipe_o.sys_fence_insn <= 1'b0;
ex_wb_pipe_o.sys_fencei_insn <= 1'b0;
ex_wb_pipe_o.sys_mret_insn <= 1'b0;
ex_wb_pipe_o.sys_wfi_insn <= 1'b0;
Expand Down Expand Up @@ -407,6 +408,7 @@ module cv32e40x_ex_stage import cv32e40x_pkg::*;
ex_wb_pipe_o.sys_dret_insn <= id_ex_pipe_i.sys_dret_insn;
ex_wb_pipe_o.sys_ebrk_insn <= id_ex_pipe_i.sys_ebrk_insn;
ex_wb_pipe_o.sys_ecall_insn <= id_ex_pipe_i.sys_ecall_insn;
ex_wb_pipe_o.sys_fence_insn <= id_ex_pipe_i.sys_fence_insn;
ex_wb_pipe_o.sys_fencei_insn <= id_ex_pipe_i.sys_fencei_insn;
ex_wb_pipe_o.sys_mret_insn <= id_ex_pipe_i.sys_mret_insn;
ex_wb_pipe_o.sys_wfi_insn <= id_ex_pipe_i.sys_wfi_insn;
Expand Down
5 changes: 2 additions & 3 deletions rtl/cv32e40x_i_decoder.sv
Expand Up @@ -274,11 +274,10 @@ module cv32e40x_i_decoder import cv32e40x_pkg::*;

OPCODE_FENCE: begin
decoder_ctrl_o.sys_en = 1'b1;
// todo: We may not want the fence handshake for regular (none .i) fences
unique case (instr_rdata_i[14:12])
3'b000: begin // FENCE (FENCE.I instead, a bit more conservative)
3'b000: begin // FENCE
// Flush pipeline
decoder_ctrl_o.sys_fencei_insn = 1'b1;
decoder_ctrl_o.sys_fence_insn = 1'b1;
end

3'b001: begin // FENCE.I
Expand Down
4 changes: 4 additions & 0 deletions rtl/cv32e40x_id_stage.sv
Expand Up @@ -154,6 +154,7 @@ module cv32e40x_id_stage import cv32e40x_pkg::*;

// SYS
logic sys_en;
logic sys_fence_insn;
logic sys_fencei_insn;
logic sys_ecall_insn;
logic sys_ebrk_insn;
Expand Down Expand Up @@ -430,6 +431,7 @@ module cv32e40x_id_stage import cv32e40x_pkg::*;
.sys_ecall_insn_o ( sys_ecall_insn ),
.sys_wfi_insn_o ( sys_wfi_insn ),
.sys_wfe_insn_o ( sys_wfe_insn ),
.sys_fence_insn_o ( sys_fence_insn ),
.sys_fencei_insn_o ( sys_fencei_insn ),

// from IF/ID pipeline
Expand Down Expand Up @@ -543,6 +545,7 @@ module cv32e40x_id_stage import cv32e40x_pkg::*;
id_ex_pipe_o.sys_dret_insn <= 1'b0;
id_ex_pipe_o.sys_ebrk_insn <= 1'b0;
id_ex_pipe_o.sys_ecall_insn <= 1'b0;
id_ex_pipe_o.sys_fence_insn <= 1'b0;
id_ex_pipe_o.sys_fencei_insn <= 1'b0;
id_ex_pipe_o.sys_mret_insn <= 1'b0;
id_ex_pipe_o.sys_wfi_insn <= 1'b0;
Expand Down Expand Up @@ -632,6 +635,7 @@ module cv32e40x_id_stage import cv32e40x_pkg::*;
id_ex_pipe_o.sys_dret_insn <= sys_dret_insn;
id_ex_pipe_o.sys_ebrk_insn <= sys_ebrk_insn;
id_ex_pipe_o.sys_ecall_insn <= sys_ecall_insn;
id_ex_pipe_o.sys_fence_insn <= sys_fence_insn;
id_ex_pipe_o.sys_fencei_insn <= sys_fencei_insn;
id_ex_pipe_o.sys_mret_insn <= sys_mret_insn;
id_ex_pipe_o.sys_wfi_insn <= sys_wfi_insn;
Expand Down
4 changes: 4 additions & 0 deletions rtl/include/cv32e40x_pkg.sv
Expand Up @@ -818,6 +818,7 @@ typedef struct packed {
logic sys_dret_insn;
logic sys_ebrk_insn;
logic sys_ecall_insn;
logic sys_fence_insn;
logic sys_fencei_insn;
logic sys_mret_insn;
logic sys_wfi_insn;
Expand Down Expand Up @@ -855,6 +856,7 @@ typedef struct packed {
sys_dret_insn : 1'b0,
sys_ebrk_insn : 1'b0,
sys_ecall_insn : 1'b0,
sys_fence_insn : 1'b0,
sys_fencei_insn : 1'b0,
sys_mret_insn : 1'b0,
sys_wfi_insn : 1'b0,
Expand Down Expand Up @@ -1134,6 +1136,7 @@ typedef struct packed {
logic sys_dret_insn;
logic sys_ebrk_insn;
logic sys_ecall_insn;
logic sys_fence_insn;
logic sys_fencei_insn;
logic sys_mret_insn;
logic sys_wfi_insn;
Expand Down Expand Up @@ -1203,6 +1206,7 @@ typedef struct packed {
logic sys_dret_insn;
logic sys_ebrk_insn;
logic sys_ecall_insn;
logic sys_fence_insn;
logic sys_fencei_insn;
logic sys_mret_insn;
logic sys_wfi_insn;
Expand Down
14 changes: 7 additions & 7 deletions sva/cv32e40x_controller_fsm_sva.sv
Expand Up @@ -71,7 +71,7 @@ module cv32e40x_controller_fsm_sva
input logic interrupt_allowed,
input logic pending_nmi,
input logic nmi_allowed,
input logic fencei_ready,
input logic lsu_busy_i,
input logic xif_commit_kill,
input logic xif_commit_valid,
input logic nmi_is_store_q,
Expand Down Expand Up @@ -243,11 +243,11 @@ module cv32e40x_controller_fsm_sva
fencei_flush_req_o |-> fencei_in_wb)
else `uvm_error("controller", "Fencei request when no fencei in writeback")

// Assert that the fencei request is set the cycle after fencei instruction enters WB (if fencei_ready=1 and there are no higher priority events)
// Assert that the fencei request is set the cycle after fencei instruction enters WB (if lsu_busy_i=0 and there are no higher priority events)
// Only check when no higher priority event is pending (nmi, async debug or interrupts) and WB stage is not killed
a_fencei_hndshk_req_when_fencei_wb :
assert property (@(posedge clk) disable iff (!rst_n)
$rose(fencei_in_wb && fencei_ready) && !ctrl_fsm_o.kill_wb && !(pending_nmi || (pending_async_debug && async_debug_allowed) || (pending_interrupt && interrupt_allowed))
$rose(fencei_in_wb && !lsu_busy_i) && !ctrl_fsm_o.kill_wb && !(pending_nmi || (pending_async_debug && async_debug_allowed) || (pending_interrupt && interrupt_allowed))
|=> $rose(fencei_flush_req_o))
else `uvm_error("controller", "Fencei in WB did not result in fencei_flush_req_o")

Expand Down Expand Up @@ -277,11 +277,11 @@ module cv32e40x_controller_fsm_sva
$rose(fencei_in_wb) |-> !(fencei_flush_req_o || fencei_req_and_ack_q))
else `uvm_error("controller", "Fencei handshake not idle when fencei instruction entered writeback")

// assert that the fencei_ready signal (i.e. write buffer empty) is always set when fencei handshake is active
a_fencei_ready :
// assert that the lsu_busy_i signal is always cleared (i.e. write buffer empty) when fencei handshake is active
a_fencei_lsu_busy :
assert property (@(posedge clk) disable iff (!rst_n)
fencei_flush_req_o |-> fencei_ready)
else `uvm_error("controller", "Fencei handshake active while fencei_ready = 0")
fencei_flush_req_o |-> !lsu_busy_i)
else `uvm_error("controller", "Fencei handshake active while lsu_busy_o = 1")

// assert that NMI's are not reported on irq_ack
a_irq_ack_no_nmi :
Expand Down