Skip to content

Commit

Permalink
Merge pull request #317 from riscv-boom/delayed-dispatch
Browse files Browse the repository at this point in the history
Delayed Dispatch
  • Loading branch information
bkorpan committed Jul 18, 2019
2 parents 5b52af8 + 1658de7 commit bbcd292
Show file tree
Hide file tree
Showing 6 changed files with 213 additions and 165 deletions.
139 changes: 71 additions & 68 deletions src/main/scala/exu/core.scala
Original file line number Diff line number Diff line change
Expand Up @@ -378,10 +378,6 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg
//-------------------------------------------------------------
// Decoders

// send only 1 RoCC instruction at a time
var dec_rocc_found = if (usingRoCC) exe_units.rocc_unit.io.rocc.rxq_full else false.B
val rocc_shim_busy = if (usingRoCC) !exe_units.rocc_unit.io.rocc.rxq_empty else false.B

// stall fetch/decode because we ran out of branch tags
val branch_mask_full = Wire(Vec(coreWidth, Bool()))

Expand All @@ -399,25 +395,14 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg

// Decode/Rename1 pipeline logic

val dec_prior_slot_valid = dec_valids.scanLeft(false.B) ((s,v) => s || v)
val dec_prior_slot_unique = (dec_uops zip dec_valids).scanLeft(false.B) {case (s,(u,v)) => s || v && u.is_unique}
val wait_for_empty_pipeline = (0 until coreWidth).map(w => (dec_uops(w).is_unique || custom_csrs.disableOOO) &&
(!rob.io.empty || !lsu.io.lsu_fencei_rdy || dec_prior_slot_valid(w)))
val wait_for_rocc = (0 until coreWidth).map(w =>
(dec_uops(w).is_fence || dec_uops(w).is_fencei) && (io.rocc.busy || rocc_shim_busy))

val dec_hazards = (0 until coreWidth).map(w =>
dec_valids(w) &&
( !rob.io.ready
|| !dis_ready
( !dis_ready
|| rob.io.commit.rollback
|| branch_mask_full(w)
|| lsu.io.laq_full(w) && dec_uops(w).is_load
|| lsu.io.stq_full(w) && dec_uops(w).is_store
|| !rename_stage.io.inst_can_proceed(w)
|| wait_for_empty_pipeline(w)
|| wait_for_rocc(w)
|| dec_prior_slot_unique(w)
|| flush_ifu))

val dec_stalls = dec_hazards.scanLeft(false.B) ((s,h) => s || h).takeRight(coreWidth)
dec_fire := (0 until coreWidth).map(w => dec_valids(w) && !dec_stalls(w))

Expand Down Expand Up @@ -447,50 +432,6 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg

branch_mask_full := dec_brmask_logic.io.is_full

//-------------------------------------------------------------
// LDQ/STQ Allocation Logic

lsu.io.dec_uops := dec_uops

for (w <- 0 until coreWidth) {
// Dispatching instructions request load/store queue entries when they can proceed.
lsu.io.dec_ld_vals(w) := dec_fire(w) && dec_uops(w).is_load
lsu.io.dec_st_vals(w) := dec_fire(w) && dec_uops(w).is_store

lsu.io.dec_uops(w).rob_idx := dec_uops(w).rob_idx
dec_uops(w).ldq_idx := lsu.io.new_ldq_idx(w)
dec_uops(w).stq_idx := lsu.io.new_stq_idx(w)
}

//-------------------------------------------------------------
// Rob Allocation Logic

rob.io.enq_valids := rename_stage.io.ren1_mask
rob.io.enq_uops := rename_stage.io.ren1_uops
rob.io.enq_partial_stall := dec_stalls.last // TODO come up with better ROB compacting scheme.
rob.io.debug_tsc := debug_tsc_reg
rob.io.csr_stall := csr.io.csr_stall

for (w <- 0 until coreWidth) {
// note: this assumes uops haven't been shifted - there's a 1:1 match between PC's LSBs and "w" here
// (thus the LSB of the rob_idx gives part of the PC)
if (coreWidth == 1) {
dec_uops(w).rob_idx := rob.io.rob_tail_idx
} else {
dec_uops(w).rob_idx := Cat(rob.io.rob_tail_idx >> log2Ceil(coreWidth).U,
w.U(log2Ceil(coreWidth).W))
}
}

//-------------------------------------------------------------
// RoCC allocation logic
if (usingRoCC) {
for (w <- 0 until coreWidth) {
// We guarantee only decoding 1 RoCC instruction per cycle
dec_uops(w).rxq_idx := exe_units.rocc_unit.io.rocc.rxq_idx
}
}

//-------------------------------------------------------------
//-------------------------------------------------------------
// **** Register Rename Stage ****
Expand All @@ -500,7 +441,7 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg
rename_stage.io.kill := flush_ifu
rename_stage.io.brinfo := br_unit.brinfo

rename_stage.io.flush_pipeline := rob.io.flush.valid
rename_stage.io.flush := rob.io.flush.valid || io.ifu.sfence_take_pc
rename_stage.io.debug_rob_empty := rob.io.empty

rename_stage.io.dec_fire := dec_fire
Expand All @@ -525,15 +466,77 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg

// Rename2/Dispatch pipeline logic

val dis_prior_slot_valid = dis_valids.scanLeft(false.B) ((s,v) => s || v)
val dis_prior_slot_unique = (dis_uops zip dis_valids).scanLeft(false.B) {case (s,(u,v)) => s || v && u.is_unique}
val wait_for_empty_pipeline = (0 until coreWidth).map(w => (dis_uops(w).is_unique || custom_csrs.disableOOO) &&
(!rob.io.empty || !lsu.io.lsu_fencei_rdy || dis_prior_slot_valid(w)))
val rocc_shim_busy = if (usingRoCC) !exe_units.rocc_unit.io.rocc.rxq_empty else false.B
val wait_for_rocc = (0 until coreWidth).map(w =>
(dis_uops(w).is_fence || dis_uops(w).is_fencei) && (io.rocc.busy || rocc_shim_busy))
val rxq_full = if (usingRoCC) exe_units.rocc_unit.io.rocc.rxq_full else false.B
val block_rocc = (dis_uops zip dis_valids).map{case (u,v) => v && u.uopc === uopROCC}.scanLeft(rxq_full)(_||_)
val dis_rocc_alloc_stall = (dis_uops.map(_.uopc === uopROCC) zip block_rocc) map {case (p,r) =>
if (usingRoCC) p && r else false.B}

val dis_hazards = (0 until coreWidth).map(w =>
dis_valids(w) &&
( !dispatcher.io.ren_uops(w).ready
( !rob.io.ready
|| lsu.io.laq_full(w) && dis_uops(w).is_load
|| lsu.io.stq_full(w) && dis_uops(w).is_store
|| !dispatcher.io.ren_uops(w).ready
|| wait_for_empty_pipeline(w)
|| wait_for_rocc(w)
|| dis_prior_slot_unique(w)
|| dis_rocc_alloc_stall(w)
|| flush_ifu))

val dis_stalls = dis_hazards.scanLeft(false.B) ((s,h) => s || h).takeRight(coreWidth)
dis_fire := dis_valids zip dis_stalls map {case (v,s) => v && !s}
dis_ready := !dis_stalls.last

//-------------------------------------------------------------
// LDQ/STQ Allocation Logic

lsu.io.dis_uops := dis_uops

for (w <- 0 until coreWidth) {
// Dispatching instructions request load/store queue entries when they can proceed.
lsu.io.dis_ld_vals(w) := dis_fire(w) && dis_uops(w).is_load
lsu.io.dis_st_vals(w) := dis_fire(w) && dis_uops(w).is_store

dis_uops(w).ldq_idx := lsu.io.new_ldq_idx(w)
dis_uops(w).stq_idx := lsu.io.new_stq_idx(w)
}

//-------------------------------------------------------------
// Rob Allocation Logic

rob.io.enq_valids := dis_fire
rob.io.enq_uops := dis_uops
rob.io.enq_partial_stall := dis_stalls.last // TODO come up with better ROB compacting scheme.
rob.io.debug_tsc := debug_tsc_reg
rob.io.csr_stall := csr.io.csr_stall

for (w <- 0 until coreWidth) {
// note: this assumes uops haven't been shifted - there's a 1:1 match between PC's LSBs and "w" here
// (thus the LSB of the rob_idx gives part of the PC)
if (coreWidth == 1) {
dis_uops(w).rob_idx := rob.io.rob_tail_idx
} else {
dis_uops(w).rob_idx := Cat(rob.io.rob_tail_idx >> log2Ceil(coreWidth).U,
w.U(log2Ceil(coreWidth).W))
}
}

//-------------------------------------------------------------
// RoCC allocation logic
if (usingRoCC) {
for (w <- 0 until coreWidth) {
// We guarantee only dispatching 1 RoCC instruction per cycle
dis_uops(w).rxq_idx := exe_units.rocc_unit.io.rocc.rxq_idx
}
}

//-------------------------------------------------------------
// Dispatch to issue queues

Expand Down Expand Up @@ -1273,16 +1276,16 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg
io.rocc.exception := csr.io.exception && csr.io.status.xs.orR
if (usingRoCC) {
exe_units.rocc_unit.io.rocc.rocc <> io.rocc
exe_units.rocc_unit.io.rocc.dec_uops := dec_uops
exe_units.rocc_unit.io.rocc.dis_uops := dis_uops
exe_units.rocc_unit.io.rocc.rob_head_idx := rob.io.rob_head_idx
exe_units.rocc_unit.io.rocc.rob_pnr_idx := rob.io.rob_pnr_idx
exe_units.rocc_unit.io.com_exception := rob.io.com_xcpt.valid
exe_units.rocc_unit.io.status := csr.io.status

for (w <- 0 until coreWidth) {
exe_units.rocc_unit.io.rocc.dec_rocc_vals(w) := (
dec_fire(w) &&
dec_uops(w).uopc === uopROCC)
exe_units.rocc_unit.io.rocc.dis_rocc_vals(w) := (
dis_fire(w) &&
dis_uops(w).uopc === uopROCC)
}
}

Expand Down
14 changes: 7 additions & 7 deletions src/main/scala/exu/execution-units/rocc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// The RoCC shim unit. Similar to the LSU, in that we need to allocate entries
// for instruction bits at decode, and send commands strictly in order.
// for instruction bits at dispatch, and send commands strictly in order.
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------

Expand All @@ -31,8 +31,8 @@ import boom.util._
class RoCCShimCoreIO(implicit p: Parameters) extends BoomBundle
{
// Dispatch Stage
val dec_rocc_vals = Input(Vec(coreWidth, Bool()))
val dec_uops = Input(Vec(coreWidth, new MicroOp))
val dis_rocc_vals = Input(Vec(coreWidth, Bool()))
val dis_uops = Input(Vec(coreWidth, new MicroOp))
val rxq_full = Output(Bool())
val rxq_empty = Output(Bool())
val rxq_idx = Output(UInt(log2Ceil(numRxqEntries).W))
Expand Down Expand Up @@ -95,8 +95,8 @@ class RoCCShim(implicit p: Parameters) extends BoomModule
val br_mask = WireInit(0.U(maxBrCount.W))

for (w <- 0 until coreWidth) {
when (io.core.dec_rocc_vals(w)
&& io.core.dec_uops(w).uopc === uopROCC) {
when (io.core.dis_rocc_vals(w)
&& io.core.dis_uops(w).uopc === uopROCC) {
enq_val := true.B
rocc_idx := w.U
}
Expand All @@ -106,8 +106,8 @@ class RoCCShim(implicit p: Parameters) extends BoomModule
rxq_val (rxq_tail) := true.B
rxq_op_val (rxq_tail) := false.B
rxq_committed(rxq_tail) := false.B
rxq_uop (rxq_tail) := io.core.dec_uops(rocc_idx)
rxq_inst (rxq_tail) := io.core.dec_uops(rocc_idx).debug_inst
rxq_uop (rxq_tail) := io.core.dis_uops(rocc_idx)
rxq_inst (rxq_tail) := io.core.dis_uops(rocc_idx).debug_inst
rxq_tail := WrapInc(rxq_tail, numRxqEntries)
}

Expand Down
41 changes: 18 additions & 23 deletions src/main/scala/exu/rename/rename-freelist.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,20 @@ class RenameFreeList(

val io = IO(new BoomBundle()(p) {
// Physical register requests.
val reqs = Input(Vec(plWidth, Bool()))
val can_allocate = Output(Vec(plWidth, Bool()))
val alloc_pregs = Output(Vec(plWidth, UInt(pregSz.W)))
val reqs = Input(Vec(plWidth, Bool()))
val alloc_pregs = Output(Vec(plWidth, Valid(UInt(pregSz.W))))

// Pregs returned by the ROB.
// They come from the "stale" field of committed uops during normal operation,
// or the pdst field of uops at the tail during exception rollback.
val rob_uops = Input(Vec(plWidth, new MicroOp))
val com_valids = Input(Vec(plWidth, Bool()))
val rbk_valids = Input(Vec(plWidth, Bool()))
val rollback = Input(Bool())
val dealloc_pregs = Input(Vec(plWidth, Valid(UInt(pregSz.W))))

// Branch info for starting new allocation lists.
val ren_br_tags = Input(Vec(plWidth, Valid(UInt(brTagSz.W))))
val ren_br_tags = Input(Vec(plWidth, Valid(UInt(brTagSz.W))))

// Mispredict info for recovering speculatively allocated registers.
val brinfo = Input(new BrResolutionInfo)
val brinfo = Input(new BrResolutionInfo)

val debug = new Bundle {
val rob_empty = Input(Bool())
val pipeline_empty = Input(Bool())
val freelist = Output(Bits(numPregs.W))
val isprlist = Output(Bits(numPregs.W))
}
Expand All @@ -60,41 +54,42 @@ class RenameFreeList(

// Select pregs from the free list.
val preg_sels = SelectFirstN(free_list, plWidth)
io.can_allocate := preg_sels.map(_.orR)

// Allocations seen by branches in each pipeline slot.
val alloc_masks = (preg_sels zip io.reqs).scanRight(0.U(numPregs.W)) {case ((preg, req), mask) => Mux(req, mask | preg, mask)}
val alloc_masks = (preg_sels zip io.reqs).scanRight(0.U(numPregs.W))
{case ((preg, req), mask) => Mux(req, mask | preg, mask)}

// Pregs returned by the ROB via commit or rollback.
val ret_valids = io.rbk_valids zip io.com_valids map {case (r,c) => r || c}
val ret_pregs = io.rob_uops.map(uop => Mux(io.rollback, uop.pdst, uop.stale_pdst))
val ret_mask = (ret_pregs zip ret_valids).map {case (preg, valid) => UIntToOH(preg)(numPregs-1,0) & Cat(Fill(numPregs-1, valid.asUInt), 0.U(1.W))}.reduce(_|_)
val dealloc_mask = io.dealloc_pregs.map(d =>
UIntToOH(d.bits)(numPregs-1,0) & Fill(numPregs, d.valid.asUInt)).reduce(_|_)

val br_slots = VecInit(io.ren_br_tags.map(tag => tag.valid)).asUInt
// Create branch allocation lists.
for (i <- 0 until maxBrCount) {
val list_req = VecInit(io.ren_br_tags.map(tag => UIntToOH(tag.bits)(i))).asUInt & br_slots
val new_list = list_req.orR
br_alloc_lists(i) := Mux(new_list, Mux1H(list_req, alloc_masks.slice(1, plWidth+1)), br_alloc_lists(i) | alloc_masks(0))
br_alloc_lists(i) := Mux(new_list, Mux1H(list_req, alloc_masks.slice(1, plWidth+1)),
br_alloc_lists(i) | alloc_masks(0))
}

when (io.brinfo.mispredict) {
// Recover pregs allocated past a mispredicted branch.
free_list := free_list | br_alloc_lists(io.brinfo.tag) | ret_mask
free_list := (free_list | br_alloc_lists(io.brinfo.tag) | dealloc_mask) & ~(1.U(numPregs.W))
} .otherwise {
// Update the free list.
free_list := free_list & ~alloc_masks(0) | ret_mask
free_list := (free_list & ~alloc_masks(0) | dealloc_mask) & ~(1.U(numPregs.W))
}

// Encode outputs.
io.alloc_pregs := VecInit(preg_sels.map(s => OHToUInt(s)))
io.alloc_pregs zip preg_sels map {case (p,s) => p.bits := OHToUInt(s)}
io.alloc_pregs zip preg_sels map {case (p,s) => p.valid := s.orR}

io.debug.freelist := free_list
io.debug.isprlist := 0.U // TODO track commit free list.

assert (!(free_list & ret_mask).orR, "[freelist] Returning a free physical register.")
assert (!(free_list & dealloc_mask).orR, "[freelist] Returning a free physical register.")

val numLregs = if(float) 32 else 31
assert (!io.debug.rob_empty || PopCount(free_list) >= (numPregs - numLregs - 1).U,
assert (!io.debug.pipeline_empty || PopCount(free_list) >= (numPregs - numLregs - 1).U,
"[freelist] Leaking physical registers.")
}

0 comments on commit bbcd292

Please sign in to comment.