Skip to content

Commit

Permalink
Merge pull request #361 from riscv-boom/delayed-allocation
Browse files: browse the repository at this point in the history
[ren] Delayed Allocation
  • Loading branch information
bkorpan committed Aug 13, 2019
2 parents d1b24f9 + edc2ded commit 2a61ca7
Show file tree
Hide file tree
Showing 5 changed files with 135 additions and 82 deletions.
3 changes: 1 addition & 2 deletions src/main/scala/exu/core.scala
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,6 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg
|| rob.io.commit.rollback
|| dec_xcpt_stall
|| branch_mask_full(w)
|| ren_stalls(w)
|| flush_ifu))

val dec_stalls = dec_hazards.scanLeft(false.B) ((s,h) => s || h).takeRight(coreWidth)
Expand Down Expand Up @@ -483,7 +482,6 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg
rename.io.kill := flush_ifu
rename.io.brinfo := br_unit.brinfo

rename.io.flush := rob.io.flush.valid || io.ifu.sfence_take_pc
rename.io.debug_rob_empty := rob.io.empty

rename.io.dec_fire := dec_fire
Expand Down Expand Up @@ -551,6 +549,7 @@ class BoomCore(implicit p: Parameters, edge: freechips.rocketchip.tilelink.TLEdg
val dis_hazards = (0 until coreWidth).map(w =>
dis_valids(w) &&
( !rob.io.ready
|| ren_stalls(w)
|| lsu.io.laq_full(w) && dis_uops(w).is_load
|| lsu.io.stq_full(w) && dis_uops(w).is_store
|| !dispatcher.io.ren_uops(w).ready
Expand Down
21 changes: 11 additions & 10 deletions src/main/scala/exu/rename/rename-busytable.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class RenameBusyTable(
val plWidth: Int,
val numPregs: Int,
val numWbPorts: Int,
val bypass: Boolean,
val float: Boolean)
(implicit p: Parameters) extends BoomModule
{
Expand Down Expand Up @@ -56,17 +57,17 @@ class RenameBusyTable(

// Read the busy table.
for (i <- 0 until plWidth) {
val prs1_was_bypassed = (0 until i).map(j => io.ren_uops(i).lrs1 === io.ren_uops(j).ldst && io.rebusy_reqs(j))
.foldLeft(false.B)(_||_)
val prs2_was_bypassed = (0 until i).map(j => io.ren_uops(i).lrs2 === io.ren_uops(j).ldst && io.rebusy_reqs(j))
.foldLeft(false.B)(_||_)
val prs3_was_bypassed = (0 until i).map(j => io.ren_uops(i).lrs3 === io.ren_uops(j).ldst && io.rebusy_reqs(j))
.foldLeft(false.B)(_||_)
val prs1_was_bypassed = (0 until i).map(j =>
io.ren_uops(i).lrs1 === io.ren_uops(j).ldst && io.rebusy_reqs(j)).foldLeft(false.B)(_||_)
val prs2_was_bypassed = (0 until i).map(j =>
io.ren_uops(i).lrs2 === io.ren_uops(j).ldst && io.rebusy_reqs(j)).foldLeft(false.B)(_||_)
val prs3_was_bypassed = (0 until i).map(j =>
io.ren_uops(i).lrs3 === io.ren_uops(j).ldst && io.rebusy_reqs(j)).foldLeft(false.B)(_||_)

io.busy_resps(i).prs1_busy := busy_table(io.ren_uops(i).prs1) || prs1_was_bypassed
io.busy_resps(i).prs2_busy := busy_table(io.ren_uops(i).prs2) || prs2_was_bypassed
if (float) io.busy_resps(i).prs3_busy := busy_table(io.ren_uops(i).prs3) || prs3_was_bypassed
else io.busy_resps(i).prs3_busy := false.B
io.busy_resps(i).prs1_busy := busy_table(io.ren_uops(i).prs1) || prs1_was_bypassed && bypass.B
io.busy_resps(i).prs2_busy := busy_table(io.ren_uops(i).prs2) || prs2_was_bypassed && bypass.B
io.busy_resps(i).prs3_busy := busy_table(io.ren_uops(i).prs3) || prs3_was_bypassed && bypass.B
if (!float) io.busy_resps(i).prs3_busy := false.B
}

io.debug.busytable := busy_table
Expand Down
49 changes: 27 additions & 22 deletions src/main/scala/exu/rename/rename-freelist.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class RenameFreeList(
(implicit p: Parameters) extends BoomModule
{
private val pregSz = log2Ceil(numPregs)
private val n = numPregs

val io = IO(new BoomBundle()(p) {
// Physical register requests.
Expand All @@ -45,49 +46,53 @@ class RenameFreeList(
val isprlist = Output(Bits(numPregs.W))
}
})

// The free list register array and its branch allocation lists.
val free_list = RegInit(UInt(numPregs.W), ~(1.U(numPregs.W)))
val br_alloc_lists = Reg(Vec(maxBrCount, UInt(numPregs.W)))

// Select pregs from the free list.
val preg_sels = SelectFirstN(free_list, plWidth)
val sels = SelectFirstN(free_list, plWidth)
val sel_fire = Wire(Vec(plWidth, Bool()))

// Allocations seen by branches in each pipeline slot.
val alloc_masks = (preg_sels zip io.reqs).scanRight(0.U(numPregs.W))
{case ((preg, req), mask) => Mux(req, mask | preg, mask)}
val allocs = io.alloc_pregs map (a => UIntToOH(a.bits))
val alloc_masks = (allocs zip io.reqs).scanRight(0.U(n.W)) { case ((a,r),m) => m | a & Fill(n,r) }

// Pregs returned by the ROB via commit or rollback.
val dealloc_mask = io.dealloc_pregs.map(d =>
UIntToOH(d.bits)(numPregs-1,0) & Fill(numPregs, d.valid.asUInt)).reduce(_|_)
// Masks that modify the freelist array.
val sel_mask = (sels zip sel_fire) map { case (s,f) => s & Fill(n,f) } reduce(_|_)
val br_deallocs = br_alloc_lists(io.brinfo.tag) & Fill(n, io.brinfo.mispredict)
val dealloc_mask = io.dealloc_pregs.map(d => UIntToOH(d.bits)(numPregs-1,0) & Fill(n,d.valid)).reduce(_|_) | br_deallocs

val br_slots = VecInit(io.ren_br_tags.map(tag => tag.valid)).asUInt
// Create branch allocation lists.
for (i <- 0 until maxBrCount) {
val list_req = VecInit(io.ren_br_tags.map(tag => UIntToOH(tag.bits)(i))).asUInt & br_slots
val new_list = list_req.orR
br_alloc_lists(i) := Mux(new_list, Mux1H(list_req, alloc_masks.slice(1, plWidth+1)),
br_alloc_lists(i) | alloc_masks(0))
br_alloc_lists(i) & ~br_deallocs | alloc_masks(0))
}

when (io.brinfo.mispredict) {
// Recover pregs allocated past a mispredicted branch.
free_list := (free_list | br_alloc_lists(io.brinfo.tag) | dealloc_mask) & ~(1.U(numPregs.W))
} .otherwise {
// Update the free list.
free_list := (free_list & ~alloc_masks(0) | dealloc_mask) & ~(1.U(numPregs.W))
}
// Update the free list.
free_list := (free_list & ~sel_mask | dealloc_mask) & ~(1.U(numPregs.W))

// Encode outputs.
io.alloc_pregs zip preg_sels map {case (p,s) => p.bits := OHToUInt(s)}
io.alloc_pregs zip preg_sels map {case (p,s) => p.valid := s.orR}
// Pipeline logic | hookup outputs.
for (w <- 0 until plWidth) {
val can_sel = sels(w).orR
val r_valid = RegInit(false.B)
val r_sel = RegEnable(OHToUInt(sels(w)), sel_fire(w))

io.debug.freelist := free_list
io.debug.isprlist := 0.U // TODO track commit free list.
r_valid := r_valid && !io.reqs(w) || can_sel
sel_fire(w) := (!r_valid || io.reqs(w)) && can_sel

assert (!(free_list & dealloc_mask).orR, "[freelist] Returning a free physical register.")
io.alloc_pregs(w).bits := r_sel
io.alloc_pregs(w).valid := r_valid
}

io.debug.freelist := free_list | io.alloc_pregs.map(p => UIntToOH(p.bits) & Fill(n,p.valid)).reduce(_|_)
io.debug.isprlist := 0.U // TODO track commit free list.

val numLregs = if(float) 32 else 31
assert (!io.debug.pipeline_empty || PopCount(free_list) >= (numPregs - numLregs - 1).U,
assert (!(io.debug.freelist & dealloc_mask).orR, "[freelist] Returning a free physical register.")
assert (!io.debug.pipeline_empty || PopCount(io.debug.freelist) >= (numPregs - numLregs - 1).U,
"[freelist] Leaking physical registers.")
}
9 changes: 5 additions & 4 deletions src/main/scala/exu/rename/rename-maptable.scala
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class RenameMapTable(
val plWidth: Int,
val numLregs: Int,
val numPregs: Int,
val bypass: Boolean,
val float: Boolean)
(implicit p: Parameters) extends BoomModule
{
Expand Down Expand Up @@ -110,13 +111,13 @@ class RenameMapTable(
// Read out mappings.
for (i <- 0 until plWidth) {
io.map_resps(i).prs1 := (0 until i).foldLeft(map_table(io.map_reqs(i).lrs1)) ((p,k) =>
Mux(io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).lrs1, io.remap_reqs(k).pdst, p))
Mux(bypass.B && io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).lrs1, io.remap_reqs(k).pdst, p))
io.map_resps(i).prs2 := (0 until i).foldLeft(map_table(io.map_reqs(i).lrs2)) ((p,k) =>
Mux(io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).lrs2, io.remap_reqs(k).pdst, p))
Mux(bypass.B && io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).lrs2, io.remap_reqs(k).pdst, p))
io.map_resps(i).prs3 := (0 until i).foldLeft(map_table(io.map_reqs(i).lrs3)) ((p,k) =>
Mux(io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).lrs3, io.remap_reqs(k).pdst, p))
Mux(bypass.B && io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).lrs3, io.remap_reqs(k).pdst, p))
io.map_resps(i).stale_pdst := (0 until i).foldLeft(map_table(io.map_reqs(i).ldst)) ((p,k) =>
Mux(io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).ldst, io.remap_reqs(k).pdst, p))
Mux(bypass.B && io.remap_reqs(k).valid && io.remap_reqs(k).ldst === io.map_reqs(i).ldst, io.remap_reqs(k).pdst, p))

if (!float) io.map_resps(i).prs3 := DontCare
}
Expand Down

0 comments on commit 2a61ca7

Please sign in to comment.