Skip to content

Commit

Permalink
[dcache] Fork NBDCache, re-enable PMPs
Browse files Browse the repository at this point in the history
  • Loading branch information
jerryz123 committed Dec 23, 2018
1 parent 21021c3 commit efe7eb4
Show file tree
Hide file tree
Showing 3 changed files with 349 additions and 2 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/common/parameters.scala
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ case class BoomCoreParams(
val lrscCycles: Int = 80 // worst case is 14 mispredicted branches + slop

val jumpInFrontend: Boolean = false // unused in boom
val nPMPs: Int = 0
val nPMPs: Int = 8
}

trait HasBoomCoreParameters extends freechips.rocketchip.tile.HasCoreParameters
Expand Down
347 changes: 347 additions & 0 deletions src/main/scala/lsu/dcache.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,347 @@
// See LICENSE.Berkeley for license details.
// See LICENSE.SiFive for license details.

package boom.lsu

import Chisel._
import Chisel.ImplicitConversions._
import freechips.rocketchip.config.Parameters
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import freechips.rocketchip.rocket._

class BoomNonBlockingDCache(hartid: Int)(implicit p: Parameters) extends HellaCache(hartid)(p) {
override lazy val module = new BoomNonBlockingDCacheModule(this)
}

class BoomNonBlockingDCacheModule(outer: BoomNonBlockingDCache) extends HellaCacheModule(outer) {

require(isPow2(nWays)) // TODO: relax this
require(dataScratchpadSize == 0)
require(!usingVM || untagBits <= pgIdxBits, s"untagBits($untagBits) > pgIdxBits($pgIdxBits)")

// ECC is only supported on the data array
require(cacheParams.tagCode.isInstanceOf[IdentityCode])
val dECC = cacheParams.dataCode

val wb = Module(new WritebackUnit)
val prober = Module(new ProbeUnit)
val mshrs = Module(new MSHRFile)

io.cpu.req.ready := Bool(true)
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false))
val s1_req = Reg(io.cpu.req.bits)
val s1_valid_masked = s1_valid && !io.cpu.s1_kill
val s1_replay = Reg(init=Bool(false))
val s1_clk_en = Reg(Bool())
val s1_sfence = s1_req.cmd === M_SFENCE

val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR
val s2_req = Reg(io.cpu.req.bits)
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL
val s2_recycle = Wire(Bool())
val s2_valid_masked = Wire(Bool())

val s3_valid = Reg(init=Bool(false))
val s3_req = Reg(io.cpu.req.bits)
val s3_way = Reg(Bits())

val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en)
val s1_read = isRead(s1_req.cmd)
val s1_write = isWrite(s1_req.cmd)
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd)
// check for unsupported operations
assert(!s1_valid || !s1_req.cmd.isOneOf(M_PWR))

val dtlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBEntries)))
io.ptw <> dtlb.io.ptw
dtlb.io.kill := io.cpu.s2_kill
dtlb.io.req.valid := s1_valid && !io.cpu.s1_kill && s1_readwrite
dtlb.io.req.bits.passthrough := s1_req.phys
dtlb.io.req.bits.vaddr := s1_req.addr
dtlb.io.req.bits.size := s1_req.typ
dtlb.io.req.bits.cmd := s1_req.cmd
when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) }

dtlb.io.sfence.valid := s1_valid && !io.cpu.s1_kill && s1_sfence
dtlb.io.sfence.bits.rs1 := s1_req.typ(0)
dtlb.io.sfence.bits.rs2 := s1_req.typ(1)
dtlb.io.sfence.bits.addr := s1_req.addr
dtlb.io.sfence.bits.asid := io.cpu.s1_data.data


when (io.cpu.req.valid) {
s1_req := io.cpu.req.bits
}
when (wb.io.meta_read.valid) {
s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits
s1_req.phys := Bool(true)
}
when (prober.io.meta_read.valid) {
s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits
s1_req.phys := Bool(true)
}
when (mshrs.io.replay.valid) {
s1_req := mshrs.io.replay.bits
}
when (s2_recycle) {
s1_req := s2_req
}
val s1_addr = s1_req.addr
when (s1_clk_en) {
s2_req.typ := s1_req.typ
s2_req.phys := s1_req.phys
s2_req.addr := s1_addr
when (s1_write) {
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data.data)
}
when (s1_recycled) { s2_req.data := s1_req.data }
s2_req.tag := s1_req.tag
s2_req.cmd := s1_req.cmd
}

// tags
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset)
val meta = Module(new L1MetadataArray(onReset _))
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, 5))
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2))
meta.io.read <> metaReadArb.io.out
meta.io.write <> metaWriteArb.io.out

// data
val data = Module(new DataArray)
val readArb = Module(new Arbiter(new L1DataReadReq, 4))
val writeArb = Module(new Arbiter(new L1DataWriteReq, 2))
data.io.write.valid := writeArb.io.out.valid
writeArb.io.out.ready := data.io.write.ready
data.io.write.bits := writeArb.io.out.bits
val wdata_encoded = (0 until rowWords).map(i => dECC.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i)))
data.io.write.bits.data := wdata_encoded.asUInt

// tag read for new requests
metaReadArb.io.in(4).valid := io.cpu.req.valid
metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits
when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) }

// data read for new requests
readArb.io.in(3).valid := io.cpu.req.valid
readArb.io.in(3).bits.addr := io.cpu.req.bits.addr
readArb.io.in(3).bits.way_en := ~UInt(0, nWays)
when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) }

// recycled requests
metaReadArb.io.in(0).valid := s2_recycle
metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits
readArb.io.in(0).valid := s2_recycle
readArb.io.in(0).bits.addr := s2_req.addr
readArb.io.in(0).bits.way_en := ~UInt(0, nWays)

// tag check and way muxing
def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f))
val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).asUInt
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.isValid()).asUInt
s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug
val s1_writeback = s1_clk_en && !s1_valid && !s1_replay
val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en)
val s2_tag_match = s2_tag_match_way.orR
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en)))
val (s2_has_permission, _, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd)
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state

// load-reserved/store-conditional
val lrsc_count = Reg(init=UInt(0))
val lrsc_valid = lrsc_count > lrscBackoff
val lrsc_addr = Reg(UInt())
val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC)
val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits)
val s2_sc_fail = s2_sc && !s2_lrsc_addr_match
when (lrsc_count > 0) { lrsc_count := lrsc_count - 1 }
when (s2_valid_masked && s2_hit || s2_replay) {
when (s2_lr) {
lrsc_count := lrscCycles - 1
lrsc_addr := s2_req.addr >> blockOffBits
}
when (lrsc_count > 0) {
lrsc_count := 0
}
}

val s2_data = Wire(Vec(nWays, Bits(width=encRowBits)))
for (w <- 0 until nWays) {
val regs = Reg(Vec(rowWords, Bits(width = encDataBits)))
val en1 = s1_clk_en && s1_tag_eq_way(w)
for (i <- 0 until regs.size) {
val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback)
when (en) { regs(i) := data.io.resp(w) >> encDataBits*i }
}
s2_data(w) := regs.asUInt
}
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data)
val s2_data_decoded = (0 until rowWords).map(i => dECC.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i)))
val s2_data_corrected = s2_data_decoded.map(_.corrected).asUInt
val s2_data_uncorrected = s2_data_decoded.map(_.uncorrected).asUInt
val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes))
val s2_data_correctable = s2_data_decoded.map(_.correctable).asUInt()(s2_word_idx)

// store/amo hits
s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd)
val amoalu = Module(new AMOALU(xLen))
when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) {
s3_req := s2_req
s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out)
s3_way := s2_tag_match_way
}

writeArb.io.in(0).bits.addr := s3_req.addr
writeArb.io.in(0).bits.wmask := UIntToOH(s3_req.addr.extract(rowOffBits-1,offsetlsb))
writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data)
writeArb.io.in(0).valid := s3_valid
writeArb.io.in(0).bits.way_en := s3_way

// replacement policy
val replacer = cacheParams.replacement
val s1_replaced_way_en = UIntToOH(replacer.way)
val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en))
val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq)

// miss handling
mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd))
mshrs.io.req.bits := s2_req
mshrs.io.req.bits.tag_match := s2_tag_match
mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta)
mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en)
mshrs.io.req.bits.data := s2_req.data
when (mshrs.io.req.fire()) { replacer.miss }
tl_out.a <> mshrs.io.mem_acquire

// replays
readArb.io.in(1).valid := mshrs.io.replay.valid
readArb.io.in(1).bits := mshrs.io.replay.bits
readArb.io.in(1).bits.way_en := ~UInt(0, nWays)
mshrs.io.replay.ready := readArb.io.in(1).ready
s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready
metaReadArb.io.in(1) <> mshrs.io.meta_read
metaWriteArb.io.in(0) <> mshrs.io.meta_write

// probes and releases
prober.io.req.valid := tl_out.b.valid && !lrsc_valid
tl_out.b.ready := prober.io.req.ready && !lrsc_valid
prober.io.req.bits := tl_out.b.bits
prober.io.way_en := s2_tag_match_way
prober.io.block_state := s2_hit_state
metaReadArb.io.in(2) <> prober.io.meta_read
metaWriteArb.io.in(1) <> prober.io.meta_write
prober.io.mshr_rdy := mshrs.io.probe_rdy

// refills
val grant_has_data = edge.hasData(tl_out.d.bits)
mshrs.io.mem_grant.valid := tl_out.d.fire()
mshrs.io.mem_grant.bits := tl_out.d.bits
tl_out.d.ready := writeArb.io.in(1).ready || !grant_has_data
/* The last clause here is necessary in order to prevent the responses for
* the IOMSHRs from being written into the data array. It works because the
* IOMSHR ids start right the ones for the regular MSHRs. */
writeArb.io.in(1).valid := tl_out.d.valid && grant_has_data &&
tl_out.d.bits.source < UInt(cfg.nMSHRs)
writeArb.io.in(1).bits.addr := mshrs.io.refill.addr
writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en
writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords)
writeArb.io.in(1).bits.data := tl_out.d.bits.data(encRowBits-1,0)
data.io.read <> readArb.io.out
readArb.io.out.ready := !tl_out.d.valid || tl_out.d.ready // insert bubble if refill gets blocked
tl_out.e <> mshrs.io.mem_finish

// writebacks
val wbArb = Module(new Arbiter(new WritebackReq(edge.bundle), 2))
wbArb.io.in(0) <> prober.io.wb_req
wbArb.io.in(1) <> mshrs.io.wb_req
wb.io.req <> wbArb.io.out
metaReadArb.io.in(3) <> wb.io.meta_read
readArb.io.in(2) <> wb.io.data_req
wb.io.data_resp := s2_data_corrected
TLArbiter.lowest(edge, tl_out.c, wb.io.release, prober.io.rep)

// store->load bypassing
val s4_valid = Reg(next=s3_valid, init=Bool(false))
val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid)
val bypasses = List(
((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out),
(s3_valid, s3_req, s3_req.data),
(s4_valid, s4_req, s4_req.data)
).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3))
val s2_store_bypass_data = Reg(Bits(width = coreDataBits))
val s2_store_bypass = Reg(Bool())
when (s1_clk_en) {
s2_store_bypass := false
when (bypasses.map(_._1).reduce(_||_)) {
s2_store_bypass_data := PriorityMux(bypasses)
s2_store_bypass := true
}
}

// load data subword mux/sign extension
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits)))
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass)
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes)

amoalu.io.mask := new StoreGen(s2_req.typ, s2_req.addr, 0.U, xLen/8).mask
amoalu.io.cmd := s2_req.cmd
amoalu.io.lhs := s2_data_word
amoalu.io.rhs := s2_req.data

// nack it like it's hot
val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss ||
s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready
val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay)
when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) }
val s2_nack_victim = s2_hit && mshrs.io.secondary_miss
val s2_nack_miss = !s2_hit && !mshrs.io.req.ready
val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss
s2_valid_masked := s2_valid && !s2_nack && !io.cpu.s2_kill

val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable
val s2_recycle_next = Reg(init=Bool(false))
when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc }
s2_recycle := s2_recycle_ecc || s2_recycle_next

// after a nack, block until nack condition resolves to save energy
val block_miss = Reg(init=Bool(false))
block_miss := (s2_valid || block_miss) && s2_nack_miss
when (block_miss) {
io.cpu.req.ready := Bool(false)
}

val cache_resp = Wire(Valid(new HellaCacheResp))
cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable
cache_resp.bits := s2_req
cache_resp.bits.has_data := isRead(s2_req.cmd)
cache_resp.bits.data := loadgen.data | s2_sc_fail
cache_resp.bits.store_data := s2_req.data
cache_resp.bits.replay := s2_replay

val uncache_resp = Wire(Valid(new HellaCacheResp))
uncache_resp.bits := mshrs.io.resp.bits
uncache_resp.valid := mshrs.io.resp.valid
mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay))

io.cpu.s2_nack := s2_valid && s2_nack
io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp)
io.cpu.resp.bits.data_word_bypass := loadgen.wordData
io.cpu.resp.bits.data_raw := s2_data_word
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid
io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next


// Tie off the s2_xcpt signal, since exceptions generated
// here are due to TLB falsely setting requests to S level
io.cpu.s2_xcpt := new TLBResp().fromBits(0.U)

// performance events
io.cpu.perf.acquire := edge.done(tl_out.a)
io.cpu.perf.release := edge.done(tl_out.c)
io.cpu.perf.tlbMiss := io.ptw.req.fire()

// no clock-gating support
io.cpu.clock_enabled := true
}
2 changes: 1 addition & 1 deletion src/main/scala/lsu/types.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ trait HasBoomHellaCache { this: BaseTile =>
val dcache: HellaCache = LazyModule(
if(tileParams.dcache.get.nMSHRs == 0) {
new DCache(hartId, findScratchpadFromICache _, p(RocketCrossingKey).head.knownRatio)
} else { new NonBlockingDCache(hartId) })
} else { new BoomNonBlockingDCache(hartId) })

//tlMasterXbar.node := dcache.node
val dCacheTap = TLIdentityNode()
Expand Down

0 comments on commit efe7eb4

Please sign in to comment.