-
Notifications
You must be signed in to change notification settings - Fork 406
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[dcache] Fork NBDCache, re-enable PMPs
- Loading branch information
Showing
3 changed files
with
349 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,347 @@ | ||
// See LICENSE.Berkeley for license details. | ||
// See LICENSE.SiFive for license details. | ||
|
||
package boom.lsu | ||
|
||
import Chisel._ | ||
import Chisel.ImplicitConversions._ | ||
import freechips.rocketchip.config.Parameters | ||
import freechips.rocketchip.diplomacy._ | ||
import freechips.rocketchip.tilelink._ | ||
import freechips.rocketchip.util._ | ||
import freechips.rocketchip.rocket._ | ||
|
||
// BOOM's forked copy of the non-blocking L1 data cache (NBDCache).
// Reuses HellaCache's diplomatic node/IO (constructor signature unchanged);
// only the module implementation is overridden with the forked pipeline below.
class BoomNonBlockingDCache(hartid: Int)(implicit p: Parameters) extends HellaCache(hartid)(p) { | ||
override lazy val module = new BoomNonBlockingDCacheModule(this) | ||
} | ||
|
||
// Implementation of the forked NBDCache: a three-stage (s1/s2/s3) cache pipeline
// with MSHR-based miss handling, a probe unit, and a writeback unit, speaking
// TileLink on channels a-e via tl_out.
//
// NOTE(review): Chisel uses last-connect semantics — later `when` blocks override
// earlier connections in the same cycle. Several signals below (io.cpu.req.ready,
// s1_req, mshrs.io.req.valid) rely on that ordering; do not reorder statements.
class BoomNonBlockingDCacheModule(outer: BoomNonBlockingDCache) extends HellaCacheModule(outer) { | ||
|
||
require(isPow2(nWays)) // TODO: relax this | ||
require(dataScratchpadSize == 0) | ||
require(!usingVM || untagBits <= pgIdxBits, s"untagBits($untagBits) > pgIdxBits($pgIdxBits)") | ||
|
||
// ECC is only supported on the data array | ||
require(cacheParams.tagCode.isInstanceOf[IdentityCode]) | ||
val dECC = cacheParams.dataCode | ||
|
||
// Major sub-units: victim writeback, inbound-probe handling, miss tracking.
val wb = Module(new WritebackUnit) | ||
val prober = Module(new ProbeUnit) | ||
val mshrs = Module(new MSHRFile) | ||
|
||
// Default: accept CPU requests; later `when` blocks pull ready low on
// structural hazards (TLB busy, arbiter busy, block_miss).
io.cpu.req.ready := Bool(true) | ||
// ---- stage 1 state ----
val s1_valid = Reg(next=io.cpu.req.fire(), init=Bool(false)) | ||
val s1_req = Reg(io.cpu.req.bits) | ||
val s1_valid_masked = s1_valid && !io.cpu.s1_kill | ||
val s1_replay = Reg(init=Bool(false)) | ||
val s1_clk_en = Reg(Bool()) | ||
val s1_sfence = s1_req.cmd === M_SFENCE | ||
|
||
// ---- stage 2 state ----
// s2_valid is squashed by any reported s2 exception; s2_replay is an MSHR
// replay (flushes are filtered out of the replay path here).
val s2_valid = Reg(next=s1_valid_masked && !s1_sfence, init=Bool(false)) && !io.cpu.s2_xcpt.asUInt.orR | ||
val s2_req = Reg(io.cpu.req.bits) | ||
val s2_replay = Reg(next=s1_replay, init=Bool(false)) && s2_req.cmd =/= M_FLUSH_ALL | ||
val s2_recycle = Wire(Bool()) | ||
val s2_valid_masked = Wire(Bool()) | ||
|
||
// ---- stage 3 state (store/AMO data writeback into the data array) ----
val s3_valid = Reg(init=Bool(false)) | ||
val s3_req = Reg(io.cpu.req.bits) | ||
val s3_way = Reg(Bits()) | ||
|
||
val s1_recycled = RegEnable(s2_recycle, Bool(false), s1_clk_en) | ||
val s1_read = isRead(s1_req.cmd) | ||
val s1_write = isWrite(s1_req.cmd) | ||
val s1_readwrite = s1_read || s1_write || isPrefetch(s1_req.cmd) | ||
// check for unsupported operations | ||
assert(!s1_valid || !s1_req.cmd.isOneOf(M_PWR)) | ||
|
||
// ---- TLB: translated in stage 1; physical requests pass through ----
val dtlb = Module(new TLB(false, log2Ceil(coreDataBytes), TLBConfig(nTLBEntries))) | ||
io.ptw <> dtlb.io.ptw | ||
dtlb.io.kill := io.cpu.s2_kill | ||
dtlb.io.req.valid := s1_valid && !io.cpu.s1_kill && s1_readwrite | ||
dtlb.io.req.bits.passthrough := s1_req.phys | ||
dtlb.io.req.bits.vaddr := s1_req.addr | ||
dtlb.io.req.bits.size := s1_req.typ | ||
dtlb.io.req.bits.cmd := s1_req.cmd | ||
// Stall new requests while the TLB can't accept (unless physically addressed).
when (!dtlb.io.req.ready && !io.cpu.req.bits.phys) { io.cpu.req.ready := Bool(false) } | ||
|
||
// SFENCE.VMA: rs1/rs2 presence is encoded in the low bits of req.typ, and the
// ASID arrives on the s1 store-data bus.
dtlb.io.sfence.valid := s1_valid && !io.cpu.s1_kill && s1_sfence | ||
dtlb.io.sfence.bits.rs1 := s1_req.typ(0) | ||
dtlb.io.sfence.bits.rs2 := s1_req.typ(1) | ||
dtlb.io.sfence.bits.addr := s1_req.addr | ||
dtlb.io.sfence.bits.asid := io.cpu.s1_data.data | ||
|
||
|
||
// ---- s1_req source select ----
// Last-connect priority (lowest to highest): new CPU request, writeback
// meta read, prober meta read, MSHR replay, s2 recycle.
when (io.cpu.req.valid) { | ||
s1_req := io.cpu.req.bits | ||
} | ||
when (wb.io.meta_read.valid) { | ||
s1_req.addr := Cat(wb.io.meta_read.bits.tag, wb.io.meta_read.bits.idx) << blockOffBits | ||
s1_req.phys := Bool(true) | ||
} | ||
when (prober.io.meta_read.valid) { | ||
s1_req.addr := Cat(prober.io.meta_read.bits.tag, prober.io.meta_read.bits.idx) << blockOffBits | ||
s1_req.phys := Bool(true) | ||
} | ||
when (mshrs.io.replay.valid) { | ||
s1_req := mshrs.io.replay.bits | ||
} | ||
when (s2_recycle) { | ||
s1_req := s2_req | ||
} | ||
val s1_addr = s1_req.addr | ||
// Advance s1 -> s2 only when the pipeline is clock-enabled; store data is
// sourced from the replay buffer or the CPU's s1 data bus.
when (s1_clk_en) { | ||
s2_req.typ := s1_req.typ | ||
s2_req.phys := s1_req.phys | ||
s2_req.addr := s1_addr | ||
when (s1_write) { | ||
s2_req.data := Mux(s1_replay, mshrs.io.replay.bits.data, io.cpu.s1_data.data) | ||
} | ||
when (s1_recycled) { s2_req.data := s1_req.data } | ||
s2_req.tag := s1_req.tag | ||
s2_req.cmd := s1_req.cmd | ||
} | ||
|
||
// tags | ||
// Meta read ports: 0=recycle, 1=MSHR, 2=prober, 3=writeback, 4=new request.
// Meta write ports: 0=MSHR, 1=prober. Lower index wins arbitration.
def onReset = L1Metadata(UInt(0), ClientMetadata.onReset) | ||
val meta = Module(new L1MetadataArray(onReset _)) | ||
val metaReadArb = Module(new Arbiter(new L1MetaReadReq, 5)) | ||
val metaWriteArb = Module(new Arbiter(new L1MetaWriteReq, 2)) | ||
meta.io.read <> metaReadArb.io.out | ||
meta.io.write <> metaWriteArb.io.out | ||
|
||
// data | ||
// Data read ports: 0=recycle, 1=replay, 2=writeback, 3=new request.
// Data write ports: 0=store/AMO (s3), 1=refill. ECC-encode per data word.
val data = Module(new DataArray) | ||
val readArb = Module(new Arbiter(new L1DataReadReq, 4)) | ||
val writeArb = Module(new Arbiter(new L1DataWriteReq, 2)) | ||
data.io.write.valid := writeArb.io.out.valid | ||
writeArb.io.out.ready := data.io.write.ready | ||
data.io.write.bits := writeArb.io.out.bits | ||
val wdata_encoded = (0 until rowWords).map(i => dECC.encode(writeArb.io.out.bits.data(coreDataBits*(i+1)-1,coreDataBits*i))) | ||
data.io.write.bits.data := wdata_encoded.asUInt | ||
|
||
// tag read for new requests | ||
metaReadArb.io.in(4).valid := io.cpu.req.valid | ||
metaReadArb.io.in(4).bits.idx := io.cpu.req.bits.addr >> blockOffBits | ||
when (!metaReadArb.io.in(4).ready) { io.cpu.req.ready := Bool(false) } | ||
|
||
// data read for new requests | ||
// Way is unknown until the s1 tag compare, so read all ways speculatively.
readArb.io.in(3).valid := io.cpu.req.valid | ||
readArb.io.in(3).bits.addr := io.cpu.req.bits.addr | ||
readArb.io.in(3).bits.way_en := ~UInt(0, nWays) | ||
when (!readArb.io.in(3).ready) { io.cpu.req.ready := Bool(false) } | ||
|
||
// recycled requests | ||
metaReadArb.io.in(0).valid := s2_recycle | ||
metaReadArb.io.in(0).bits.idx := s2_req.addr >> blockOffBits | ||
readArb.io.in(0).valid := s2_recycle | ||
readArb.io.in(0).bits.addr := s2_req.addr | ||
readArb.io.in(0).bits.way_en := ~UInt(0, nWays) | ||
|
||
// tag check and way muxing | ||
def wayMap[T <: Data](f: Int => T) = Vec((0 until nWays).map(f)) | ||
val s1_tag_eq_way = wayMap((w: Int) => meta.io.resp(w).tag === (s1_addr >> untagBits)).asUInt | ||
val s1_tag_match_way = wayMap((w: Int) => s1_tag_eq_way(w) && meta.io.resp(w).coh.isValid()).asUInt | ||
s1_clk_en := metaReadArb.io.out.valid //TODO: should be metaReadArb.io.out.fire(), but triggers Verilog backend bug | ||
val s1_writeback = s1_clk_en && !s1_valid && !s1_replay | ||
val s2_tag_match_way = RegEnable(s1_tag_match_way, s1_clk_en) | ||
val s2_tag_match = s2_tag_match_way.orR | ||
val s2_hit_state = Mux1H(s2_tag_match_way, wayMap((w: Int) => RegEnable(meta.io.resp(w).coh, s1_clk_en))) | ||
// A hit requires both sufficient coherence permission and no required
// permission upgrade (otherwise the access goes through the MSHRs).
val (s2_has_permission, _, s2_new_hit_state) = s2_hit_state.onAccess(s2_req.cmd) | ||
val s2_hit = s2_tag_match && s2_has_permission && s2_hit_state === s2_new_hit_state | ||
|
||
// load-reserved/store-conditional | ||
// lrsc_count: cycles of reservation remaining; counts down every cycle and
// is cleared by any non-LR access. While lrsc_valid, inbound probes are
// stalled (see tl_out.b below) to guarantee SC forward progress.
val lrsc_count = Reg(init=UInt(0)) | ||
val lrsc_valid = lrsc_count > lrscBackoff | ||
val lrsc_addr = Reg(UInt()) | ||
val (s2_lr, s2_sc) = (s2_req.cmd === M_XLR, s2_req.cmd === M_XSC) | ||
val s2_lrsc_addr_match = lrsc_valid && lrsc_addr === (s2_req.addr >> blockOffBits) | ||
val s2_sc_fail = s2_sc && !s2_lrsc_addr_match | ||
when (lrsc_count > 0) { lrsc_count := lrsc_count - 1 } | ||
when (s2_valid_masked && s2_hit || s2_replay) { | ||
when (s2_lr) { | ||
lrsc_count := lrscCycles - 1 | ||
lrsc_addr := s2_req.addr >> blockOffBits | ||
} | ||
when (lrsc_count > 0) { | ||
lrsc_count := 0 | ||
} | ||
} | ||
|
||
// ---- s2 data capture ----
// With narrow reads enabled, only word 0 is latched per way (saves energy);
// writebacks always capture the full row.
val s2_data = Wire(Vec(nWays, Bits(width=encRowBits))) | ||
for (w <- 0 until nWays) { | ||
val regs = Reg(Vec(rowWords, Bits(width = encDataBits))) | ||
val en1 = s1_clk_en && s1_tag_eq_way(w) | ||
for (i <- 0 until regs.size) { | ||
val en = en1 && ((Bool(i == 0) || !Bool(doNarrowRead)) || s1_writeback) | ||
when (en) { regs(i) := data.io.resp(w) >> encDataBits*i } | ||
} | ||
s2_data(w) := regs.asUInt | ||
} | ||
val s2_data_muxed = Mux1H(s2_tag_match_way, s2_data) | ||
// ECC decode: both corrected and uncorrected views are kept; the uncorrected
// view feeds the fast load path, with a recycle on a correctable error.
val s2_data_decoded = (0 until rowWords).map(i => dECC.decode(s2_data_muxed(encDataBits*(i+1)-1,encDataBits*i))) | ||
val s2_data_corrected = s2_data_decoded.map(_.corrected).asUInt | ||
val s2_data_uncorrected = s2_data_decoded.map(_.uncorrected).asUInt | ||
val s2_word_idx = if(doNarrowRead) UInt(0) else s2_req.addr(log2Up(rowWords*coreDataBytes)-1,log2Up(wordBytes)) | ||
val s2_data_correctable = s2_data_decoded.map(_.correctable).asUInt()(s2_word_idx) | ||
|
||
// store/amo hits | ||
// Stage 3 writes store/AMO results (or ECC-corrected data) into the array.
s3_valid := (s2_valid_masked && s2_hit || s2_replay) && !s2_sc_fail && isWrite(s2_req.cmd) | ||
val amoalu = Module(new AMOALU(xLen)) | ||
when ((s2_valid || s2_replay) && (isWrite(s2_req.cmd) || s2_data_correctable)) { | ||
s3_req := s2_req | ||
s3_req.data := Mux(s2_data_correctable, s2_data_corrected, amoalu.io.out) | ||
s3_way := s2_tag_match_way | ||
} | ||
|
||
// Data-array write port 0: the s3 store/AMO result, replicated across the
// row with a one-hot word mask selecting the target word.
writeArb.io.in(0).bits.addr := s3_req.addr | ||
writeArb.io.in(0).bits.wmask := UIntToOH(s3_req.addr.extract(rowOffBits-1,offsetlsb)) | ||
writeArb.io.in(0).bits.data := Fill(rowWords, s3_req.data) | ||
writeArb.io.in(0).valid := s3_valid | ||
writeArb.io.in(0).bits.way_en := s3_way | ||
|
||
// replacement policy | ||
val replacer = cacheParams.replacement | ||
val s1_replaced_way_en = UIntToOH(replacer.way) | ||
val s2_replaced_way_en = UIntToOH(RegEnable(replacer.way, s1_clk_en)) | ||
val s2_repl_meta = Mux1H(s2_replaced_way_en, wayMap((w: Int) => RegEnable(meta.io.resp(w), s1_clk_en && s1_replaced_way_en(w))).toSeq) | ||
|
||
// miss handling | ||
// On a miss, hand the request to the MSHR file along with the victim way's
// old metadata (needed for writeback/coherence bookkeeping).
// NOTE: mshrs.io.req.valid is overridden below when s2_nack_hit (last-connect).
mshrs.io.req.valid := s2_valid_masked && !s2_hit && (isPrefetch(s2_req.cmd) || isRead(s2_req.cmd) || isWrite(s2_req.cmd)) | ||
mshrs.io.req.bits := s2_req | ||
mshrs.io.req.bits.tag_match := s2_tag_match | ||
mshrs.io.req.bits.old_meta := Mux(s2_tag_match, L1Metadata(s2_repl_meta.tag, s2_hit_state), s2_repl_meta) | ||
mshrs.io.req.bits.way_en := Mux(s2_tag_match, s2_tag_match_way, s2_replaced_way_en) | ||
mshrs.io.req.bits.data := s2_req.data | ||
when (mshrs.io.req.fire()) { replacer.miss } | ||
tl_out.a <> mshrs.io.mem_acquire | ||
|
||
// replays | ||
readArb.io.in(1).valid := mshrs.io.replay.valid | ||
readArb.io.in(1).bits := mshrs.io.replay.bits | ||
readArb.io.in(1).bits.way_en := ~UInt(0, nWays) | ||
mshrs.io.replay.ready := readArb.io.in(1).ready | ||
s1_replay := mshrs.io.replay.valid && readArb.io.in(1).ready | ||
metaReadArb.io.in(1) <> mshrs.io.meta_read | ||
metaWriteArb.io.in(0) <> mshrs.io.meta_write | ||
|
||
// probes and releases | ||
// Stall inbound probes while an LR/SC reservation is live so the local SC
// can succeed before the block is snooped away.
prober.io.req.valid := tl_out.b.valid && !lrsc_valid | ||
tl_out.b.ready := prober.io.req.ready && !lrsc_valid | ||
prober.io.req.bits := tl_out.b.bits | ||
prober.io.way_en := s2_tag_match_way | ||
prober.io.block_state := s2_hit_state | ||
metaReadArb.io.in(2) <> prober.io.meta_read | ||
metaWriteArb.io.in(1) <> prober.io.meta_write | ||
prober.io.mshr_rdy := mshrs.io.probe_rdy | ||
|
||
// refills | ||
val grant_has_data = edge.hasData(tl_out.d.bits) | ||
mshrs.io.mem_grant.valid := tl_out.d.fire() | ||
mshrs.io.mem_grant.bits := tl_out.d.bits | ||
// Only backpressure the grant channel when the beat actually carries data.
tl_out.d.ready := writeArb.io.in(1).ready || !grant_has_data | ||
/* The last clause here is necessary in order to prevent the responses for | ||
* the IOMSHRs from being written into the data array. It works because the | ||
* IOMSHR ids start right after the ones for the regular MSHRs. */ | ||
writeArb.io.in(1).valid := tl_out.d.valid && grant_has_data && | ||
tl_out.d.bits.source < UInt(cfg.nMSHRs) | ||
writeArb.io.in(1).bits.addr := mshrs.io.refill.addr | ||
writeArb.io.in(1).bits.way_en := mshrs.io.refill.way_en | ||
writeArb.io.in(1).bits.wmask := ~UInt(0, rowWords) | ||
writeArb.io.in(1).bits.data := tl_out.d.bits.data(encRowBits-1,0) | ||
data.io.read <> readArb.io.out | ||
readArb.io.out.ready := !tl_out.d.valid || tl_out.d.ready // insert bubble if refill gets blocked | ||
tl_out.e <> mshrs.io.mem_finish | ||
|
||
// writebacks | ||
// Prober-initiated writebacks take priority over MSHR victim writebacks;
// channel C is shared between writeback releases and probe responses.
val wbArb = Module(new Arbiter(new WritebackReq(edge.bundle), 2)) | ||
wbArb.io.in(0) <> prober.io.wb_req | ||
wbArb.io.in(1) <> mshrs.io.wb_req | ||
wb.io.req <> wbArb.io.out | ||
metaReadArb.io.in(3) <> wb.io.meta_read | ||
readArb.io.in(2) <> wb.io.data_req | ||
wb.io.data_resp := s2_data_corrected | ||
TLArbiter.lowest(edge, tl_out.c, wb.io.release, prober.io.rep) | ||
|
||
// store->load bypassing | ||
// Forward in-flight store data (s2/s3/s4) to an s1 load at the same word
// address; PriorityMux picks the youngest matching store.
val s4_valid = Reg(next=s3_valid, init=Bool(false)) | ||
val s4_req = RegEnable(s3_req, s3_valid && metaReadArb.io.out.valid) | ||
val bypasses = List( | ||
((s2_valid_masked || s2_replay) && !s2_sc_fail, s2_req, amoalu.io.out), | ||
(s3_valid, s3_req, s3_req.data), | ||
(s4_valid, s4_req, s4_req.data) | ||
).map(r => (r._1 && (s1_addr >> wordOffBits === r._2.addr >> wordOffBits) && isWrite(r._2.cmd), r._3)) | ||
val s2_store_bypass_data = Reg(Bits(width = coreDataBits)) | ||
val s2_store_bypass = Reg(Bool()) | ||
when (s1_clk_en) { | ||
s2_store_bypass := false | ||
when (bypasses.map(_._1).reduce(_||_)) { | ||
s2_store_bypass_data := PriorityMux(bypasses) | ||
s2_store_bypass := true | ||
} | ||
} | ||
|
||
// load data subword mux/sign extension | ||
val s2_data_word_prebypass = s2_data_uncorrected >> Cat(s2_word_idx, Bits(0,log2Up(coreDataBits))) | ||
val s2_data_word = Mux(s2_store_bypass, s2_store_bypass_data, s2_data_word_prebypass) | ||
val loadgen = new LoadGen(s2_req.typ, mtSigned(s2_req.typ), s2_req.addr, s2_data_word, s2_sc, wordBytes) | ||
|
||
// AMO ALU combines the loaded word (lhs) with the store operand (rhs).
amoalu.io.mask := new StoreGen(s2_req.typ, s2_req.addr, 0.U, xLen/8).mask | ||
amoalu.io.cmd := s2_req.cmd | ||
amoalu.io.lhs := s2_data_word | ||
amoalu.io.rhs := s2_req.data | ||
|
||
// nack it like it's hot | ||
// Nack sources: TLB miss, set conflict with an in-flight probe, a
// secondary miss the MSHR can't merge, or a full MSHR file.
val s1_nack = dtlb.io.req.valid && dtlb.io.resp.miss || | ||
s1_req.addr(idxMSB,idxLSB) === prober.io.meta_write.bits.idx && !prober.io.req.ready | ||
val s2_nack_hit = RegEnable(s1_nack, s1_valid || s1_replay) | ||
when (s2_nack_hit) { mshrs.io.req.valid := Bool(false) } | ||
val s2_nack_victim = s2_hit && mshrs.io.secondary_miss | ||
val s2_nack_miss = !s2_hit && !mshrs.io.req.ready | ||
val s2_nack = s2_nack_hit || s2_nack_victim || s2_nack_miss | ||
s2_valid_masked := s2_valid && !s2_nack && !io.cpu.s2_kill | ||
|
||
// On a correctable ECC error, recycle the request (held for one extra cycle
// via s2_recycle_next) so it re-reads the corrected data.
val s2_recycle_ecc = (s2_valid || s2_replay) && s2_hit && s2_data_correctable | ||
val s2_recycle_next = Reg(init=Bool(false)) | ||
when (s1_valid || s1_replay) { s2_recycle_next := s2_recycle_ecc } | ||
s2_recycle := s2_recycle_ecc || s2_recycle_next | ||
|
||
// after a nack, block until nack condition resolves to save energy | ||
val block_miss = Reg(init=Bool(false)) | ||
block_miss := (s2_valid || block_miss) && s2_nack_miss | ||
when (block_miss) { | ||
io.cpu.req.ready := Bool(false) | ||
} | ||
|
||
// ---- responses: cache hits vs. uncached (IOMSHR) responses ----
val cache_resp = Wire(Valid(new HellaCacheResp)) | ||
cache_resp.valid := (s2_replay || s2_valid_masked && s2_hit) && !s2_data_correctable | ||
cache_resp.bits := s2_req | ||
cache_resp.bits.has_data := isRead(s2_req.cmd) | ||
// SC result (0=success, 1=failure) is OR'd into the low bit of the data.
cache_resp.bits.data := loadgen.data | s2_sc_fail | ||
cache_resp.bits.store_data := s2_req.data | ||
cache_resp.bits.replay := s2_replay | ||
|
||
// Uncached responses slot into cycles when the s2 response port is idle.
val uncache_resp = Wire(Valid(new HellaCacheResp)) | ||
uncache_resp.bits := mshrs.io.resp.bits | ||
uncache_resp.valid := mshrs.io.resp.valid | ||
mshrs.io.resp.ready := Reg(next= !(s1_valid || s1_replay)) | ||
|
||
io.cpu.s2_nack := s2_valid && s2_nack | ||
io.cpu.resp := Mux(mshrs.io.resp.ready, uncache_resp, cache_resp) | ||
io.cpu.resp.bits.data_word_bypass := loadgen.wordData | ||
io.cpu.resp.bits.data_raw := s2_data_word | ||
io.cpu.ordered := mshrs.io.fence_rdy && !s1_valid && !s2_valid | ||
io.cpu.replay_next := (s1_replay && s1_read) || mshrs.io.replay_next | ||
|
||
|
||
// Tie off the s2_xcpt signal, since exceptions generated | ||
// here are due to TLB falsely setting requests to S level | ||
io.cpu.s2_xcpt := new TLBResp().fromBits(0.U) | ||
|
||
// performance events | ||
io.cpu.perf.acquire := edge.done(tl_out.a) | ||
io.cpu.perf.release := edge.done(tl_out.c) | ||
io.cpu.perf.tlbMiss := io.ptw.req.fire() | ||
|
||
// no clock-gating support | ||
io.cpu.clock_enabled := true | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters