Skip to content

Commit

Permalink
review comments + fix register names
Browse files Browse the repository at this point in the history
  • Loading branch information
mortbopet committed Jun 30, 2023
1 parent c36fca2 commit b1448c2
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 251 deletions.
5 changes: 3 additions & 2 deletions include/circt/Dialect/Pipeline/PipelineOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -207,12 +207,13 @@ def StageOp : Op<Pipeline_Dialect, "stage", [
to communicate:
1. which stage (block) to transition to next
2. which registers to build at this stage boundary
3. An optional hierarchy of boolean values to be used for clock gates for
3. which values to pass through to the next stage without registering
4. An optional hierarchy of boolean values to be used for clock gates for
each register.
- The implicit '!stalled' gate will always be the first signal in the
hierarchy. Further signals are added to the hierarchy from left to
right.
4. which values to pass through to the next stage without registering


Example:
```mlir
Expand Down
55 changes: 11 additions & 44 deletions lib/Conversion/PipelineToHW/PipelineToHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "circt/Dialect/HW/HWOps.h"
#include "circt/Dialect/Pipeline/PipelineOps.h"
#include "circt/Dialect/Seq/SeqOps.h"
#include "circt/Support/BackedgeBuilder.h"
#include "mlir/IR/Builders.h"
#include "llvm/ADT/TypeSwitch.h"

Expand Down Expand Up @@ -93,7 +92,6 @@ class PipelineLowering {

// Build data registers.
auto stageRegPrefix = getStageRegPrefix(stageIndex);
BackedgeBuilder bb(builder, stageTerminator->getLoc());
auto loc = stageTerminator->getLoc();

// Build the clock enable signal: valid && !stall (if applicable)
Expand All @@ -120,7 +118,6 @@ class PipelineLowering {
auto regIn = it.value();
auto regName = builder.getStringAttr(stageRegPrefix.strref() + "_reg" +
std::to_string(regIdx));
Type dataType = regIn.getType();
Value dataReg;
if (this->clockGateRegs) {
// Use the clock gate instead of input muxing.
Expand All @@ -131,29 +128,12 @@ class PipelineLowering {
currClockGate = builder.create<seq::ClockGateOp>(
loc, currClockGate, hierClockGateEnable, /*test_enable=*/Value());
}
dataReg = builder.create<seq::CompRegOp>(
stageTerminator->getLoc(), dataType, regIn, currClockGate, regName,
reset, /*resetValue*/ Value(), /*sym_name*/ StringAttr());
dataReg = builder.create<seq::CompRegOp>(stageTerminator->getLoc(),
regIn, currClockGate, regName);
} else {
// Use input muxing. The select signal is &&'ed with any clock gates
// that may be present.
auto dataRegBE = bb.get(dataType);
Value imuxSelect = stageValidAndNotStalled;
if (auto clockGates = stageTerminator.getClockGatesForReg(regIdx);
!clockGates.empty()) {
imuxSelect = builder.create<comb::AndOp>(
loc, imuxSelect,
builder.create<comb::AndOp>(loc, builder.getI1Type(),
clockGates));
}

Value dataRegNext = builder.create<comb::MuxOp>(
stageTerminator->getLoc(), imuxSelect, regIn, dataRegBE);
dataReg = builder.create<seq::CompRegOp>(
stageTerminator->getLoc(), dataType, dataRegNext, clock, regName,
reset,
/*resetValue*/ Value(), /*sym_name*/ StringAttr());
dataRegBE.setValue(dataReg);
dataReg = builder.create<seq::CompRegClockEnabledOp>(
stageTerminator->getLoc(), regIn, clock, stageValidAndNotStalled,
regName);
}
rets.regs.push_back(dataReg);
}
Expand All @@ -166,26 +146,13 @@ class PipelineLowering {
builder.create<hw::ConstantOp>(terminator->getLoc(), APInt(1, 0, false))
.getResult();
if (hasStall) {
if (clockGateRegs) {
rets.valid = builder.create<seq::CompRegClockEnabledOp>(
loc, builder.getI1Type(), valid, clock, notStalled, validRegName,
reset, validRegResetVal,
/*sym_name*/ StringAttr());
} else {
auto validRegBE = bb.get(builder.getI1Type());
auto validRegNext =
builder.create<comb::MuxOp>(loc, notStalled, valid, validRegBE);
rets.valid = builder.create<seq::CompRegOp>(
loc, builder.getI1Type(), validRegNext, clock, validRegName, reset,
validRegResetVal,
/*sym_name*/ StringAttr());
validRegBE.setValue(rets.valid);
}
rets.valid = builder.create<seq::CompRegClockEnabledOp>(
loc, builder.getI1Type(), valid, clock, notStalled, validRegName,
reset, validRegResetVal, validRegName);
} else {
rets.valid =
builder.create<seq::CompRegOp>(loc, builder.getI1Type(), valid, clock,
validRegName, reset, validRegResetVal,
/*sym_name*/ StringAttr());
rets.valid = builder.create<seq::CompRegOp>(
loc, builder.getI1Type(), valid, clock, validRegName, reset,
validRegResetVal, validRegName);
}

rets.passthroughs = stageTerminator.getPassthroughs();
Expand Down
34 changes: 2 additions & 32 deletions lib/Dialect/Pipeline/PipelineOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,41 +422,11 @@ void StageOp::build(OpBuilder &odsBuilder, OperationState &odsState,
odsBuilder.getI64ArrayAttr(clockGatesPerRegister));
}

void StageOp::build(
OpBuilder &odsBuilder, OperationState &odsState, Block *dest,
ValueRange registers, ValueRange passthroughs,
const llvm::DenseMap<Value, llvm::SmallVector<Value>> &clockGates) {
odsState.addSuccessors(dest);
odsState.addOperands(registers);
odsState.addOperands(passthroughs);

// Get a sorted list of clock gates wrt. the registers.
llvm::SmallVector<int64_t> clockGatesPerRegister;
llvm::SmallVector<Value> sortedClockGates;
for (Value reg : registers) {
size_t nClockGates = 0;
if (auto it = clockGates.find(reg); it != clockGates.end()) {
llvm::append_range(sortedClockGates, it->second);
nClockGates = it->second.size();
}
clockGatesPerRegister.push_back(nClockGates);
}
odsState.addOperands(sortedClockGates);
odsState.addAttribute(
"operand_segment_sizes",
odsBuilder.getDenseI32ArrayAttr(
{static_cast<int32_t>(registers.size()),
static_cast<int32_t>(passthroughs.size()),
/*clock gates*/ static_cast<int32_t>(sortedClockGates.size())}));
odsState.addAttribute("clockGatesPerRegister",
odsBuilder.getI64ArrayAttr(clockGatesPerRegister));
}

ValueRange StageOp::getClockGatesForReg(unsigned regIdx) {
assert(regIdx < getRegisters().size() && "register index out of bounds.");

// This could be optimized quite a bit if we didn't store clock gates per
// register as an array of sizes... TODO: look into using properties and maybe
// TODO: This could be optimized quite a bit if we didn't store clock gates
// per register as an array of sizes... look into using properties and maybe
// attaching a more complex datastructure to reduce compute here.

unsigned clockGateStartIdx = 0;
Expand Down
6 changes: 3 additions & 3 deletions test/Conversion/PipelineToHW/test_ce.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
// CHECK-SAME: %[[VAL_0:.*]]: i32, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: i1, %[[VAL_3:.*]]: i1, %[[VAL_4:.*]]: i1) -> (out0: i32, out1: i1) {
// CHECK: %[[VAL_5:.*]] = comb.sub %[[VAL_0]], %[[VAL_1]] : i32
// CHECK: %[[VAL_6:.*]] = seq.clock_gate %[[VAL_3]], %[[VAL_2]]
// CHECK: %[[VAL_7:.*]] = seq.compreg %[[VAL_5]], %[[VAL_6]] : i32
// CHECK: %[[VAL_8:.*]] = seq.compreg %[[VAL_0]], %[[VAL_6]] : i32
// CHECK: %[[VAL_7:.*]] = seq.compreg sym @p0_s0_reg0 %[[VAL_5]], %[[VAL_6]] : i32
// CHECK: %[[VAL_8:.*]] = seq.compreg sym @p0_s0_reg1 %[[VAL_0]], %[[VAL_6]] : i32
// CHECK: %[[VAL_9:.*]] = hw.constant false
// CHECK: %[[VAL_10:.*]] = seq.compreg %[[VAL_2]], %[[VAL_3]], %[[VAL_4]], %[[VAL_9]] : i1
// CHECK: %[[VAL_10:.*]] = seq.compreg sym @p0_s0_valid %[[VAL_2]], %[[VAL_3]], %[[VAL_4]], %[[VAL_9]] : i1
// CHECK: %[[VAL_11:.*]] = comb.add %[[VAL_7]], %[[VAL_8]] : i32
// CHECK: hw.output %[[VAL_11]], %[[VAL_10]] : i32, i1
// CHECK: }
Expand Down
22 changes: 9 additions & 13 deletions test/Conversion/PipelineToHW/test_clockgates.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@
// CHECK: %[[VAL_5:.*]] = comb.sub %[[VAL_0]], %[[VAL_1]] : i32
// CHECK: %[[VAL_6:.*]] = hw.constant true
// CHECK: %[[VAL_7:.*]] = hw.constant false
// CHECK: %[[VAL_8:.*]] = comb.and %[[VAL_6]], %[[VAL_7]] : i1
// CHECK: %[[VAL_9:.*]] = comb.and %[[VAL_2]], %[[VAL_8]] : i1
// CHECK: %[[VAL_10:.*]] = comb.mux %[[VAL_9]], %[[VAL_5]], %[[VAL_11:.*]] : i32
// CHECK: %[[VAL_11]] = seq.compreg %[[VAL_10]], %[[VAL_3]] : i32
// CHECK: %[[VAL_12:.*]] = comb.mux %[[VAL_2]], %[[VAL_0]], %[[VAL_13:.*]] : i32
// CHECK: %[[VAL_13]] = seq.compreg %[[VAL_12]], %[[VAL_3]] : i32
// CHECK: %[[VAL_14:.*]] = hw.constant false
// CHECK: %[[VAL_15:.*]] = seq.compreg %[[VAL_2]], %[[VAL_3]], %[[VAL_4]], %[[VAL_14]] : i1
// CHECK: %[[VAL_16:.*]] = comb.add %[[VAL_11]], %[[VAL_13]] : i32
// CHECK: hw.output %[[VAL_16]], %[[VAL_15]] : i32, i1
// CHECK: %[[VAL_8:.*]] = seq.compreg.ce sym @p0_s0_reg0 %[[VAL_5]], %[[VAL_3]], %[[VAL_2]] : i32
// CHECK: %[[VAL_9:.*]] = seq.compreg.ce sym @p0_s0_reg1 %[[VAL_0]], %[[VAL_3]], %[[VAL_2]] : i32
// CHECK: %[[VAL_10:.*]] = hw.constant false
// CHECK: %[[VAL_11:.*]] = seq.compreg sym @p0_s0_valid %[[VAL_2]], %[[VAL_3]], %[[VAL_4]], %[[VAL_10]] : i1
// CHECK: %[[VAL_12:.*]] = comb.add %[[VAL_8]], %[[VAL_9]] : i32
// CHECK: hw.output %[[VAL_12]], %[[VAL_11]] : i32, i1
// CHECK: }

// CGATE-LABEL: hw.module @testSingle(
Expand All @@ -27,10 +23,10 @@
// CGATE: %[[VAL_8:.*]] = seq.clock_gate %[[VAL_3]], %[[VAL_2]]
// CGATE: %[[VAL_9:.*]] = seq.clock_gate %[[VAL_8]], %[[VAL_6]]
// CGATE: %[[VAL_10:.*]] = seq.clock_gate %[[VAL_9]], %[[VAL_7]]
// CGATE: %[[VAL_11:.*]] = seq.compreg %[[VAL_5]], %[[VAL_10]] : i32
// CGATE: %[[VAL_12:.*]] = seq.compreg %[[VAL_0]], %[[VAL_8]] : i32
// CGATE: %[[VAL_11:.*]] = seq.compreg sym @p0_s0_reg0 %[[VAL_5]], %[[VAL_10]] : i32
// CGATE: %[[VAL_12:.*]] = seq.compreg sym @p0_s0_reg1 %[[VAL_0]], %[[VAL_8]] : i32
// CGATE: %[[VAL_13:.*]] = hw.constant false
// CGATE: %[[VAL_14:.*]] = seq.compreg %[[VAL_2]], %[[VAL_3]], %[[VAL_4]], %[[VAL_13]] : i1
// CGATE: %[[VAL_14:.*]] = seq.compreg sym @p0_s0_valid %[[VAL_2]], %[[VAL_3]], %[[VAL_4]], %[[VAL_13]] : i1
// CGATE: %[[VAL_15:.*]] = comb.add %[[VAL_11]], %[[VAL_12]] : i32
// CGATE: hw.output %[[VAL_15]], %[[VAL_14]] : i32, i1
// CGATE: }
Expand Down
Loading

0 comments on commit b1448c2

Please sign in to comment.