-
Notifications
You must be signed in to change notification settings - Fork 11.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][GISEL] Legalize, regbankselect, and instruction-select G_VSCALE #85967
Conversation
G_VSCALE should be lowered using VLENB.
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v Author: Michael Maitland (michaelmaitland) ChangesG_VSCALE should be lowered using VLENB. Full diff: https://github.com/llvm/llvm-project/pull/85967.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 64ae4e94a8c929..a7829d4819ebd0 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -374,6 +374,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, s32, sXLen)
.lowerForCartesianProduct({s32, sXLen, p0}, {p0});
+ getActionDefinitionsBuilder(G_VSCALE).customFor({sXLen});
+
getLegacyLegalizerInfo().computeTables();
}
@@ -495,6 +497,46 @@ bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm,
return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}
+bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ const LLT XLenTy(STI.getXLenVT());
+ Register Dst = MI.getOperand(0).getReg();
+
+ // We define our scalable vector types for lmul=1 to use a 64 bit known
+ // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
+ // vscale as VLENB / 8.
+ static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
+ if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+ report_fatal_error("Support for VLEN==32 is incomplete.");
+ // We assume VLENB is a multiple of 8. We manually choose the best shift
+ // here because SimplifyDemandedBits isn't always able to simplify it.
+ uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
+ if (isPowerOf2_64(Val)) {
+ uint64_t Log2 = Log2_64(Val);
+ if (Log2 < 3) {
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
+ } else if (Log2 > 3) {
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
+ } else {
+ MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
+ }
+ } else if ((Val % 8) == 0) {
+ // If the multiplier is a multiple of 8, scale it down to avoid needing
+ // to shift the VLENB value.
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
+ } else {
+ auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+ auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
+ MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool RISCVLegalizerInfo::legalizeCustom(
LegalizerHelper &Helper, MachineInstr &MI,
LostDebugLocObserver &LocObserver) const {
@@ -552,6 +594,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
}
case TargetOpcode::G_VASTART:
return legalizeVAStart(MI, MIRBuilder);
+ case TargetOpcode::G_VSCALE:
+ return legalizeVScale(MI, MIRBuilder);
}
llvm_unreachable("expected switch to return");
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 323426034827e4..e2a98c8d2c736c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -42,6 +42,7 @@ class RISCVLegalizerInfo : public LegalizerInfo {
GISelChangeObserver &Observer) const;
bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
+ bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
};
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index ede8c9809833cc..54e22d6257814a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -24,3 +24,11 @@ def G_FCLASS : RISCVGenericInstruction {
let hasSideEffects = false;
}
def : GINodeEquiv<G_FCLASS, riscv_fclass>;
+
+// Pseudo equivalent to a RISCVISD::READ_VLENB.
+def G_READ_VLENB : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_READ_VLENB, riscv_read_vlenb>;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
new file mode 100644
index 00000000000000..60fc3c66adec4a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_1
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_1
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 1
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_2
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_2
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 2
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_3
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_3
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 3
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_4
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_4
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 4
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_8
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_8
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 8
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_16
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_16
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[READ_VLENB]], [[C]](s32)
+ ; CHECK-NEXT: $x10 = COPY [[SHL]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 16
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_40
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_40
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+ ; CHECK-NEXT: $x11 = COPY [[C]](s32)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s32) = G_VSCALE i32 40
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
new file mode 100644
index 00000000000000..3e140a5ef72a84
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_1
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_1
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 1
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_2
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_2
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 2
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_3
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_3
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: $x11 = COPY [[C1]](s64)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 3
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_4
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_4
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 4
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_8
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_8
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 8
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_16
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_16
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[READ_VLENB]], [[C]](s64)
+ ; CHECK-NEXT: $x10 = COPY [[SHL]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 16
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+---
+name: test_40
+body: |
+ bb.0.entry:
+
+ ; CHECK-LABEL: name: test_40
+ ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
+ ; CHECK-NEXT: $x11 = COPY [[C]](s64)
+ ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; CHECK-NEXT: $x10 = COPY [[COPY]](s64)
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:_(s64) = G_VSCALE i64 40
+ $x10 = COPY %0
+ PseudoRET implicit $x10
+...
+
+
|
@@ -374,6 +374,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) | |||
.clampScalar(0, s32, sXLen) | |||
.lowerForCartesianProduct({s32, sXLen, p0}, {p0}); | |||
|
|||
getActionDefinitionsBuilder(G_VSCALE).customFor({sXLen}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't you need a clamp scalar if the type isn't sXLen?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can add that. This patch is a precommit for G_INSERT and G_EXTRACT which only build sXLen G_VSCALE instructions. So it was not needed for that patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added
Have you tried a negative factor for vscale? I got confused by: |
I think that we should not be able to put in negative scales right? This should be enforced my MachineVerifier. What does a negative scale mean in SDAG? |
The MachineVerifier only forbids 0 IIRC. It is a common pattern sub(x, C) -> add(x, -C). |
I see that it only forbids 0. I would have thought it makes sense to forbid <= 0. I find it weird for someone to say "give me a negative vscale". I can understand someone to say "give me a positive vscale and negate it" It looks like we allow negative vscale so that we can fold early. I would have expected that we would fold in that negative after the G_VSCALE is lowered. But I can update this so we are 1:1 with SDAG in this case. |
// vscale as VLENB / 8. | ||
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!"); | ||
if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock) | ||
report_fatal_error("Support for VLEN==32 is incomplete."); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return false to hit the fallback
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added
PseudoRET implicit $x10 | ||
... | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably should lean on more end to end tests, the new pseudo handling is incomplete here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IRTranslator support for G_VSCALE is still missing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IRTranslator support for G_VSCALE is still missing.
That is correct. I posted this patch because I am trying to implement G_INSERT and G_EXTRACT legalization and that depends on building G_VSCALE during legalization phase.
@arsenm could you please clarify what you mean by the new pseudo handling is incomplete?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean the pseudo instruction is emitted here in the legalizer, but then there's no RegBankSelect or selection handling
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we don't have IRTranslator, would it be okay if I added regbank and ISEL with tests to this patch for the pseudos that exist from legalizing G_VSCALE?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm I have added these tests.
✅ With the latest revision this PR passed the C/C++ code formatter. |
46864fc
to
c6b84cc
Compare
case TargetOpcode::G_VSCALE: { | ||
MachineOperand &SrcMO = MI.getOperand(1); | ||
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); | ||
unsigned ExtOpc = LI.getExtOpcodeForWideningConstant( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not consistent with SelectionDAG. The vscale constant is always a signed value.
SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
const APInt &MulImm = N->getConstantOperandAPInt(0);
return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I will revert and fix the issues in this PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
addressed.
✅ With the latest revision this PR passed the Python code formatter. |
unsigned NarrowSize = NarrowTy.getSizeInBits(); | ||
int NumParts = TotalSize / NarrowSize; | ||
|
||
SmallVector<Register, 4> PartRegs; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't look right to me. Looks like you're just creating a G_VSCALE for each part using just the bits of the constant that belong to that part. G_VSCALE is a multiply, the full multiply result of the first part produce a 2x result. The second half of that full product needs to added to the second part.
I think you need to decompose it into a vscale with 1 and a G_MUL and let the G_MUL get legalized. That's what SelectionDAG does.
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT VT = N->getValueType(0);
EVT HalfVT =
EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2);
SDLoc dl(N);
// We assume VSCALE(1) fits into a legal integer.
APInt One(HalfVT.getSizeInBits(), 1);
SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);
VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);
SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));
SplitInteger(Res, Lo, Hi);
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLT HalfTy = Ty.divide(2); | ||
|
||
// Assume VSCALE(1) fits into a legal integer | ||
const APInt One(HalfTy.getSizeInBits(), 1); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this be NarrowTy insead of HalfTy?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext(); | ||
const APInt &SrcVal = SrcMO.getCImm()->getValue(); | ||
// The CImm is always a signed value | ||
const APInt &Val = SrcVal.sext(WideTy.getSizeInBits()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, this can be APInt Val =
. The sext creates a new APInt, so you're taking a reference to a temporary object.
ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2 | ||
$x10 = COPY %2(s32) | ||
$x11 = COPY %3(s32) | ||
PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we test with the M extension?
@@ -0,0 +1,409 @@ | |||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need this many test cases? Isn't the only new thing G_READ_VLENB?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
G_VSCALE should be lowered using VLENB. If the type is not sXLen it should be lowered using a G_VSCALE on the narrow type and a G_MUL.