Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV][GISEL] Legalize, regbankselect, and instruction-select G_VSCALE #85967

Merged
merged 8 commits into from
Mar 27, 2024

Conversation

michaelmaitland
Copy link
Contributor

@michaelmaitland michaelmaitland commented Mar 20, 2024

G_VSCALE should be lowered using VLENB. If the type is not sXLen it should be lowered using a G_VSCALE on the narrow type and a G_MUL.

G_VSCALE should be lowered using VLENB.
@llvmbot
Copy link
Collaborator

llvmbot commented Mar 20, 2024

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-risc-v

Author: Michael Maitland (michaelmaitland)

Changes

G_VSCALE should be lowered using VLENB.


Full diff: https://github.com/llvm/llvm-project/pull/85967.diff

5 Files Affected:

  • (modified) llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp (+44)
  • (modified) llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h (+1)
  • (modified) llvm/lib/Target/RISCV/RISCVInstrGISel.td (+8)
  • (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir (+120)
  • (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir (+120)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 64ae4e94a8c929..a7829d4819ebd0 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -374,6 +374,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
       .clampScalar(0, s32, sXLen)
       .lowerForCartesianProduct({s32, sXLen, p0}, {p0});
 
+  getActionDefinitionsBuilder(G_VSCALE).customFor({sXLen});
+
   getLegacyLegalizerInfo().computeTables();
 }
 
@@ -495,6 +497,46 @@ bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm,
   return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
 }
 
+bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
+                                        MachineIRBuilder &MIB) const {
+  const LLT XLenTy(STI.getXLenVT());
+  Register Dst = MI.getOperand(0).getReg();
+
+  // We define our scalable vector types for lmul=1 to use a 64 bit known
+  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
+  // vscale as VLENB / 8.
+  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
+  if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+    report_fatal_error("Support for VLEN==32 is incomplete.");
+  // We assume VLENB is a multiple of 8. We manually choose the best shift
+  // here because SimplifyDemandedBits isn't always able to simplify it.
+  uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
+  if (isPowerOf2_64(Val)) {
+    uint64_t Log2 = Log2_64(Val);
+    if (Log2 < 3) {
+      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+      MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
+    } else if (Log2 > 3) {
+      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+      MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
+    } else {
+      MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
+    }
+  } else if ((Val % 8) == 0) {
+    // If the multiplier is a multiple of 8, scale it down to avoid needing
+    // to shift the VLENB value.
+    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+    MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
+  } else {
+    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
+    auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
+    MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool RISCVLegalizerInfo::legalizeCustom(
     LegalizerHelper &Helper, MachineInstr &MI,
     LostDebugLocObserver &LocObserver) const {
@@ -552,6 +594,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
   }
   case TargetOpcode::G_VASTART:
     return legalizeVAStart(MI, MIRBuilder);
+  case TargetOpcode::G_VSCALE:
+    return legalizeVScale(MI, MIRBuilder);
   }
 
   llvm_unreachable("expected switch to return");
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 323426034827e4..e2a98c8d2c736c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -42,6 +42,7 @@ class RISCVLegalizerInfo : public LegalizerInfo {
                            GISelChangeObserver &Observer) const;
 
   bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
+  bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
 };
 } // end namespace llvm
 #endif
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index ede8c9809833cc..54e22d6257814a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -24,3 +24,11 @@ def G_FCLASS : RISCVGenericInstruction {
   let hasSideEffects = false;
 }
 def : GINodeEquiv<G_FCLASS, riscv_fclass>;
+
+// Pseudo equivalent to a RISCVISD::READ_VLENB.
+def G_READ_VLENB : RISCVGenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins);
+  let hasSideEffects = false;
+}
+def : GINodeEquiv<G_READ_VLENB, riscv_read_vlenb>;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
new file mode 100644
index 00000000000000..60fc3c66adec4a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv32.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name:            test_1
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_1
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 1
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_2
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_2
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 2
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_3
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_3
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: $x11 = COPY [[C1]](s32)
+    ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 3
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_4
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_4
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C]](s32)
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 4
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_8
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_8
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 8
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_16
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_16
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[READ_VLENB]], [[C]](s32)
+    ; CHECK-NEXT: $x10 = COPY [[SHL]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 16
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_40
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_40
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s32)
+    ; CHECK-NEXT: $x11 = COPY [[C]](s32)
+    ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+    ; CHECK-NEXT: $x10 = COPY [[COPY]](s32)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s32) = G_VSCALE i32 40
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
new file mode 100644
index 00000000000000..3e140a5ef72a84
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-vscale-rv64.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name:            test_1
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_1
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 1
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_2
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_2
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 2
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_3
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_3
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+    ; CHECK-NEXT: $x11 = COPY [[C1]](s64)
+    ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: $x10 = COPY [[COPY]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 3
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_4
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_4
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C]](s64)
+    ; CHECK-NEXT: $x10 = COPY [[LSHR]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 4
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_8
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_8
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 8
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_16
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_16
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[READ_VLENB]], [[C]](s64)
+    ; CHECK-NEXT: $x10 = COPY [[SHL]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 16
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+---
+name:            test_40
+body:             |
+  bb.0.entry:
+
+    ; CHECK-LABEL: name: test_40
+    ; CHECK: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+    ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: $x10 = COPY [[READ_VLENB]](s64)
+    ; CHECK-NEXT: $x11 = COPY [[C]](s64)
+    ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) &__muldi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+    ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+    ; CHECK-NEXT: $x10 = COPY [[COPY]](s64)
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:_(s64) = G_VSCALE i64 40
+    $x10 = COPY %0
+    PseudoRET implicit $x10
+...
+
+

@@ -374,6 +374,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.clampScalar(0, s32, sXLen)
.lowerForCartesianProduct({s32, sXLen, p0}, {p0});

getActionDefinitionsBuilder(G_VSCALE).customFor({sXLen});
Copy link
Collaborator

@topperc topperc Mar 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need a clamp scalar if the type isn't sXLen?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can add that. This patch is a precommit for G_INSERT and G_EXTRACT which only build sXLen G_VSCALE instructions. So it was not needed for that patch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

@tschuett
Copy link
Member

Have you tried a negative factor for vscale? I got confused by:
https://github.com/llvm/llvm-project/blob/b20360abeb3a80281dc082f1e093abd13cb1ee4c/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L3962
buildVScale takes an unsigned, which makes it hard to put negative scales in.

@michaelmaitland
Copy link
Contributor Author

michaelmaitland commented Mar 20, 2024

Have you tried a negative factor for vscale? I got confused by: https://github.com/llvm/llvm-project/blob/b20360abeb3a80281dc082f1e093abd13cb1ee4c/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L3962 buildVScale takes an unsigned, which makes it hard to put negative scales in.

I think that we should not be able to put in negative scales right? This should be enforced my MachineVerifier. What does a negative scale mean in SDAG?

@tschuett
Copy link
Member

tschuett commented Mar 20, 2024

The MachineVerifier only forbids 0 IIRC. It is a common pattern sub(x, C) -> add(x, -C).

@michaelmaitland
Copy link
Contributor Author

The MachineVerifier only forbids 0 IIRC. It is a common pattern sub(x, C) -> add(x, -C).

I see that it only forbids 0. I would have thought it makes sense to forbid <= 0. I find it weird for someone to say "give me a negative vscale". I can understand someone to say "give me a positive vscale and negate it"

It looks like we allow negative vscale so that we can fold early. I would have expected that we would fold in that negative after the G_VSCALE is lowered. But I can update this so we are 1:1 with SDAG in this case.

// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
report_fatal_error("Support for VLEN==32 is incomplete.");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

return false to hit the fallback

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

PseudoRET implicit $x10
...


Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably should lean on more end to end tests, the new pseudo handling is incomplete here

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IRTranslator support for G_VSCALE is still missing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IRTranslator support for G_VSCALE is still missing.

That is correct. I posted this patch because I am trying to implement G_INSERT and G_EXTRACT legalization and that depends on building G_VSCALE during legalization phase.

@arsenm could you please clarify what you mean by the new pseudo handling is incomplete?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean the pseudo instruction is emitted here in the legalizer, but then there's no RegBankSelect or selection handling

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we don't have IRTranslator, would it be okay if I added regbank and ISEL with tests to this patch for the pseudos that exist from legalizing G_VSCALE?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arsenm I have added these tests.

Copy link

github-actions bot commented Mar 21, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@michaelmaitland
Copy link
Contributor Author

Committed in 4768150, a2476c9, and c00a5ab

case TargetOpcode::G_VSCALE: {
MachineOperand &SrcMO = MI.getOperand(1);
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not consistent with SelectionDAG. The vscale constant is always a signed value.

SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
  EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

  const APInt &MulImm = N->getConstantOperandAPInt(0);
  return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits()));
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I will revert and fix the issues in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed.

Copy link

✅ With the latest revision this PR passed the Python code formatter.

unsigned NarrowSize = NarrowTy.getSizeInBits();
int NumParts = TotalSize / NarrowSize;

SmallVector<Register, 4> PartRegs;
Copy link
Collaborator

@topperc topperc Mar 25, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't look right to me. Looks like you're just creating a G_VSCALE for each part using just the bits of the constant that belong to that part. G_VSCALE is a multiply, the full multiply result of the first part produce a 2x result. The second half of that full product needs to added to the second part.

I think you need to decompose it into a vscale with 1 and a G_MUL and let the G_MUL get legalized. That's what SelectionDAG does.

void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,               
                                           SDValue &Hi) {                        
  EVT VT = N->getValueType(0);                                                   
  EVT HalfVT =                                                                   
      EVT::getIntegerVT(*DAG.getContext(), N->getValueSizeInBits(0) / 2);        
  SDLoc dl(N);                                                                   
                                                                                 
  // We assume VSCALE(1) fits into a legal integer.                              
  APInt One(HalfVT.getSizeInBits(), 1);                                          
  SDValue VScaleBase = DAG.getVScale(dl, HalfVT, One);                           
  VScaleBase = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, VScaleBase);                
  SDValue Res = DAG.getNode(ISD::MUL, dl, VT, VScaleBase, N->getOperand(0));     
  SplitInteger(Res, Lo, Hi);                                                     
}     

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

LLT HalfTy = Ty.divide(2);

// Assume VSCALE(1) fits into a legal integer
const APInt One(HalfTy.getSizeInBits(), 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be NarrowTy insead of HalfTy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
const APInt &SrcVal = SrcMO.getCImm()->getValue();
// The CImm is always a signed value
const APInt &Val = SrcVal.sext(WideTy.getSizeInBits());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, this can be APInt Val =. The sext creates a new APInt, so you're taking a reference to a temporary object.

ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
$x10 = COPY %2(s32)
$x11 = COPY %3(s32)
PseudoCALL target-flags(riscv-call) &__mulsi3, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we test with the M extension?

@@ -0,0 +1,409 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this many test cases? Isn't the only new thing G_READ_VLENB?

Copy link
Collaborator

@topperc topperc left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@michaelmaitland michaelmaitland changed the title [RISCV][GISEL] Legalize G_VSCALE [RISCV][GISEL] Legalize, regbankselect, and instruction-select G_VSCALE Mar 27, 2024
@michaelmaitland michaelmaitland merged commit 54a9f0e into llvm:main Mar 27, 2024
4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

5 participants