-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Teach areMemAccessesTriviallyDisjoint about scalable widths. #73655
[AArch64] Teach areMemAccessesTriviallyDisjoint about scalable widths. #73655
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesThe base change here is to change getMemOperandWithOffsetWidth to return a TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to reason about trivially disjoint widths. Patch is 31.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73655.diff 7 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..6faad4ec4acb2c3 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1214,8 +1214,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
SEH->eraseFromParent();
}
- TypeSize Scale = TypeSize::getFixed(1);
- unsigned Width;
+ TypeSize Scale = TypeSize::getFixed(1), Width(0, false);
int64_t MinOffset, MaxOffset;
bool Success = static_cast<const AArch64InstrInfo *>(TII)->getMemOpInfo(
NewOpc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index a95ab3c7e0f288a..01fecbf4c1408be 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1096,7 +1096,7 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
const TargetRegisterInfo *TRI = &getRegisterInfo();
const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
int64_t OffsetA = 0, OffsetB = 0;
- unsigned WidthA = 0, WidthB = 0;
+ TypeSize WidthA(0, false), WidthB(0, false);
bool OffsetAIsScalable = false, OffsetBIsScalable = false;
assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
@@ -1121,8 +1121,9 @@ bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
OffsetAIsScalable == OffsetBIsScalable) {
int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
- int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
- if (LowOffset + LowWidth <= HighOffset)
+ TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowWidth.isScalable() == OffsetAIsScalable &&
+ LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
return true;
}
}
@@ -2675,9 +2676,15 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
return false;
const MachineOperand *BaseOp;
+ TypeSize WidthN(0, false);
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
- Width, TRI))
+ WidthN, TRI))
return false;
+ // The maximum vscale is 16 for AArch64; return the maximal extent for the
+ // vector.
+ Width = WidthN.isScalable()
+ ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector / 128
+ : WidthN.getKnownMinValue();
BaseOps.push_back(BaseOp);
return true;
}
@@ -3456,7 +3463,7 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
- bool &OffsetIsScalable, unsigned &Width,
+ bool &OffsetIsScalable, TypeSize &Width,
const TargetRegisterInfo *TRI) const {
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
// Handle only loads/stores with base register followed by immediate offset.
@@ -3511,26 +3518,25 @@ AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
}
bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
- unsigned &Width, int64_t &MinOffset,
+ TypeSize &Width, int64_t &MinOffset,
int64_t &MaxOffset) {
- const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
switch (Opcode) {
// Not a memory operation or something we want to handle.
default:
Scale = TypeSize::getFixed(0);
- Width = 0;
+ Width = TypeSize::getFixed(0);
MinOffset = MaxOffset = 0;
return false;
case AArch64::STRWpost:
case AArch64::LDRWpost:
- Width = 32;
+ Width = TypeSize::getFixed(32);
Scale = TypeSize::getFixed(4);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDURQi:
case AArch64::STURQi:
- Width = 16;
+ Width = TypeSize::getFixed(16);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3542,7 +3548,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURXi:
case AArch64::STURDi:
case AArch64::STLURXi:
- Width = 8;
+ Width = TypeSize::getFixed(8);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3555,7 +3561,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURWi:
case AArch64::STURSi:
case AArch64::STLURWi:
- Width = 4;
+ Width = TypeSize::getFixed(4);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3570,7 +3576,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURHi:
case AArch64::STURHHi:
case AArch64::STLURHi:
- Width = 2;
+ Width = TypeSize::getFixed(2);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3585,7 +3591,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STURBi:
case AArch64::STURBBi:
case AArch64::STLURBi:
- Width = 1;
+ Width = TypeSize::getFixed(1);
Scale = TypeSize::getFixed(1);
MinOffset = -256;
MaxOffset = 255;
@@ -3595,14 +3601,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPQi:
case AArch64::STNPQi:
Scale = TypeSize::getFixed(16);
- Width = 32;
+ Width = TypeSize::getFixed(32);
MinOffset = -64;
MaxOffset = 63;
break;
case AArch64::LDRQui:
case AArch64::STRQui:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3615,7 +3621,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPXi:
case AArch64::STNPDi:
Scale = TypeSize::getFixed(8);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3625,14 +3631,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRXui:
case AArch64::STRDui:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
case AArch64::StoreSwiftAsyncContext:
// Store is an STRXui, but there might be an ADDXri in the expansion too.
Scale = TypeSize::getFixed(1);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3645,7 +3651,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STNPWi:
case AArch64::STNPSi:
Scale = TypeSize::getFixed(4);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3655,7 +3661,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRWui:
case AArch64::STRSui:
Scale = TypeSize::getFixed(4);
- Width = 4;
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3666,7 +3672,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRHui:
case AArch64::STRHHui:
Scale = TypeSize::getFixed(2);
- Width = 2;
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3677,7 +3683,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STRBui:
case AArch64::STRBBui:
Scale = TypeSize::getFixed(1);
- Width = 1;
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 4095;
break;
@@ -3686,14 +3692,14 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STPDpre:
case AArch64::LDPDpost:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -512;
MaxOffset = 504;
break;
case AArch64::STPQpre:
case AArch64::LDPQpost:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -1024;
MaxOffset = 1008;
break;
@@ -3702,26 +3708,26 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRXpost:
case AArch64::LDRDpost:
Scale = TypeSize::getFixed(1);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STRQpre:
case AArch64::LDRQpost:
Scale = TypeSize::getFixed(1);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::ADDG:
Scale = TypeSize::getFixed(16);
- Width = 0;
+ Width = TypeSize::getFixed(0);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::TAGPstack:
Scale = TypeSize::getFixed(16);
- Width = 0;
+ Width = TypeSize::getFixed(0);
// TAGP with a negative offset turns into SUBP, which has a maximum offset
// of 63 (not 64!).
MinOffset = -63;
@@ -3731,42 +3737,42 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::STGi:
case AArch64::STZGi:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STR_ZZZZXI:
case AArch64::LDR_ZZZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 4;
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -256;
MaxOffset = 252;
break;
case AArch64::STR_ZZZXI:
case AArch64::LDR_ZZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 3;
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -256;
MaxOffset = 253;
break;
case AArch64::STR_ZZXI:
case AArch64::LDR_ZZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector * 2;
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -256;
MaxOffset = 254;
break;
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
Scale = TypeSize::getScalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Width = TypeSize::getScalable(2);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::LDR_ZXI:
case AArch64::STR_ZXI:
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector;
+ Width = TypeSize::getScalable(16);
MinOffset = -256;
MaxOffset = 255;
break;
@@ -3793,7 +3799,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A full vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(16);
- Width = SVEMaxBytesPerVector;
+ Width = TypeSize::getScalable(16);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3806,7 +3812,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST2W_IMM:
case AArch64::ST2D_IMM:
Scale = TypeSize::getScalable(32);
- Width = SVEMaxBytesPerVector * 2;
+ Width = TypeSize::getScalable(16 * 2);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3819,7 +3825,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST3W_IMM:
case AArch64::ST3D_IMM:
Scale = TypeSize::getScalable(48);
- Width = SVEMaxBytesPerVector * 3;
+ Width = TypeSize::getScalable(16 * 3);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3832,7 +3838,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST4W_IMM:
case AArch64::ST4D_IMM:
Scale = TypeSize::getScalable(64);
- Width = SVEMaxBytesPerVector * 4;
+ Width = TypeSize::getScalable(16 * 4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3854,7 +3860,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A half vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(8);
- Width = SVEMaxBytesPerVector / 2;
+ Width = TypeSize::getScalable(8);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3871,7 +3877,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// A quarter vector worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(4);
- Width = SVEMaxBytesPerVector / 4;
+ Width = TypeSize::getScalable(4);
MinOffset = -8;
MaxOffset = 7;
break;
@@ -3883,20 +3889,20 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
// An eighth vector's worth of data
// Width = mbytes * elements
Scale = TypeSize::getScalable(2);
- Width = SVEMaxBytesPerVector / 8;
+ Width = TypeSize::getScalable(2);
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2Gi:
case AArch64::STZ2Gi:
Scale = TypeSize::getFixed(16);
- Width = 32;
+ Width = TypeSize::getFixed(32);
MinOffset = -256;
MaxOffset = 255;
break;
case AArch64::STGPi:
Scale = TypeSize::getFixed(16);
- Width = 16;
+ Width = TypeSize::getFixed(16);
MinOffset = -64;
MaxOffset = 63;
break;
@@ -3908,7 +3914,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSB_S_IMM:
case AArch64::LD1RSB_D_IMM:
Scale = TypeSize::getFixed(1);
- Width = 1;
+ Width = TypeSize::getFixed(1);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3918,7 +3924,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RSH_S_IMM:
case AArch64::LD1RSH_D_IMM:
Scale = TypeSize::getFixed(2);
- Width = 2;
+ Width = TypeSize::getFixed(2);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -3926,13 +3932,13 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
Scale = TypeSize::getFixed(4);
- Width = 4;
+ Width = TypeSize::getFixed(4);
MinOffset = 0;
MaxOffset = 63;
break;
case AArch64::LD1RD_IMM:
Scale = TypeSize::getFixed(8);
- Width = 8;
+ Width = TypeSize::getFixed(8);
MinOffset = 0;
MaxOffset = 63;
break;
@@ -5634,8 +5640,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
}
// Get the min/max offset and the scale.
- TypeSize ScaleValue(0U, false);
- unsigned Width;
+ TypeSize ScaleValue(0U, false), Width(0U, false);
int64_t MinOff, MaxOff;
if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
MaxOff))
@@ -8392,8 +8397,8 @@ AArch64InstrInfo::getOutliningCandidateInfo(
// if fixing it up would be in range.
int64_t MinOffset,
MaxOffset; // Unscaled offsets for the instruction.
- TypeSize Scale(0U, false); // The scale to multiply the offsets by.
- unsigned DummyWidth;
+ // The scale to multiply the offsets by.
+ TypeSize Scale(0U, false), DummyWidth(0, false);
getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
Offset += 16; // Update the offset to what it would be if we outlined.
@@ -8898,7 +8903,7 @@ AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
for (MachineInstr &MI : MBB) {
const MachineOperand *Base;
- unsigned Width;
+ TypeSize Width(0, false);
int64_t Offset;
bool OffsetIsScalable;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index a934103c90cbf92..8835e6a1779983e 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -165,7 +165,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
bool getMemOperandWithOffsetWidth(const MachineInstr &MI,
const MachineOperand *&BaseOp,
int64_t &Offset, bool &OffsetIsScalable,
- unsigned &Width,
+ TypeSize &Width,
const TargetRegisterInfo *TRI) const;
/// Return the immediate offset of the base register in a load/store \p LdSt.
@@ -175,7 +175,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
/// \p Scale, \p Width, \p MinOffset, and \p MaxOffset accordingly.
///
/// For unscaled instructions, \p Scale is set to 1.
- static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, unsigned &Width,
+ static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width,
int64_t &MinOffset, int64_t &MaxOffset);
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index b8b74ae8404d3f3..4afc678abaca63c 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -220,8 +220,7 @@ static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
}
// The implicit scale for Offset is 8.
- TypeSize Scale(0U, false);
- unsigned Width;
+ TypeSize Scale(0U, false), Width(0U, false);
int64_t MinOffset, MaxOffset;
[[maybe_unused]] bool Success =
AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
@@ -262,8 +261,7 @@ static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
}
// The implicit scale for Offset is 8.
- TypeSize Scale(0U, false);
- unsigned Width;
+ TypeSize Scale(0U, false), Width(0U, false);
int64_t MinOffset, MaxOffset;
[[maybe_unused]] bool Success =
AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 110f0ef7f4a5500..7244ac949ab88c3 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -59,9 +59,9 @@ define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-aliasing.ll b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
index a6d7e1c0fbab173..a83dc494b3bd2c4 100644
--- a/llvm/test/CodeGen/AArch64/sve-aliasing.ll
+++ b/llvm/test/CodeGen/AArch64/sve-aliasing.ll
@@ -8,15 +8,15 @@ define void @scalable_v16i8(ptr noalias nocapture noundef %l0) {
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
+; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: mul z2.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: movprfx z3, z1
+; CHECK-NEXT: mul z3.b, p0/m, z3.b, z1.b
+; CHECK-NEXT: eor z0.d, z2.d, z0.d
+; CHECK-NEXT: eor z1.d, z3.d, z1.d
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z0.b
-; CHECK-NEXT: eor z0.d, z1.d, z0.d
-; CHECK-NEXT: st1b { z0.b }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: st1b { z1.b }, p0, [x0, #1, mul vl]
; CHECK-NEXT: ret
%l3 = load <vscale x 16 x i8>, ptr %l0, align 16
%l5 = mul <vscale x 16 x i8> %l3, %l3
@@ -37,15 +37,15 @@ define void @scalable_v8i16(ptr noalias nocapture nou...
[truncated]
|
@@ -3918,21 +3924,21 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale, | |||
case AArch64::LD1RSH_S_IMM: | |||
case AArch64::LD1RSH_D_IMM: | |||
Scale = TypeSize::getFixed(2); | |||
Width = 2; | |||
Width = TypeSize::getFixed(2); | |||
MinOffset = 0; | |||
MaxOffset = 63; | |||
break; | |||
case AArch64::LD1RW_IMM: | |||
case AArch64::LD1RW_D_IMM: | |||
case AArch64::LD1RSW_IMM: | |||
Scale = TypeSize::getFixed(4); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
aren't LD1RSW* SVE instructions as well? Any reasons why we don't change them to getScalable
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe they load a scalar and broadcast them to all lanes, so only load a fixed number of bytes is read from memory.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ah ok yeah makes sense
lgtm |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks like a nice improvement to alias analysis! I just had a few minor comments ...
@@ -1214,8 +1214,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( | |||
SEH->eraseFromParent(); | |||
} | |||
|
|||
TypeSize Scale = TypeSize::getFixed(1); | |||
unsigned Width; | |||
TypeSize Scale = TypeSize::getFixed(1), Width(0, false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be nice to use consistent notation here and have
TypeSize Scale = TypeSize::getFixed(1), Width = TypeSize::getFixed(0);
return false; | ||
// The maximum vscale is 16 undef AArch64, return the maximal extent for the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this comment be
// The maximum vscale is 16 for AArch64
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we have tests for loads and stores of predicate vectors, i.e. <vscale x 16 x i1>, <vscale x 8 x i1>, <vscale x 4 x i1>, <vscale x 2 x i1>? I'd expect aliasing information to be useful for those cases too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't seem to be able to generate offset versions of these instructions at the moment (except for stack spills/reloads), so I've made it a mir test.
The base change here is to change getMemOperandWithOffsetWidth to return a TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to reason about trivially disjoint widths.
0ef1a94
to
25f4c23
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM! Thanks for making the changes.
The base change here is to change getMemOperandWithOffsetWidth to return a TypeSize Width, which in turn allows areMemAccessesTriviallyDisjoint to reason about trivially disjoint widths.