diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4ac52a48b3a18..e9cacbf739ef7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -3131,6 +3131,22 @@ def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
 def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
 def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
 
+def gi_ro_Windexed8 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<8>">,
+    GIComplexPatternEquiv<ro_Windexed8>;
+def gi_ro_Windexed16 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<16>">,
+    GIComplexPatternEquiv<ro_Windexed16>;
+def gi_ro_Windexed32 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<32>">,
+    GIComplexPatternEquiv<ro_Windexed32>;
+def gi_ro_Windexed64 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<64>">,
+    GIComplexPatternEquiv<ro_Windexed64>;
+def gi_ro_Windexed128 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<128>">,
+    GIComplexPatternEquiv<ro_Windexed128>;
+
 class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
   let Name = "Mem" # Reg # "Extend" # Width;
   let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 193136fe53d8c..ad59a95de288f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -206,6 +206,14 @@ class AArch64InstructionSelector : public InstructionSelector {
   ComplexRendererFns selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                                      unsigned SizeInBytes) const;
+
+  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
+  /// or not a shift + extend should be folded into an addressing mode. Returns
+  /// None when this is not profitable or possible.
+  ComplexRendererFns
+  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
+                    MachineOperand &Offset, unsigned SizeInBytes,
+                    bool WantsExt) const;
   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                        unsigned SizeInBytes) const;
@@ -214,6 +222,13 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectAddrModeXRO(Root, Width / 8);
   }
 
+  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
+                                       unsigned SizeInBytes) const;
+  template <int Width>
+  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
+    return selectAddrModeWRO(Root, Width / 8);
+  }
+
   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
 
   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
@@ -228,6 +243,15 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectShiftedRegister(Root);
   }
 
+  /// Given an extend instruction, determine the correct shift-extend type for
+  /// that instruction.
+  ///
+  /// If the instruction is going to be used in a load or store, pass
+  /// \p IsLoadStore = true.
+  AArch64_AM::ShiftExtendType
+  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
+                       bool IsLoadStore = false) const;
+
   /// Instructions that accept extend modifiers like UXTW expect the register
   /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
   /// subregister copy if necessary. Return either ExtReg, or the result of the
@@ -4234,45 +4258,15 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
       [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
 }
 
-/// This is used for computing addresses like this:
-///
-/// ldr x1, [x2, x3, lsl #3]
-///
-/// Where x2 is the base register, and x3 is an offset register. The shift-left
-/// is a constant value specific to this load instruction. That is, we'll never
-/// see anything other than a 3 here (which corresponds to the size of the
-/// element being loaded.)
 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
-    MachineOperand &Root, unsigned SizeInBytes) const {
-  if (!Root.isReg())
-    return None;
-  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+AArch64InstructionSelector::selectExtendedSHL(
+    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
+    unsigned SizeInBytes, bool WantsExt) const {
+  assert(Base.isReg() && "Expected base to be a register operand");
+  assert(Offset.isReg() && "Expected offset to be a register operand");
 
-  // Make sure that the memory op is a valid size.
-  int64_t LegalShiftVal = Log2_32(SizeInBytes);
-  if (LegalShiftVal == 0)
-    return None;
-
-  // We want to find something like this:
-  //
-  // val = G_CONSTANT LegalShiftVal
-  // shift = G_SHL off_reg val
-  // ptr = G_PTR_ADD base_reg shift
-  // x = G_LOAD ptr
-  //
-  // And fold it into this addressing mode:
-  //
-  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
-
-  // Check if we can find the G_PTR_ADD.
-  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
-    return None;
-
-  // Now, try to match an opcode which will match our specific offset.
-  // We want a G_SHL or a G_MUL.
-  MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
   if (!OffsetInst)
     return None;
 
@@ -4280,6 +4274,10 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
     return None;
 
+  // Make sure that the memory op is a valid size.
+  int64_t LegalShiftVal = Log2_32(SizeInBytes);
+  if (LegalShiftVal == 0)
+    return None;
   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
     return None;
 
@@ -4324,20 +4322,75 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   if (ImmVal != LegalShiftVal)
     return None;
 
+  unsigned SignExtend = 0;
+  if (WantsExt) {
+    // Check if the offset is defined by an extend.
+    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+    if (Ext == AArch64_AM::InvalidShiftExtend)
+      return None;
+
+    SignExtend = Ext == AArch64_AM::SXTW;
+
+    // Need a 32-bit wide register here.
+    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
+    OffsetReg = ExtInst->getOperand(1).getReg();
+    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+  }
+
   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
   // offset. Signify that we are shifting by setting the shift flag to 1.
-  return {{[=](MachineInstrBuilder &MIB) {
-             MIB.addUse(Gep->getOperand(1).getReg());
-           },
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
-            MIB.addImm(0);
+            MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
 }
 
+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3, lsl #3]
+///
+/// Where x2 is the base register, and x3 is an offset register. The shift-left
+/// is a constant value specific to this load instruction. That is, we'll never
+/// see anything other than a 3 here (which corresponds to the size of the
+/// element being loaded.)
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
+    MachineOperand &Root, unsigned SizeInBytes) const {
+  if (!Root.isReg())
+    return None;
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+  // We want to find something like this:
+  //
+  // val = G_CONSTANT LegalShiftVal
+  // shift = G_SHL off_reg val
+  // ptr = G_PTR_ADD base_reg shift
+  // x = G_LOAD ptr
+  //
+  // And fold it into this addressing mode:
+  //
+  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+
+  // Check if we can find the G_PTR_ADD.
+  MachineInstr *PtrAdd =
+      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+    return None;
+
+  // Now, try to match an opcode which will match our specific offset.
+  // We want a G_SHL or a G_MUL.
+  MachineInstr *OffsetInst =
+      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
+  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
+                           OffsetInst->getOperand(0), SizeInBytes,
+                           /*WantsExt=*/false);
+}
+
 /// This is used for computing addresses like this:
 ///
 /// ldr x1, [x2, x3]
@@ -4399,6 +4452,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
   return selectAddrModeRegisterOffset(Root);
 }
 
+/// This is used for computing addresses like this:
+///
+/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
+///
+/// Where we have a 64-bit base register, a 32-bit offset register, and an
+/// extend (which may or may not be signed).
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
+                                              unsigned SizeInBytes) const {
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+  MachineInstr *PtrAdd =
+      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+    return None;
+
+  MachineOperand &LHS = PtrAdd->getOperand(1);
+  MachineOperand &RHS = PtrAdd->getOperand(2);
+  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
+
+  // The first case is the same as selectAddrModeXRO, except we need an extend.
+  // In this case, we try to find a shift and extend, and fold them into the
+  // addressing mode.
+  //
+  // E.g.
+  //
+  // off_reg = G_Z/S/ANYEXT ext_reg
+  // val = G_CONSTANT LegalShiftVal
+  // shift = G_SHL off_reg val
+  // ptr = G_PTR_ADD base_reg shift
+  // x = G_LOAD ptr
+  //
+  // In this case we can get a load like this:
+  //
+  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
+  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
+                                       SizeInBytes, /*WantsExt=*/true);
+  if (ExtendedShl)
+    return ExtendedShl;
+
+  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
+  //
+  // e.g.
+  // ldr something, [base_reg, ext_reg, sxtw]
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+    return None;
+
+  // Check if this is an extend. We'll get an extend type if it is.
+  AArch64_AM::ShiftExtendType Ext =
+      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
+  if (Ext == AArch64_AM::InvalidShiftExtend)
+    return None;
+
+  // Need a 32-bit wide register.
+  MachineIRBuilder MIB(*PtrAdd);
+  Register ExtReg =
+      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+  unsigned SignExtend = Ext == AArch64_AM::SXTW;
+
+  // Base is LHS, offset is ExtReg.
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
+           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+           [=](MachineInstrBuilder &MIB) {
+             MIB.addImm(SignExtend);
+             MIB.addImm(0);
+           }}};
+}
+
 /// Select a "register plus unscaled signed 9-bit immediate" address. This
 /// should only match when there is an offset that is not valid for a scaled
 /// immediate addressing mode. The "Size" argument is the size in bytes of the
@@ -4561,9 +4682,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
 }
 
-/// Get the correct ShiftExtendType for an extend instruction.
-static AArch64_AM::ShiftExtendType
-getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
+    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
   unsigned Opc = MI.getOpcode();
 
   // Handle explicit extend instructions first.
@@ -4610,9 +4730,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
   default:
     return AArch64_AM::InvalidShiftExtend;
   case 0xFF:
-    return AArch64_AM::UXTB;
+    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
   case 0xFFFF:
-    return AArch64_AM::UXTH;
+    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
   case 0xFFFFFFFF:
     return AArch64_AM::UXTW;
   }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
new file mode 100644
index 0000000000000..e7c95fbe1063d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
@@ -0,0 +1,431 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name: shl_gep_sext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to fold a shift + extend into the pattern.
+    ; In this case, we should get a roW load with two 1s, representing a shift
+    ; plus sign extend.
+
+    ; CHECK-LABEL: name: shl_gep_sext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: shl_gep_zext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to fold a shift + extend into the pattern.
+    ; In this case, we should get a roW load with a 0 representing a zero-extend
+    ; and a 1 representing a shift.
+
+    ; CHECK-LABEL: name: shl_gep_zext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ZEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: shl_gep_anyext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to fold a shift + extend into the pattern.
+    ; In this case, we should get a roW load with a 0 representing a zero-extend
+    ; and a 1 representing a shift.
+
+    ; CHECK-LABEL: name: shl_gep_anyext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ANYEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: mul_gep_sext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+
+    ; We should be able to do the same with multiplies as with shifts.
+
+    liveins: $w1, $x0
+    ; CHECK-LABEL: name: mul_gep_sext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 4
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: mul_gep_zext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to do the same with multiplies as with shifts.
+
+    ; CHECK-LABEL: name: mul_gep_zext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ZEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 4
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: mul_gep_anyext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to do the same with multiplies as with shifts.
+
+    ; CHECK-LABEL: name: mul_gep_anyext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ANYEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 4
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: ldrdrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0, $d0
+
+    ; Verify that we can select LDRDroW.
+
+    ; CHECK-LABEL: name: ldrdrow
+    ; CHECK: liveins: $w1, $x0, $d0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:fpr64 = LDRDroW %base, %foo, 1, 1 :: (load 8)
+    ; CHECK: $x0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+    $x0 = COPY %load(<2 x s32>)
+    RET_ReallyLR implicit $x0
+...
+---
+name: ldrxrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $w1, $x0, $d0
+
+    ; Verify that we can select LDRXroW.
+
+    ; CHECK-LABEL: name: ldrxrow
+    ; CHECK: liveins: $w1, $x0, $d0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr64 = LDRXroW %base, %foo, 1, 1 :: (load 8)
+    ; CHECK: $x0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x0 = COPY %load(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name: ldrbbrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $x0, $w0, $w1
+
+    ; Verify that we can select LDRBBroW. Note that there is no shift here,
+    ; but we still fold the extend into the addressing mode.
+
+    ; CHECK-LABEL: name: ldrbbrow
+    ; CHECK: liveins: $x0, $w0, $w1
+    ; CHECK: %val:gpr32 = COPY $w1
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %load:gpr32 = LDRBBroW %base, %val, 1, 0 :: (load 1)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %val:gpr(s32) = COPY $w1
+    %base:gpr(p0) = COPY $x0
+    %ext:gpr(s64) = G_SEXT %val(s32)
+    %ptr:gpr(p0) = G_PTR_ADD %base, %ext(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 1)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: ldrhrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $w1, $x0
+
+    ; Verify that we can select LDRHroW.
+
+    ; CHECK-LABEL: name: ldrhrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:fpr16 = LDRHroW %base, %foo, 1, 1 :: (load 2)
+    ; CHECK: $h0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $h0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:fpr(s16) = G_LOAD %ptr(p0) :: (load 2)
+    $h0 = COPY %load(s16)
+    RET_ReallyLR implicit $h0
+...
+---
+name: bad_and_mask_1
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; We should get a roX load here, not a roW load. We can't use the mask in
+    ; this test for an extend.
+
+    ; CHECK-LABEL: name: bad_and_mask_1
+    ; CHECK: liveins: $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+    ; CHECK: %and:gpr64common = ANDXri %imp, 4103
+    ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8)
+    ; CHECK: $x1 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x1
+    %base:gpr(p0) = COPY $x0
+    %imp:gpr(s64) = G_IMPLICIT_DEF
+    %bad_mask:gpr(s64) = G_CONSTANT i64 255
+    %and:gpr(s64) = G_AND %imp, %bad_mask
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %mul:gpr(s64) = G_MUL %c, %and
+    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x1 = COPY %load(s64)
+    RET_ReallyLR implicit $x1
+...
+---
+name: bad_and_mask_2
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; We should get a roX load here, not a roW load. We can't use the mask in
+    ; this test for an extend.
+
+    ; CHECK-LABEL: name: bad_and_mask_2
+    ; CHECK: liveins: $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+    ; CHECK: %and:gpr64common = ANDXri %imp, 4111
+    ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8)
+    ; CHECK: $x1 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x1
+    %base:gpr(p0) = COPY $x0
+    %imp:gpr(s64) = G_IMPLICIT_DEF
+    %bad_mask:gpr(s64) = G_CONSTANT i64 65535
+    %and:gpr(s64) = G_AND %imp, %bad_mask
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %mul:gpr(s64) = G_MUL %c, %and
+    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x1 = COPY %load(s64)
+    RET_ReallyLR implicit $x1
+...
+---
+name: and_uxtw
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment: 1
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; The mask used for the AND here is legal for producing a roW load.
+
+    ; CHECK-LABEL: name: and_uxtw
+    ; CHECK: liveins: $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+    ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)
+    ; CHECK: $x1 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x1
+    %base:gpr(p0) = COPY $x0
+    %imp:gpr(s64) = G_IMPLICIT_DEF
+    %mask:gpr(s64) = G_CONSTANT i64 4294967295
+    %and:gpr(s64) = G_AND %imp, %mask
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %mul:gpr(s64) = G_MUL %c, %and
+    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x1 = COPY %load(s64)
+    RET_ReallyLR implicit $x1
+...
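
A note on the bad_and_mask tests above: the roW forms only allow UXTW/SXTW (or a plain LSL) on the offset register, so an AND with 0xFF or 0xFFFF cannot be folded into a load or store as an extend, while an AND with 0xFFFFFFFF is exactly a UXTW. That is what the new IsLoadStore parameter on getExtendTypeForInst enforces. A rough C illustration (function name and expected assembly are my own, not output from this patch):

/* Masking a 64-bit index down to 32 bits is a free zero-extend in the
 * addressing mode, so we'd expect something like
 *     ldr x0, [x0, w1, uxtw #3]
 * whereas masks like 0xFF or 0xFFFF have no roW encoding and must stay
 * as separate AND instructions. */
long load_masked(long *base, unsigned long i) {
  return base[i & 0xFFFFFFFFUL];
}
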
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
new file mode 100644
index 0000000000000..41fcb6204726e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
@@ -0,0 +1,52 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: strwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1, $w2
+    ; CHECK-LABEL: name: strwrow
+    ; CHECK: liveins: $x0, $x1, $w2
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %dst:gpr32 = COPY $w2
+    ; CHECK: STRWroW %dst, %base, %foo, 1, 1 :: (store 4)
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %dst:gpr(s32) = COPY $w2
+    G_STORE %dst, %ptr :: (store 4)
+...
+---
+name: strxrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: strxrow
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %dst:gpr64 = COPY $x2
+    ; CHECK: STRXroW %dst, %base, %foo, 1, 1 :: (store 8)
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 3
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %dst:gpr(s64) = COPY $x2
+    G_STORE %dst, %ptr :: (store 8)
+...
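
For context, the kind of source code that exercises these patterns: indexing a pointer with a signed 32-bit value produces exactly the G_SEXT + G_SHL + G_PTR_ADD chain matched above. A minimal C example (the expected assembly is an assumption based on the roW addressing mode, not test output from this patch):

/* The sign-extend and shift of the index should now fold into the load:
 *     ldr w0, [x0, w1, sxtw #2]
 * rather than being materialized as a separate sxtw + add sequence. */
int load_elem(int *base, int i) {
  return base[i];
}
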