diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index a7e6d37419a848..4fdfabbfb16125 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -110,7 +110,7 @@ struct LegalityQuery {
   ArrayRef<LLT> Types;

   struct MemDesc {
-    uint64_t SizeInBits;
+    LLT MemoryTy;
     uint64_t AlignInBits;
     AtomicOrdering Ordering;
   };
@@ -196,13 +196,12 @@ namespace LegalityPredicates {
 struct TypePairAndMemDesc {
   LLT Type0;
   LLT Type1;
-  uint64_t MemSize;
+  LLT MemTy;
   uint64_t Align;

   bool operator==(const TypePairAndMemDesc &Other) const {
     return Type0 == Other.Type0 && Type1 == Other.Type1 &&
-           Align == Other.Align &&
-           MemSize == Other.MemSize;
+           Align == Other.Align && MemTy == Other.MemTy;
   }

   /// \returns true if this memory access is legal with for the access described
@@ -210,7 +209,9 @@ struct TypePairAndMemDesc {
   bool isCompatible(const TypePairAndMemDesc &Other) const {
     return Type0 == Other.Type0 && Type1 == Other.Type1 &&
            Align >= Other.Align &&
-           MemSize == Other.MemSize;
+           // FIXME: This perhaps should be stricter, but the current legality
+           // rules are written only considering the size.
+           MemTy.getSizeInBits() == Other.MemTy.getSizeInBits();
   }
 };
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 89b127c3df5f05..bb02c101fd048b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -489,7 +489,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
   // Check for legality.
   if (LI) {
     LegalityQuery::MemDesc MMDesc;
-    MMDesc.SizeInBits = MMO.getSizeInBits();
+    MMDesc.MemoryTy = MMO.getMemoryType();
     MMDesc.AlignInBits = MMO.getAlign().value() * 8;
     MMDesc.Ordering = MMO.getSuccessOrdering();
     LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
@@ -3709,7 +3709,7 @@ bool CombinerHelper::matchLoadOrCombine(
   Register Ptr = LowestIdxLoad->getOperand(1).getReg();
   const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
   LegalityQuery::MemDesc MMDesc;
-  MMDesc.SizeInBits = WideMemSizeInBits;
+  MMDesc.MemoryTy = Ty;
   MMDesc.AlignInBits = MMO.getAlign().value() * 8;
   MMDesc.Ordering = MMO.getSuccessOrdering();
   if (!isLegalOrBeforeLegalizer(
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 1993f6033291ed..7c5e4e52ca3e71 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -55,7 +55,7 @@ LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
   SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit;
   return [=](const LegalityQuery &Query) {
     TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
-                                Query.MMODescrs[MMOIdx].SizeInBits,
+                                Query.MMODescrs[MMOIdx].MemoryTy,
                                 Query.MMODescrs[MMOIdx].AlignInBits};
     return llvm::any_of(TypesAndMemDesc,
                         [=](const TypePairAndMemDesc &Entry) -> bool {
@@ -176,7 +176,7 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
 LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
   return [=](const LegalityQuery &Query) {
-    return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
+    return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
   };
 }
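Note: with `MemDesc` carrying an `LLT` instead of a raw bit count, legality rules can now distinguish how a value is laid out in memory, not just how wide it is. A minimal sketch of a custom predicate against the new layout (the predicate itself is hypothetical, not part of this patch; `MemoryTy` is the field introduced above):

```cpp
// Hypothetical predicate: accept only non-extending, non-truncating scalar
// accesses, i.e. the in-memory type is a scalar whose size exactly matches
// the register type at TypeIdx.
static LegalityPredicate scalarMemTypeMatches(unsigned TypeIdx,
                                              unsigned MMOIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
    return !MemTy.isVector() &&
           MemTy.getSizeInBits() == Query.Types[TypeIdx].getSizeInBits();
  };
}
```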
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index c757cb65947e64..3e3141657e87a0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -88,7 +88,7 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
   OS << Opcode << ", MMOs={";
   for (const auto &MMODescr : MMODescrs) {
-    OS << MMODescr.SizeInBits << ", ";
+    OS << MMODescr.MemoryTy << ", ";
   }
   OS << "}";
@@ -352,8 +352,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,
   SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
   for (const auto &MMO : MI.memoperands())
-    MemDescrs.push_back({8 * MMO->getSize() /* in bits */,
-                         8 * MMO->getAlign().value(),
+    MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(),
                          MMO->getSuccessOrdering()});

   return getAction({MI.getOpcode(), Types, MemDescrs});
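Note: for reference, a sketch of the query shape `getAction` now assembles, built by hand the way a unit test might (the s64-to-s32 truncating store is an arbitrary example; the field values mirror what the loop above derives from the `MachineMemOperand`):

```cpp
// A truncating store of an s64 register into 32 bits of memory. MemoryTy
// comes from MMO->getMemoryType() in the real code path.
LegalityQuery::MemDesc StoreDesc;
StoreDesc.MemoryTy = LLT::scalar(32);        // type actually in memory
StoreDesc.AlignInBits = 32;                  // 8 * MMO->getAlign().value()
StoreDesc.Ordering = AtomicOrdering::NotAtomic;

const LLT Types[] = {LLT::scalar(64), LLT::pointer(0, 64)}; // assumed p0
const LegalityQuery::MemDesc MMOs[] = {StoreDesc};
LegalityQuery Query(TargetOpcode::G_STORE, Types, MMOs);
```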
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 80f7cdded3e895..e2532b23e81a2b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -252,15 +252,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
       .lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
-      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
-                                 {s32, p0, 16, 8},
-                                 {s32, p0, 32, 8},
-                                 {s64, p0, 8, 2},
-                                 {s64, p0, 16, 2},
-                                 {s64, p0, 32, 4},
-                                 {s64, p0, 64, 8},
-                                 {p0, p0, 64, 8},
-                                 {v2s32, p0, 64, 8}})
+      .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+                                 {s32, p0, s16, 8},
+                                 {s32, p0, s32, 8},
+                                 {s64, p0, s8, 2},
+                                 {s64, p0, s16, 2},
+                                 {s64, p0, s32, 4},
+                                 {s64, p0, s64, 8},
+                                 {p0, p0, s64, 8},
+                                 {v2s32, p0, s64, 8}})
       .clampScalar(0, s32, s64)
       .widenScalarToNextPow2(0)
       // TODO: We could support sum-of-pow2's but the lowering code doesn't know
@@ -278,34 +278,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   };

   getActionDefinitionsBuilder(G_LOAD)
-      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
-                                 {s16, p0, 16, 8},
-                                 {s32, p0, 32, 8},
-                                 {s64, p0, 64, 8},
-                                 {p0, p0, 64, 8},
-                                 {s128, p0, 128, 8},
-                                 {v8s8, p0, 64, 8},
-                                 {v16s8, p0, 128, 8},
-                                 {v4s16, p0, 64, 8},
-                                 {v8s16, p0, 128, 8},
-                                 {v2s32, p0, 64, 8},
-                                 {v4s32, p0, 128, 8},
-                                 {v2s64, p0, 128, 8}})
+      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
+                                 {s16, p0, s16, 8},
+                                 {s32, p0, s32, 8},
+                                 {s64, p0, s64, 8},
+                                 {p0, p0, s64, 8},
+                                 {s128, p0, s128, 8},
+                                 {v8s8, p0, s64, 8},
+                                 {v16s8, p0, s128, 8},
+                                 {v4s16, p0, s64, 8},
+                                 {v8s16, p0, s128, 8},
+                                 {v2s32, p0, s64, 8},
+                                 {v4s32, p0, s128, 8},
+                                 {v2s64, p0, s128, 8}})
       // These extends are also legal
-      .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
+      .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
       .clampScalar(0, s8, s64)
       .lowerIfMemSizeNotPow2()
       .widenScalarToNextPow2(0)
       .narrowScalarIf([=](const LegalityQuery &Query) {
             // Clamp extending load results to 32-bits.
             return Query.Types[0].isScalar() &&
-                   Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits &&
-                   Query.Types[0].getSizeInBits() > 32;
+                   Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
+                   Query.Types[0].getSizeInBits() > 32;
           },
           changeTo(0, s32))
       // Lower any any-extending loads left into G_ANYEXT and G_LOAD
       .lowerIf([=](const LegalityQuery &Query) {
-        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
+        return Query.Types[0] != Query.MMODescrs[0].MemoryTy;
       })
       .clampMaxNumElements(0, s8, 16)
       .clampMaxNumElements(0, s16, 8)
@@ -314,31 +314,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .customIf(IsPtrVecPred);

   getActionDefinitionsBuilder(G_STORE)
-      .legalForTypesWithMemDesc({{s8, p0, 8, 8},
-                                 {s16, p0, 8, 8}, // truncstorei8 from s16
-                                 {s32, p0, 8, 8}, // truncstorei8 from s32
-                                 {s64, p0, 8, 8}, // truncstorei8 from s64
-                                 {s16, p0, 16, 8},
-                                 {s32, p0, 16, 8}, // truncstorei16 from s32
-                                 {s64, p0, 16, 8}, // truncstorei16 from s64
-                                 {s32, p0, 8, 8},
-                                 {s32, p0, 16, 8},
-                                 {s32, p0, 32, 8},
-                                 {s64, p0, 64, 8},
-                                 {s64, p0, 32, 8}, // truncstorei32 from s64
-                                 {p0, p0, 64, 8},
-                                 {s128, p0, 128, 8},
-                                 {v16s8, p0, 128, 8},
-                                 {v8s8, p0, 64, 8},
-                                 {v4s16, p0, 64, 8},
-                                 {v8s16, p0, 128, 8},
-                                 {v2s32, p0, 64, 8},
-                                 {v4s32, p0, 128, 8},
-                                 {v2s64, p0, 128, 8}})
+      .legalForTypesWithMemDesc({{s8, p0, s8, 8},
+                                 {s16, p0, s8, 8}, // truncstorei8 from s16
+                                 {s32, p0, s8, 8}, // truncstorei8 from s32
+                                 {s64, p0, s8, 8}, // truncstorei8 from s64
+                                 {s16, p0, s16, 8},
+                                 {s32, p0, s16, 8}, // truncstorei16 from s32
+                                 {s64, p0, s16, 8}, // truncstorei16 from s64
+                                 {s32, p0, s8, 8},
+                                 {s32, p0, s16, 8},
+                                 {s32, p0, s32, 8},
+                                 {s64, p0, s64, 8},
+                                 {s64, p0, s32, 8}, // truncstorei32 from s64
+                                 {p0, p0, s64, 8},
+                                 {s128, p0, s128, 8},
+                                 {v16s8, p0, s128, 8},
+                                 {v8s8, p0, s64, 8},
+                                 {v4s16, p0, s64, 8},
+                                 {v8s16, p0, s128, 8},
+                                 {v2s32, p0, s64, 8},
+                                 {v4s32, p0, s128, 8},
+                                 {v2s64, p0, s128, 8}})
       .clampScalar(0, s8, s64)
       .lowerIf([=](const LegalityQuery &Query) {
         return Query.Types[0].isScalar() &&
-               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
+               Query.Types[0] != Query.MMODescrs[0].MemoryTy;
       })
       // Maximum: sN * k = 128
       .clampMaxNumElements(0, s8, 16)
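Note: each entry in the tables above now reads `{register type, pointer type, memory type, minimum alignment in bits}`, so extending and truncating accesses are spelled with two different types instead of a type and a size. A hedged sketch of the idiom in isolation (a hypothetical rule set, not AArch64's):

```cpp
// Inside some target's LegalizerInfo constructor: mark a plain 32-bit load
// and an 8-to-32-bit extending load as legal, lower everything else.
const LLT S8 = LLT::scalar(8);
const LLT S32 = LLT::scalar(32);
const LLT P0 = LLT::pointer(0, 64); // assumed flat 64-bit address space
getActionDefinitionsBuilder(TargetOpcode::G_LOAD)
    .legalForTypesWithMemDesc({{S32, P0, S32, 8},  // load (s32) into s32
                               {S32, P0, S8, 8}})  // extload (s8) into s32
    .lower();
```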
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5bbb5f3f77d540..31a7c88442f0b2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -232,7 +232,7 @@ static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
     const LLT Ty = Query.Types[TypeIdx];
     return !Ty.isVector() && Ty.getSizeInBits() > 32 &&
-           Query.MMODescrs[0].SizeInBits < Ty.getSizeInBits();
+           Query.MMODescrs[0].MemoryTy.getSizeInBits() < Ty.getSizeInBits();
   };
 }
@@ -272,7 +272,7 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
   const bool IsLoad = Query.Opcode != AMDGPU::G_STORE;

   unsigned RegSize = Ty.getSizeInBits();
-  unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+  unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
   unsigned AlignBits = Query.MMODescrs[0].AlignInBits;
   unsigned AS = Query.Types[1].getAddressSpace();
@@ -361,23 +361,28 @@ static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query)
 /// Return true if a load or store of the type should be lowered with a bitcast
 /// to a different type.
 static bool shouldBitcastLoadStoreType(const GCNSubtarget &ST, const LLT Ty,
-                                       const unsigned MemSizeInBits) {
+                                       const LLT MemTy) {
+  const unsigned MemSizeInBits = MemTy.getSizeInBits();
   const unsigned Size = Ty.getSizeInBits();
   if (Size != MemSizeInBits)
     return Size <= 32 && Ty.isVector();

   if (loadStoreBitcastWorkaround(Ty) && isRegisterType(Ty))
     return true;
-  return Ty.isVector() && (Size <= 32 || isRegisterSize(Size)) &&
+
+  // Don't try to handle bitcasting vector ext loads for now.
+  return Ty.isVector() && (!MemTy.isVector() || MemTy == Ty) &&
+         (Size <= 32 || isRegisterSize(Size)) &&
          !isRegisterVectorElementType(Ty.getElementType());
 }

 /// Return true if we should legalize a load by widening an odd sized memory
 /// access up to the alignment. Note this case when the memory access itself
 /// changes, not the size of the result register.
-static bool shouldWidenLoad(const GCNSubtarget &ST, unsigned SizeInBits,
+static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
                             unsigned AlignInBits, unsigned AddrSpace,
                             unsigned Opcode) {
+  unsigned SizeInBits = MemoryTy.getSizeInBits();
   // We don't want to widen cases that are naturally legal.
   if (isPowerOf2_32(SizeInBits))
     return false;
@@ -413,7 +418,7 @@ static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,
   if (Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic)
     return false;

-  return shouldWidenLoad(ST, Query.MMODescrs[0].SizeInBits,
+  return shouldWidenLoad(ST, Query.MMODescrs[0].MemoryTy,
                          Query.MMODescrs[0].AlignInBits,
                          Query.Types[1].getAddressSpace(), Opcode);
 }
@@ -1044,7 +1049,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
         const LLT DstTy = Query.Types[0];

         // Split vector extloads.
-        unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+        unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
         unsigned AlignBits = Query.MMODescrs[0].AlignInBits;

         if (MemSize < DstTy.getSizeInBits())
@@ -1093,32 +1098,32 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     auto &Actions = getActionDefinitionsBuilder(Op);
     // Explicitly list some common cases.
     // TODO: Does this help compile time at all?
-    Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, 32, GlobalAlign32},
-                                      {V2S32, GlobalPtr, 64, GlobalAlign32},
-                                      {V4S32, GlobalPtr, 128, GlobalAlign32},
-                                      {S64, GlobalPtr, 64, GlobalAlign32},
-                                      {V2S64, GlobalPtr, 128, GlobalAlign32},
-                                      {V2S16, GlobalPtr, 32, GlobalAlign32},
-                                      {S32, GlobalPtr, 8, GlobalAlign8},
-                                      {S32, GlobalPtr, 16, GlobalAlign16},
-
-                                      {S32, LocalPtr, 32, 32},
-                                      {S64, LocalPtr, 64, 32},
-                                      {V2S32, LocalPtr, 64, 32},
-                                      {S32, LocalPtr, 8, 8},
-                                      {S32, LocalPtr, 16, 16},
-                                      {V2S16, LocalPtr, 32, 32},
-
-                                      {S32, PrivatePtr, 32, 32},
-                                      {S32, PrivatePtr, 8, 8},
-                                      {S32, PrivatePtr, 16, 16},
-                                      {V2S16, PrivatePtr, 32, 32},
-
-                                      {S32, ConstantPtr, 32, GlobalAlign32},
-                                      {V2S32, ConstantPtr, 64, GlobalAlign32},
-                                      {V4S32, ConstantPtr, 128, GlobalAlign32},
-                                      {S64, ConstantPtr, 64, GlobalAlign32},
-                                      {V2S32, ConstantPtr, 32, GlobalAlign32}});
+    Actions.legalForTypesWithMemDesc({{S32, GlobalPtr, S32, GlobalAlign32},
+                                      {V2S32, GlobalPtr, V2S32, GlobalAlign32},
+                                      {V4S32, GlobalPtr, V4S32, GlobalAlign32},
+                                      {S64, GlobalPtr, S64, GlobalAlign32},
+                                      {V2S64, GlobalPtr, V2S64, GlobalAlign32},
+                                      {V2S16, GlobalPtr, V2S16, GlobalAlign32},
+                                      {S32, GlobalPtr, S8, GlobalAlign8},
+                                      {S32, GlobalPtr, S16, GlobalAlign16},
+
+                                      {S32, LocalPtr, S32, 32},
+                                      {S64, LocalPtr, S64, 32},
+                                      {V2S32, LocalPtr, V2S32, 32},
+                                      {S32, LocalPtr, S8, 8},
+                                      {S32, LocalPtr, S16, 16},
+                                      {V2S16, LocalPtr, S32, 32},
+
+                                      {S32, PrivatePtr, S32, 32},
+                                      {S32, PrivatePtr, S8, 8},
+                                      {S32, PrivatePtr, S16, 16},
+                                      {V2S16, PrivatePtr, S32, 32},
+
+                                      {S32, ConstantPtr, S32, GlobalAlign32},
+                                      {V2S32, ConstantPtr, V2S32, GlobalAlign32},
+                                      {V4S32, ConstantPtr, V4S32, GlobalAlign32},
+                                      {S64, ConstantPtr, S64, GlobalAlign32},
+                                      {V2S32, ConstantPtr, V2S32, GlobalAlign32}});
     Actions.legalIf(
       [=](const LegalityQuery &Query) -> bool {
         return isLoadStoreLegal(ST, Query);
@@ -1140,7 +1145,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     Actions.bitcastIf(
       [=](const LegalityQuery &Query) -> bool {
         return shouldBitcastLoadStoreType(ST, Query.Types[0],
-                                          Query.MMODescrs[0].SizeInBits);
+                                          Query.MMODescrs[0].MemoryTy);
       }, bitcastToRegisterType(0));

     if (!IsStore) {
@@ -1163,7 +1168,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
           const LLT PtrTy = Query.Types[1];

           const unsigned DstSize = DstTy.getSizeInBits();
-          unsigned MemSize = Query.MMODescrs[0].SizeInBits;
+          unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();

           // Split extloads.
           if (DstSize > MemSize)
@@ -1211,7 +1216,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
           // FIXME: 3 element stores scalarized on SI

           // Split if it's too large for the address space.
-          if (Query.MMODescrs[0].SizeInBits > MaxSize) {
+          unsigned MemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
+          if (MemSize > MaxSize) {
             unsigned NumElts = DstTy.getNumElements();
             unsigned EltSize = EltTy.getSizeInBits();
@@ -1221,7 +1227,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
                 ElementCount::getFixed(MaxSize / EltSize), EltTy));
             }

-            unsigned NumPieces = Query.MMODescrs[0].SizeInBits / MaxSize;
+            unsigned NumPieces = MemSize / MaxSize;

             // FIXME: Refine when odd breakdowns handled
             // The scalars will need to be re-legalized.
@@ -1234,7 +1240,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
           }

           // FIXME: We could probably handle weird extending loads better.
-          unsigned MemSize = Query.MMODescrs[0].SizeInBits;
           if (DstTy.getSizeInBits() > MemSize)
             return std::make_pair(0, EltTy);
@@ -1270,14 +1275,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   }

   auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
-                       .legalForTypesWithMemDesc({{S32, GlobalPtr, 8, 8},
-                                                  {S32, GlobalPtr, 16, 2 * 8},
-                                                  {S32, LocalPtr, 8, 8},
-                                                  {S32, LocalPtr, 16, 16},
-                                                  {S32, PrivatePtr, 8, 8},
-                                                  {S32, PrivatePtr, 16, 16},
-                                                  {S32, ConstantPtr, 8, 8},
-                                                  {S32, ConstantPtr, 16, 2 * 8}})
+                       .legalForTypesWithMemDesc({{S32, GlobalPtr, S8, 8},
+                                                  {S32, GlobalPtr, S16, 2 * 8},
+                                                  {S32, LocalPtr, S8, 8},
+                                                  {S32, LocalPtr, S16, 16},
+                                                  {S32, PrivatePtr, S8, 8},
+                                                  {S32, PrivatePtr, S16, 16},
+                                                  {S32, ConstantPtr, S8, 8},
+                                                  {S32, ConstantPtr, S16, 2 * 8}})
                        .legalIf(
                          [=](const LegalityQuery &Query) -> bool {
                            return isLoadStoreLegal(ST, Query);
@@ -1285,7 +1290,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,

   if (ST.hasFlatAddressSpace()) {
     ExtLoads.legalForTypesWithMemDesc(
-        {{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
+        {{S32, FlatPtr, S8, 8}, {S32, FlatPtr, S16, 16}});
   }

   // Constant 32-bit is handled by addrspacecasting the 32-bit pointer to
@@ -2484,12 +2489,13 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
   MachineMemOperand *MMO = *MI.memoperands_begin();
   const unsigned ValSize = ValTy.getSizeInBits();
-  const unsigned MemSize = 8 * MMO->getSize();
+  const LLT MemTy = MMO->getMemoryType();
   const Align MemAlign = MMO->getAlign();
+  const unsigned MemSize = MemTy.getSizeInBits();
   const unsigned AlignInBits = 8 * MemAlign.value();

   // Widen non-power-of-2 loads to the alignment if needed
-  if (shouldWidenLoad(ST, MemSize, AlignInBits, AddrSpace, MI.getOpcode())) {
+  if (shouldWidenLoad(ST, MemTy, AlignInBits, AddrSpace, MI.getOpcode())) {
     const unsigned WideMemSize = PowerOf2Ceil(MemSize);

     // This was already the correct extending load result type, so just adjust
@@ -4611,7 +4617,7 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
   Observer.changingInstr(MI);

-  if (shouldBitcastLoadStoreType(ST, Ty, Size)) {
+  if (shouldBitcastLoadStoreType(ST, Ty, LLT::scalar(Size))) {
     Ty = getBitcastRegisterType(Ty);
     Helper.bitcastDst(MI, Ty, 0);
     Dst = MI.getOperand(0).getReg();
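Note: two seams worth calling out in the AMDGPU changes. Callers that still track a raw width (as in `legalizeSBufferLoad` above) bridge into the LLT-based API with `LLT::scalar(Size)`, and the widening arithmetic in `legalizeLoad` is unchanged. A worked instance of the latter, under the assumption that a 96-bit access reaches it:

```cpp
#include "llvm/Support/MathExtras.h"
// E.g. a (load (s96)): 96 is not a power of two, so shouldWidenLoad may
// fire and the access is rounded up to the next power-of-two memory size.
unsigned MemSize = 96;                              // MemTy.getSizeInBits()
unsigned WideMemSize = llvm::PowerOf2Ceil(MemSize); // == 128
```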
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index 09a94cc3a8e808..8be4e3f160e309 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -1096,24 +1096,6 @@ bool ARMInstructionSelector::select(MachineInstr &I) {
     if (NewOpc == G_LOAD || NewOpc == G_STORE)
       return false;

-    if (ValSize == 1 && NewOpc == Opcodes.STORE8) {
-      // Before storing a 1-bit value, make sure to clear out any unneeded bits.
-      Register OriginalValue = I.getOperand(0).getReg();
-
-      Register ValueToStore = MRI.createVirtualRegister(&ARM::GPRRegClass);
-      I.getOperand(0).setReg(ValueToStore);
-
-      auto InsertBefore = I.getIterator();
-      auto AndI = BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(Opcodes.AND))
-                      .addDef(ValueToStore)
-                      .addUse(OriginalValue)
-                      .addImm(1)
-                      .add(predOps(ARMCC::AL))
-                      .add(condCodeOp());
-      if (!constrainSelectedInstRegOperands(*AndI, TII, TRI, RBI))
-        return false;
-    }
-
     I.setDesc(TII.get(NewOpc));

     if (NewOpc == ARM::LDRH || NewOpc == ARM::STRH)
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 9db001cf153e13..0d2c28a9ce913c 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -149,11 +149,10 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
   // We're keeping these builders around because we'll want to add support for
   // floating point to them.
   auto &LoadStoreBuilder = getActionDefinitionsBuilder({G_LOAD, G_STORE})
-                               .legalForTypesWithMemDesc({{s1, p0, 8, 8},
-                                                          {s8, p0, 8, 8},
-                                                          {s16, p0, 16, 8},
-                                                          {s32, p0, 32, 8},
-                                                          {p0, p0, 32, 8}})
+                               .legalForTypesWithMemDesc({{s8, p0, s8, 8},
+                                                          {s16, p0, s16, 8},
+                                                          {s32, p0, s32, 8},
+                                                          {p0, p0, p0, 8}})
                                .unsupportedIfMemSizeNotPow2();

   getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});
@@ -176,7 +175,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
         .legalFor({s32, s64});

     LoadStoreBuilder
-        .legalForTypesWithMemDesc({{s64, p0, 64, 32}})
+        .legalForTypesWithMemDesc({{s64, p0, s64, 32}})
         .maxScalar(0, s32);
     PhiBuilder.legalFor({s64});
@@ -221,6 +220,9 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) {
         .libcallForCartesianProduct({s32, s64}, {s32});
   }

+  // Just expand whatever loads and stores are left.
+  LoadStoreBuilder.lower();
+
   if (!ST.useSoftFloat() && ST.hasVFP4Base())
     getActionDefinitionsBuilder(G_FMA).legalFor({s32, s64});
   else
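Note: the deleted ARM instruction-selector block and the new `LoadStoreBuilder.lower()` are two halves of one move. With s1 no longer in the legal set, the legalizer widens 1-bit stores (and masks the value) before selection ever sees them. A rough sketch of the lowering's effect, with assumed names (not the exact LegalizerHelper code):

```cpp
// Shape of the generic lowering for G_STORE %v:_(s1), %p :: (store (s1)):
// widen the stored value to the byte-sized memory type, zeroing the
// undefined high bits, then store a whole s8 (cf. the test updates below).
void lowerOneBitStore(MachineIRBuilder &B, Register Val, Register Ptr,
                      MachineMemOperand &WideMMO /* now (store (s8)) */) {
  auto Zext = B.buildZExt(LLT::scalar(8), Val); // equivalent to AND with 1
  B.buildStore(Zext, Ptr, WideMMO);
}
```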
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index b370eb36477870..a3970781ccecee 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -37,7 +37,7 @@ static bool isUnalignedMemmoryAccess(uint64_t MemSize, uint64_t AlignInBits) {
 static bool CheckTy0Ty1MemSizeAlign(const LegalityQuery &Query,
                                     std::initializer_list<TypesAndMemOps> SupportedValues) {
-  unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits;
+  unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();

   // Non power of two memory access is never legal.
   if (!isPowerOf2_64(QueryMemSize))
     return false;
@@ -67,6 +67,8 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
   using namespace TargetOpcode;

   const LLT s1 = LLT::scalar(1);
+  const LLT s8 = LLT::scalar(8);
+  const LLT s16 = LLT::scalar(16);
   const LLT s32 = LLT::scalar(32);
   const LLT s64 = LLT::scalar(64);
   const LLT v16s8 = LLT::fixed_vector(16, 8);
@@ -125,13 +127,13 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
           return false;

         unsigned Size = Query.Types[0].getSizeInBits();
-        unsigned QueryMemSize = Query.MMODescrs[0].SizeInBits;
+        unsigned QueryMemSize = Query.MMODescrs[0].MemoryTy.getSizeInBits();
         assert(QueryMemSize <= Size && "Scalar can't hold MemSize");

         if (Size > 64 || QueryMemSize > 64)
           return false;

-        if (!isPowerOf2_64(Query.MMODescrs[0].SizeInBits))
+        if (!isPowerOf2_64(Query.MMODescrs[0].MemoryTy.getSizeInBits()))
           return true;

         if (!ST.systemSupportsUnalignedAccess() &&
@@ -143,7 +145,8 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {

         return false;
       })
-      .minScalar(0, s32);
+      .minScalar(0, s32)
+      .lower();

   getActionDefinitionsBuilder(G_IMPLICIT_DEF)
       .legalFor({s32, s64});
@@ -155,8 +158,8 @@ MipsLegalizerInfo::MipsLegalizerInfo(const MipsSubtarget &ST) {
       .legalFor({{s64, s32}});

   getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
-      .legalForTypesWithMemDesc({{s32, p0, 8, 8},
-                                 {s32, p0, 16, 8}})
+      .legalForTypesWithMemDesc({{s32, p0, s8, 8},
+                                 {s32, p0, s16, 8}})
      .clampScalar(0, s32, s32);

   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
index 1a5c5627a64c3e..8c0c6e62f4d7e3 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-store.mir
@@ -8,8 +8,9 @@ body: |
     ; CHECK-LABEL: name: test_load
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s1))
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8))
+    ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s8) = G_ASSERT_ZEXT [[LOAD]], 1
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8)
     ; CHECK: $w0 = COPY [[ANYEXT]](s32)
     ; CHECK: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
@@ -63,11 +64,12 @@ body: |
     ; CHECK-LABEL: name: test_store
     ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32)
-    ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s1))
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32)
+    ; CHECK: G_STORE [[TRUNC]](s8), [[COPY]](p0) :: (store (s8))
     ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
     ; CHECK: G_STORE [[TRUNC1]](s8), [[COPY]](p0) :: (store (s8))
     ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
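Note: the `G_ASSERT_ZEXT` appearing in these updated checks does not compute anything; it records that the preceding load already zero-filled the bits at and above the given width, so later combines may rely on that. A scalar reference model of the invariant (assumed helper, illustration only):

```cpp
#include <cstdint>
// "%y = G_ASSERT_ZEXT %x, B" promises this already holds for %x.
bool assertZextHolds(uint64_t X, unsigned B) {
  return (X >> B) == 0; // bits B and above are zero
}
```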
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir
index 8589be6f4d8848..33cf7a8346bf2d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-load-trunc.mir
@@ -11,8 +11,9 @@ body: |
     ; CHECK-LABEL: name: test_load_trunc
     ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
-    ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s10))
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[LOAD]](s16)
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s16))
+    ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s16) = G_ASSERT_ZEXT [[LOAD]], 10
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s16)
     ; CHECK: RET_ReallyLR implicit [[TRUNC]](s1)
     %0:_(p0) = G_FRAME_INDEX %stack.0
     %1:_(s10) = G_LOAD %0(p0) :: (load (s10))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
index aedba2fba1f1f8..b590099b947a33 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir
@@ -12,21 +12,21 @@ body: |
     ; CI-LABEL: name: test_load_constant_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_constant_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_constant_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 4)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 4)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -45,21 +45,21 @@ body: |
     ; CI-LABEL: name: test_load_constant_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_constant_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_constant_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s2), addrspace 4)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load (s8), addrspace 4)
    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
index d9babfafadfe0c..f90b3430c0a973 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir
@@ -12,21 +12,21 @@ body: |
     ; CI-LABEL: name: test_load_flat_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1))
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_flat_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1))
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_flat_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s1))
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -45,21 +45,21 @@ body: |
     ; CI-LABEL: name: test_load_flat_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2))
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_flat_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2))
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_flat_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s2))
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s8))
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index adbe1de4ee99f4..38dd947dafabdb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -19,42 +19,42 @@ body: |
     ; SI-LABEL: name: test_load_global_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; SI: $vgpr0 = COPY [[AND]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s1_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI-HSA: $vgpr0 = COPY [[AND]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s1_align1
     ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI-MESA: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_global_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s1_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s1_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s1), addrspace 1)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -73,42 +73,42 @@ body: |
     ; SI-LABEL: name: test_load_global_s2_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; SI: $vgpr0 = COPY [[AND]](s32)
     ; CI-HSA-LABEL: name: test_load_global_s2_align1
     ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1)
+    ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI-HSA: $vgpr0 = COPY [[AND]](s32)
     ; CI-MESA-LABEL: name: test_load_global_s2_align1
    ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1)
+    ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; CI-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI-MESA: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_global_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-HSA-LABEL: name: test_load_global_s2_align1
     ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1)
+    ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9-HSA: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-MESA-LABEL: name: test_load_global_s2_align1
     ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s2), addrspace 1)
+    ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
     ; GFX9-MESA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-MESA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
index 012ba24c72d484..90b388dc808423 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -16,56 +16,56 @@ body: |
     ; SI-LABEL: name: test_load_local_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; SI: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_local_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s1_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI-DS128: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_local_s1_align1
    ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_local_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s1_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-LABEL: name: test_load_local_s1_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX10: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s1_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s1), addrspace 3)
+    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -84,56 +84,56 @@ body: |
     ; SI-LABEL: name: test_load_local_s2_align1
     ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; SI: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_local_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; CI-DS128-LABEL: name: test_load_local_s2_align1
     ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; CI-DS128: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
    ; CI-DS128: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI-DS128: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI-DS128: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI-DS128: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_local_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_local_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-UNALIGNED-LABEL: name: test_load_local_s2_align1
     ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX9-UNALIGNED: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-LABEL: name: test_load_local_s2_align1
     ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; GFX10: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX10: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; GFX10: $vgpr0 = COPY [[AND]](s32)
     ; GFX10-UNALIGNED-LABEL: name: test_load_local_s2_align1
     ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
-    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s2), addrspace 3)
+    ; GFX10-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s8), addrspace 3)
     ; GFX10-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX10-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX10-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
index ac47b8e56965af..b50a4c4018f2dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir
@@ -13,28 +13,28 @@ body: |
     ; SI-LABEL: name: test_load_private_s1_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; SI: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_private_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_private_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_private_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s1), addrspace 5)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
@@ -53,28 +53,28 @@ body: |
     ; SI-LABEL: name: test_load_private_s2_align1
     ; SI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5)
+    ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; SI: $vgpr0 = COPY [[AND]](s32)
     ; CI-LABEL: name: test_load_private_s2_align1
     ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5)
+    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; CI: $vgpr0 = COPY [[AND]](s32)
     ; VI-LABEL: name: test_load_private_s2_align1
     ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5)
+    ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
     ; VI: $vgpr0 = COPY [[AND]](s32)
     ; GFX9-LABEL: name: test_load_private_s2_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0
-    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s2), addrspace 5)
+    ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s8), addrspace 5)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
index e09b59a1a3897d..9c4b23fe15dc4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-global.mir
@@ -23,12 +23,14 @@ body: |
     ; GFX8-LABEL: name: test_sextload_global_i32_i1
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1
+    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i1
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 1
+    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s1), addrspace 1)
     $vgpr0 = COPY %1
@@ -42,12 +44,14 @@ body: |
     ; GFX8-LABEL: name: test_sextload_global_i32_i7
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7
+    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i7
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 7
+    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s7), addrspace 1)
     $vgpr0 = COPY %1
@@ -79,12 +83,14 @@ body: |
     ; GFX8-LABEL: name: test_sextload_global_i32_i30
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30
+    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i30
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 30
+    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s30), addrspace 1)
     $vgpr0 = COPY %1
@@ -98,12 +104,14 @@ body: |
     ; GFX8-LABEL: name: test_sextload_global_i32_i31
     ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX8: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
-    ; GFX8: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31
+    ; GFX8: $vgpr0 = COPY [[SEXT_INREG]](s32)
     ; GFX6-LABEL: name: test_sextload_global_i32_i31
     ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1)
-    ; GFX6: $vgpr0 = COPY [[SEXTLOAD]](s32)
+    ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)
+    ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LOAD]], 31
+    ; GFX6: $vgpr0 = COPY [[SEXT_INREG]](s32)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = G_SEXTLOAD %0 :: (load (s31), addrspace 1)
     $vgpr0 = COPY %1
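Note: in the sextload checks above, a (load (s1)) or (load (s7)) becomes a plain byte load plus `G_SEXT_INREG`, which sign-extends from the low B bits of the register. A scalar reference model (illustration, not LLVM code):

```cpp
#include <cstdint>
// Sign-extend the low B bits of a 32-bit value, 1 <= B < 32: the classic
// shift-up/arithmetic-shift-down pair that G_SEXT_INREG denotes.
int32_t sextInReg32(uint32_t X, unsigned B) {
  unsigned Shift = 32 - B;
  return (int32_t)(X << Shift) >> Shift;
}
```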
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index 7f3ec0beaad29a..8a16437cd148a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -17,28 +17,32 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; SI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; CI-LABEL: name: test_store_global_s1_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1)
+    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; CI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_s1_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; VI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s1_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1)
+    ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; GFX9: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s1) = G_TRUNC %1
@@ -55,22 +59,30 @@ body: |
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; CI-LABEL: name: test_store_global_s7_align1
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_s7_align1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s7_align1
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s7), addrspace 1)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s7) = G_TRUNC %1
@@ -435,22 +447,30 @@ body: |
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; SI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1)
+    ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; CI-LABEL: name: test_store_global_s25_align4
     ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1)
+    ; CI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; VI-LABEL: name: test_store_global_s25_align4
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1)
+    ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     ; GFX9-LABEL: name: test_store_global_s25_align4
     ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; GFX9: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s25), addrspace 1)
+    ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 33554431
+    ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; GFX9: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
     %0:_(p1) = COPY $vgpr0_vgpr1
     %1:_(s32) = COPY $vgpr2
     %2:_(s25) = G_TRUNC %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index 68719e5e78b226..db38191211b5dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -275,14 +275,16 @@ body: |
     ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; SI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1)
+    ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; SI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1)
     ; VI-LABEL: name: test_store_global_i1
     ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
     ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
     ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
-    ; VI: G_STORE [[AND]](s32), [[COPY]](p1) :: (store (s1), addrspace 1)
+    ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
; VI: G_STORE [[AND1]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s1) = G_TRUNC %1 @@ -970,7 +972,9 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; SI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_1_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -996,7 +1000,9 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (<3 x s2>), addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s8), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 @@ -1039,7 +1045,9 @@ body: | ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; SI: G_STORE [[COPY5]](s32), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4095 + ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C4]] + ; SI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) ; VI-LABEL: name: test_truncstore_global_v3s8_to_2_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -1065,7 +1073,9 @@ body: | ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR2]](s32) - ; VI: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (<3 x s4>), addrspace 1) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4095 + ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]] + ; VI: G_STORE [[AND4]](s32), [[COPY]](p1) :: (store (s16), addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s8>) = G_TRUNC %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir index b598e05fb9bd44..6e89156519ec37 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zextload-global.mir @@ -23,12 +23,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i32_i1 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i1 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s1), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: 
[[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 1 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s1), addrspace 1) $vgpr0 = COPY %1 @@ -42,12 +44,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i32_i7 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i7 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s7), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 7 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s7), addrspace 1) $vgpr0 = COPY %1 @@ -80,12 +84,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i32_i30 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i30 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s30), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 30 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s30), addrspace 1) $vgpr0 = COPY %1 @@ -99,12 +105,14 @@ body: | ; GFX8-LABEL: name: test_zextload_global_i32_i31 ; GFX8: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX8: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) - ; GFX8: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX8: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX8: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31 + ; GFX8: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) ; GFX6-LABEL: name: test_zextload_global_i32_i31 ; GFX6: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX6: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s31), addrspace 1) - ; GFX6: $vgpr0 = COPY [[ZEXTLOAD]](s32) + ; GFX6: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1) + ; GFX6: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[LOAD]], 31 + ; GFX6: $vgpr0 = COPY [[ASSERT_ZEXT]](s32) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s31), addrspace 1) $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index ce1066abc377c4..0c56ac3816c02d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -3227,11 
+3227,13 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s4, v2 ; GFX8-NEXT: v_xor_b32_e32 v3, s8, v3 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, s8, v3 +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v3 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: flat_store_byte v[0:1], v3 +; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: sdivrem_i3: @@ -3271,12 +3273,14 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 ; GFX9-NEXT: v_subrev_u32_e32 v0, s4, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v2, v0, s[0:1] -; GFX9-NEXT: global_store_byte v2, v1, s[2:3] +; GFX9-NEXT: v_and_b32_e32 v0, 7, v1 +; GFX9-NEXT: global_store_byte v2, v0, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: sdivrem_i3: @@ -3286,39 +3290,41 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10-NEXT: s_bfe_i32 s1, s0, 0x30008 ; GFX10-NEXT: s_bfe_i32 s0, s0, 0x30000 ; GFX10-NEXT: s_ashr_i32 s6, s1, 31 -; GFX10-NEXT: s_ashr_i32 s8, s0, 31 +; GFX10-NEXT: s_ashr_i32 s7, s0, 31 ; GFX10-NEXT: s_add_i32 s1, s1, s6 -; GFX10-NEXT: s_add_i32 s0, s0, s8 -; GFX10-NEXT: s_xor_b32 s7, s1, s6 -; GFX10-NEXT: s_xor_b32 s0, s0, s8 -; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 -; GFX10-NEXT: s_sub_i32 s1, 0, s7 +; GFX10-NEXT: s_add_i32 s0, s0, s7 +; GFX10-NEXT: s_xor_b32 s1, s1, s6 +; GFX10-NEXT: s_xor_b32 s0, s0, s7 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GFX10-NEXT: s_sub_i32 s2, 0, s1 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 ; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1 ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 -; GFX10-NEXT: s_xor_b32 s4, s8, s6 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 -; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 -; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX10-NEXT: s_xor_b32 s4, s7, s6 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 -; GFX10-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX10-NEXT: v_xor_b32_e32 v1, s7, v1 ; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 -; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s8, v1 +; GFX10-NEXT: 
v_subrev_nc_u32_e32 v1, s7, v1 +; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_byte v2, v0, s[0:1] ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] @@ -3330,11 +3336,155 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ret void } -; FIXME: Reenable test -; define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { -; %div = sdiv i27 %x, %y -; store i27 %div, i27 addrspace(1)* %out0 -; %rem = srem i27 %x, %y -; store i27 %rem, i27 addrspace(1)* %out1 -; ret void -; } +define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +; GFX8-LABEL: sdivrem_i27: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX8-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x0 +; GFX8-NEXT: s_mov_b32 s9, 0x7ffffff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_bfe_i32 s1, s1, 0x1b0000 +; GFX8-NEXT: s_ashr_i32 s2, s1, 31 +; GFX8-NEXT: s_add_i32 s1, s1, s2 +; GFX8-NEXT: s_xor_b32 s3, s1, s2 +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s3 +; GFX8-NEXT: s_sub_i32 s1, 0, s3 +; GFX8-NEXT: s_bfe_i32 s0, s0, 0x1b0000 +; GFX8-NEXT: s_ashr_i32 s8, s0, 31 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: s_add_i32 s0, s0, s8 +; GFX8-NEXT: s_xor_b32 s0, s0, s8 +; GFX8-NEXT: s_xor_b32 s2, s8, s2 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, v0, s3 +; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s0, v1 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e64 v2, s[0:1], s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX8-NEXT: v_add_u32_e32 v2, vcc, 1, v0 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: v_subrev_u32_e64 v2, s[0:1], s3, v1 +; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX8-NEXT: v_xor_b32_e32 v0, s2, v0 +; GFX8-NEXT: v_subrev_u32_e32 v0, vcc, s2, v0 +; GFX8-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, s8, v1 +; GFX8-NEXT: v_and_b32_e32 v3, s9, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: flat_store_dword v[0:1], v3 +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: v_and_b32_e32 v2, s9, v2 +; GFX8-NEXT: v_mov_b32_e32 v1, s7 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: sdivrem_i27: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_bfe_i32 s1, s1, 0x1b0000 +; GFX9-NEXT: s_ashr_i32 s6, s1, 31 +; GFX9-NEXT: s_add_i32 s1, s1, s6 +; GFX9-NEXT: s_xor_b32 s7, s1, s6 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX9-NEXT: s_sub_i32 s1, 0, s7 +; GFX9-NEXT: s_bfe_i32 s0, s0, 0x1b0000 +; GFX9-NEXT: s_ashr_i32 s8, s0, 31 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: s_add_i32 s0, s0, s8 +; GFX9-NEXT: s_xor_b32 s9, s0, s8 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: s_xor_b32 s5, s8, s6 +; GFX9-NEXT: s_mov_b32 s4, 0x7ffffff +; GFX9-NEXT: v_mul_hi_u32 v1, 
v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s9, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s9, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, s5, v0 +; GFX9-NEXT: v_subrev_u32_e32 v0, s5, v0 +; GFX9-NEXT: v_xor_b32_e32 v1, s8, v1 +; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX9-NEXT: v_subrev_u32_e32 v1, s8, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-NEXT: v_and_b32_e32 v0, s4, v1 +; GFX9-NEXT: global_store_dword v2, v0, s[2:3] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sdivrem_i27: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_bfe_i32 s1, s1, 0x1b0000 +; GFX10-NEXT: s_bfe_i32 s0, s0, 0x1b0000 +; GFX10-NEXT: s_ashr_i32 s6, s1, 31 +; GFX10-NEXT: s_ashr_i32 s7, s0, 31 +; GFX10-NEXT: s_add_i32 s1, s1, s6 +; GFX10-NEXT: s_add_i32 s0, s0, s7 +; GFX10-NEXT: s_xor_b32 s1, s1, s6 +; GFX10-NEXT: s_xor_b32 s0, s0, s7 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GFX10-NEXT: s_sub_i32 s2, 0, s1 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s2, v0 +; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, v0, s1 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s1, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s1, v1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX10-NEXT: s_xor_b32 s4, s7, s6 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_xor_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_xor_b32_e32 v1, s7, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v0, s4, v0 +; GFX10-NEXT: v_subrev_nc_u32_e32 v1, s7, v1 +; GFX10-NEXT: s_mov_b32 s4, 0x7ffffff +; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 +; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_store_dword v2, v0, s[0:1] +; GFX10-NEXT: global_store_dword v2, v1, s[2:3] +; GFX10-NEXT: s_endpgm + %div = sdiv i27 %x, %y + store i27 %div, i27 addrspace(1)* %out0 + %rem = srem i27 %x, %y + store i27 %rem, i27 addrspace(1)* %out1 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index 0cb228f6c651dd..11d115fcc5c057 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -2553,11 +2553,13 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX8-NEXT: 
v_subrev_u32_e64 v4, s[0:1], s6, v3 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX8-NEXT: flat_store_byte v[0:1], v2 -; GFX8-NEXT: v_mov_b32_e32 v0, s2 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v3 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 -; GFX8-NEXT: flat_store_byte v[0:1], v3 +; GFX8-NEXT: flat_store_byte v[0:1], v2 ; GFX8-NEXT: s_endpgm ; ; GFX9-LABEL: udivrem_i3: @@ -2588,10 +2590,12 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s6, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GFX9-NEXT: v_subrev_u32_e32 v3, s6, v1 +; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_byte v2, v0, s[0:1] -; GFX9-NEXT: global_store_byte v2, v1, s[2:3] +; GFX9-NEXT: v_and_b32_e32 v0, 7, v1 +; GFX9-NEXT: global_store_byte v2, v0, s[2:3] ; GFX9-NEXT: s_endpgm ; ; GFX10-LABEL: udivrem_i3: @@ -2621,8 +2625,10 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s6, v1 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s6, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo -; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 7, v1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: global_store_byte v2, v0, s[0:1] ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] @@ -2634,11 +2640,123 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ret void } -; FIXME: Reenable test -; define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { -; %div = udiv i27 %x, %y -; store i27 %div, i27 addrspace(1)* %out0 -; %rem = urem i27 %x, %y -; store i27 %rem, i27 addrspace(1)* %out1 -; ret void -; } +define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +; GFX8-LABEL: udivrem_i27: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX8-NEXT: s_mov_b32 s6, 0x7ffffff +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_and_b32 s7, s1, s6 +; GFX8-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX8-NEXT: s_sub_i32 s1, 0, s7 +; GFX8-NEXT: s_and_b32 s8, s0, s6 +; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX8-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: v_mul_hi_u32 v2, s8, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s1 +; GFX8-NEXT: v_mul_lo_u32 v3, v2, s7 +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 +; GFX8-NEXT: v_sub_u32_e32 v3, vcc, s8, v3 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX8-NEXT: v_add_u32_e32 v4, vcc, 1, v2 +; GFX8-NEXT: v_cmp_le_u32_e32 vcc, s7, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX8-NEXT: v_subrev_u32_e64 v4, s[0:1], s7, v3 +; GFX8-NEXT: v_and_b32_e32 v2, s6, v2 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_and_b32_e32 v2, s6, 
v3 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX9-LABEL: udivrem_i27: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX9-NEXT: s_mov_b32 s6, 0x7ffffff +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_and_b32 s7, s1, s6 +; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX9-NEXT: s_sub_i32 s1, 0, s7 +; GFX9-NEXT: s_and_b32 s8, s0, s6 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_mul_hi_u32 v0, s8, v0 +; GFX9-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_sub_u32_e32 v1, s8, v1 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: v_add_u32_e32 v3, 1, v0 +; GFX9-NEXT: v_cmp_le_u32_e32 vcc, s7, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX9-NEXT: v_subrev_u32_e32 v3, s7, v1 +; GFX9-NEXT: v_and_b32_e32 v0, s6, v0 +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_store_dword v2, v0, s[0:1] +; GFX9-NEXT: v_and_b32_e32 v0, s6, v1 +; GFX9-NEXT: global_store_dword v2, v0, s[2:3] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: udivrem_i27: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 +; GFX10-NEXT: s_mov_b32 s6, 0x7ffffff +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_and_b32 s7, s1, s6 +; GFX10-NEXT: s_and_b32 s0, s0, s6 +; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s7 +; GFX10-NEXT: s_sub_i32 s1, 0, s7 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, s1, v0 +; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: v_mul_hi_u32 v0, s0, v0 +; GFX10-NEXT: v_mul_lo_u32 v1, v0, s7 +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_sub_nc_u32_e32 v1, s0, v1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_add_nc_u32_e32 v2, 1, v0 +; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, s7, v1 +; GFX10-NEXT: v_subrev_nc_u32_e32 v3, s7, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_and_b32_e32 v0, s6, v0 +; GFX10-NEXT: v_and_b32_e32 v1, s6, v1 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_store_dword v2, v0, s[0:1] +; GFX10-NEXT: global_store_dword v2, v1, s[2:3] +; GFX10-NEXT: s_endpgm + %div = udiv i27 %x, %y + store i27 %div, i27 addrspace(1)* %out0 + %rem = urem i27 %x, %y + store i27 %rem, i27 addrspace(1)* %out1 + ret void +} diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir index 6a6da04807cf3b..935bbd34651f46 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -938,7 +938,7 @@ body: | ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 
[[ADDri]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[LDRi12_]] ; CHECK: [[ADDri1:%[0-9]+]]:gpr = ADDri %fixed-stack.2, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[LDRBi12_:%[0-9]+]]:gprnopc = LDRBi12 [[ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s1)) + ; CHECK: [[LDRBi12_:%[0-9]+]]:gprnopc = LDRBi12 [[ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s8)) ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[LDRBi12_]] ; CHECK: $r0 = COPY [[COPY]] ; CHECK: BX_RET 14 /* CC::al */, $noreg @@ -950,9 +950,9 @@ body: | %2(p0) = G_FRAME_INDEX %fixed-stack.0 - %3(s1) = G_LOAD %2(p0) :: (load (s1)) + %3(s8) = G_LOAD %2(p0) :: (load (s8)) - %4(s32) = G_ANYEXT %3(s1) + %4(s32) = G_ANYEXT %3(s8) $r0 = COPY %4 @@ -977,8 +977,6 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 ; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY $r1 ; CHECK: [[COPY2:%[0-9]+]]:gprnopc = COPY [[COPY1]] - ; CHECK: [[ANDri:%[0-9]+]]:gprnopc = ANDri [[COPY1]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: STRBi12 [[ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s1)) ; CHECK: STRBi12 [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s8)) ; CHECK: STRH [[COPY1]], [[COPY]], $noreg, 0, 14 /* CC::al */, $noreg :: (store (s16)) ; CHECK: STRi12 [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32)) @@ -987,14 +985,10 @@ body: | %3(s32) = COPY $r1 - %4(s1) = G_TRUNC %3(s32) - %1(s8) = G_TRUNC %3(s32) %2(s16) = G_TRUNC %3(s32) - G_STORE %4(s1), %0(p0) :: (store (s1)) - G_STORE %1(s8), %0(p0) :: (store (s8)) G_STORE %2(s16), %0(p0) :: (store (s16)) diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir index e1c972855a3944..7c2780d672bd69 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-consts.mir @@ -41,14 +41,17 @@ body: | ; CHECK: {{%[0-9]+}}:_(s8) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i8 - %3(s1) = G_CONSTANT i1 1 - G_STORE %3(s1), %4(p0) :: (store (s1)) + %3:_(s1) = G_CONSTANT i1 1 + %6:_(s32) = G_CONSTANT i32 99 + %7:_(s32) = G_SELECT %3, %0, %6 + G_STORE %7(s32), %4(p0) :: (store (s32)) ; CHECK-NOT: G_CONSTANT i1 ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK: {{%[0-9]+}}:_(s1) = G_TRUNC [[EXT]](s32) ; CHECK-NOT: G_CONSTANT i1 %5(p0) = G_CONSTANT i32 0 + G_STORE %5(p0), %4(p0) :: (store (p0)) ; CHECK: {{%[0-9]+}}:_(p0) = G_CONSTANT i32 0 diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir index c3fb95c9957302..5e6301dc07b655 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-exts.mir @@ -143,20 +143,18 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.0: - liveins: $r0 + liveins: $r0, $r1 - %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load (s1)) - %2(s16) = G_SEXT %1(s1) + %0:_(p0) = COPY $r0 + %1:_(s32) = COPY $r1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %1, %2 + %4:_(s16) = G_SEXT %3(s1) ; G_SEXT from s1 to s16 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s16) = G_SEXT {{%[0-9]+}}(s1) - G_STORE %2(s16), %0(p0) :: (store (s16)) + G_STORE %4(s16), %0(p0) :: (store (s16)) BX_RET 14, $noreg ... 
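With s1 memory accesses now lowered to byte-sized operations (see the new test_load_store_s1 in arm-legalize-load-store.mir below), these ARM extension tests no longer obtain their s1 operand from a (load (s1)); they synthesize it with a G_ICMP so that only the extension itself remains under test. A minimal illustration of the pattern — register names %flag, %a, %b are purely illustrative:

  ; Illustrative only: an s1 value synthesized without touching memory,
  ; feeding the extension under test.
  %flag:_(s1) = G_ICMP intpred(eq), %a(s32), %b(s32)
  %wide:_(s16) = G_SEXT %flag(s1)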
--- @@ -167,20 +165,18 @@ legalized: false regBankSelected: false selected: false tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } body: | bb.0: - liveins: $r0 + liveins: $r0, $r1 - %0(p0) = COPY $r0 - %1(s1) = G_LOAD %0(p0) :: (load (s1)) - %2(s8) = G_ANYEXT %1 + %0:_(p0) = COPY $r0 + %1:_(s32) = COPY $r1 + %2:_(s32) = G_CONSTANT i32 0 + %3:_(s1) = G_ICMP intpred(eq), %1, %2 + %4:_(s8) = G_ANYEXT %3 ; G_ANYEXT from s1 to s8 is legal, so we should find it unchanged in the output ; CHECK: {{%[0-9]+}}:_(s8) = G_ANYEXT {{%[0-9]+}}(s1) - G_STORE %2(s8), %0(p0) :: (store (s8)) + G_STORE %4(s8), %0(p0) :: (store (s8)) BX_RET 14, $noreg ... --- diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir index f2b6277a802347..1ebaa2a532cc4b 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -2,6 +2,7 @@ # RUN: llc -mtriple thumbv7-- -run-pass=legalizer %s -o - | FileCheck %s --- | define void @test_legal_loads_stores() { ret void } + define void @test_load_store_s1() { ret void } define void @test_load_from_stack() { ret void } define void @test_load_store_64_vfp() #0 { ret void } @@ -34,16 +35,14 @@ body: | liveins: $r0 ; These are all legal, so we should find them unchanged in the output - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s32), %0(p0) - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s16), %0(p0) - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s8), %0(p0) - ; CHECK-DAG: G_STORE {{%[0-9]+}}(s1), %0(p0) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s32), %0(p0) :: (store (s32)) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s16), %0(p0) :: (store (s16)) + ; CHECK-DAG: G_STORE {{%[0-9]+}}(s8), %0(p0) :: (store (s8)) ; CHECK-DAG: G_STORE {{%[0-9]+}}(p0), %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s16) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s8) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(s1) = G_LOAD %0(p0) - ; CHECK-DAG: {{%[0-9]+}}:_(p0) = G_LOAD %0(p0) + ; CHECK-DAG: {{%[0-9]+}}:_(s32) = G_LOAD %0(p0) :: (load (s32)) + ; CHECK-DAG: {{%[0-9]+}}:_(s16) = G_LOAD %0(p0) :: (load (s16)) + ; CHECK-DAG: {{%[0-9]+}}:_(s8) = G_LOAD %0(p0) :: (load (s8)) + ; CHECK-DAG: {{%[0-9]+}}:_(p0) = G_LOAD %0(p0) :: (load (p0)) %0(p0) = COPY $r0 %2(s32) = G_LOAD %0(p0) :: (load (s32)) G_STORE %2(s32), %0(p0) :: (store (s32)) @@ -51,12 +50,34 @@ body: | G_STORE %3(s16), %0(p0) :: (store (s16)) %4(s8) = G_LOAD %0(p0) :: (load (s8)) G_STORE %4(s8), %0(p0) :: (store (s8)) - %5(s1) = G_LOAD %0(p0) :: (load (s1)) - G_STORE %5(s1), %0(p0) :: (store (s1)) %6(p0) = G_LOAD %0(p0) :: (load (p0)) G_STORE %6(p0), %0(p0) :: (store (p0)) BX_RET 14, $noreg ... + +--- +name: test_load_store_s1 +legalized: false +# CHECK: legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +body: | + bb.0: + liveins: $r0 + + ; CHECK: [[LD:%[0-9]+]]:_(s8) = G_LOAD %0(p0) :: (load (s8)) + ; CHECK: [[ONE:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LD]] + ; CHECK: [[COPYONE:%[0-9]+]]:_(s32) = COPY [[ONE]] + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXT]], [[COPYONE]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]] + ; CHECK: G_STORE [[TRUNC]](s8), {{%[0-9]+}}(p0) :: (store (s8)) + %0:_(p0) = COPY $r0 + %5:_(s1) = G_LOAD %0(p0) :: (load (s1)) + G_STORE %5(s1), %0(p0) :: (store (s1)) + BX_RET 14, $noreg +... 
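The new test_load_store_s1 above spells out the lowering for a non-byte-sized s1 access: the load becomes (load (s8)), and the store masks the value down to its single live bit before storing a whole byte. A rough C++ sketch of the store-side rewrite those CHECK lines expect, assuming MachineIRBuilder; widenS1Store and its parameters are hypothetical names, not the in-tree implementation:

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  using namespace llvm;

  // Hypothetical helper: widen a (store (s1)) to (store (s8)), keeping only
  // the low bit of the value, mirroring the CHECK lines above.
  static void widenS1Store(MachineIRBuilder &B, Register Val /* s1 */,
                           Register Ptr, MachineMemOperand &ByteMMO /* s8 */) {
    const LLT S32 = LLT::scalar(32);
    const LLT S8 = LLT::scalar(8);
    auto Ext = B.buildAnyExt(S32, Val);       // G_ANYEXT the s1 to a work size
    auto One = B.buildConstant(S32, 1);       // G_CONSTANT i32 1
    auto Masked = B.buildAnd(S32, Ext, One);  // G_AND: only bit 0 is defined
    auto Byte = B.buildTrunc(S8, Masked);     // G_TRUNC back to the memory type
    B.buildStore(Byte, Ptr, ByteMMO);         // G_STORE ... :: (store (s8))
  }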
--- name: test_load_from_stack # CHECK-LABEL: name: test_load_from_stack diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir index 6c97952862c530..44e17025032021 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -400,8 +400,7 @@ selected: false # CHECK: - { id: 2, class: gprb, preferred-register: '' } # CHECK: - { id: 3, class: gprb, preferred-register: '' } # CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } -# CHECK: - { id: 6, class: fprb, preferred-register: '' } +# CHECK: - { id: 5, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } @@ -415,12 +414,11 @@ body: | bb.0: liveins: $r0 %0(p0) = COPY $r0 - %6(s64) = G_LOAD %0 :: (load (s64)) + %5(s64) = G_LOAD %0 :: (load (s64)) %1(s32) = G_LOAD %0 :: (load (s32)) %2(s16) = G_LOAD %0 :: (load (s16)) %3(s8) = G_LOAD %0 :: (load (s8)) - %4(s1) = G_LOAD %0 :: (load (s1)) - %5(p0) = G_LOAD %0 :: (load (p0)) + %4(p0) = G_LOAD %0 :: (load (p0)) BX_RET 14, $noreg, implicit $r0 ... @@ -435,10 +433,7 @@ selected: false # CHECK: - { id: 1, class: gprb, preferred-register: '' } # CHECK: - { id: 2, class: gprb, preferred-register: '' } # CHECK: - { id: 3, class: gprb, preferred-register: '' } -# CHECK: - { id: 4, class: gprb, preferred-register: '' } -# CHECK: - { id: 5, class: gprb, preferred-register: '' } -# CHECK: - { id: 6, class: fprb, preferred-register: '' } - +# CHECK: - { id: 4, class: fprb, preferred-register: '' } registers: - { id: 0, class: _ } - { id: 1, class: _ } @@ -457,12 +452,8 @@ body: | G_STORE %2(s16), %0 :: (store (s16)) %3(s8) = G_TRUNC %1(s32) G_STORE %3(s8), %0 :: (store (s8)) - %4(s1) = G_TRUNC %1(s32) - G_STORE %4(s1), %0 :: (store (s1)) - %5(p0) = COPY $r5 - G_STORE %5(p0), %0 :: (store (p0)) - %6(s64) = COPY $d6 - G_STORE %6(s64), %0 :: (store (s64)) + %4(s64) = COPY $d6 + G_STORE %4(s64), %0 :: (store (s64)) BX_RET 14, $noreg, implicit $r0 ... diff --git a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir index 4f92f411de30f5..ff97632736ecc7 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/thumb-select-load-store.mir @@ -1,7 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s --- | - define void @test_s1() { ret void } define void @test_s8() { ret void } define void @test_s16() { ret void } define void @test_s32() { ret void } @@ -11,32 +10,6 @@ define void @test_load_from_stack() { ret void } ... --- -name: test_s1 -legalized: true -regBankSelected: true -selected: false -registers: - - { id: 0, class: gprb } - - { id: 1, class: gprb } -body: | - bb.0: - liveins: $r0 - - ; CHECK-LABEL: name: test_s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r0 - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:rgpr = t2LDRBi12 [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s1)) - ; CHECK: [[t2ANDri:%[0-9]+]]:rgpr = t2ANDri [[t2LDRBi12_]], 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: t2STRBi12 [[t2ANDri]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s1)) - ; CHECK: BX_RET 14 /* CC::al */, $noreg - %0(p0) = COPY $r0 - - %1(s1) = G_LOAD %0(p0) :: (load (s1)) - - G_STORE %1(s1), %0(p0) :: (store (s1)) - - BX_RET 14, $noreg -... 
---- name: test_s8 legalized: true regBankSelected: true @@ -164,7 +137,7 @@ body: | ; CHECK: [[t2LDRi12_:%[0-9]+]]:gpr = t2LDRi12 [[t2ADDri]], 0, 14 /* CC::al */, $noreg :: (load (s32)) ; CHECK: $r0 = COPY [[t2LDRi12_]] ; CHECK: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri %fixed-stack.2, 0, 14 /* CC::al */, $noreg, $noreg - ; CHECK: [[t2LDRBi12_:%[0-9]+]]:gprnopc = t2LDRBi12 [[t2ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s1)) + ; CHECK: [[t2LDRBi12_:%[0-9]+]]:gprnopc = t2LDRBi12 [[t2ADDri1]], 0, 14 /* CC::al */, $noreg :: (load (s8)) ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY [[t2LDRBi12_]] ; CHECK: $r0 = COPY [[COPY]] ; CHECK: BX_RET 14 /* CC::al */, $noreg @@ -176,9 +149,9 @@ body: | %2(p0) = G_FRAME_INDEX %fixed-stack.0 - %3(s1) = G_LOAD %2(p0) :: (load (s1)) + %3(s8) = G_LOAD %2(p0) :: (load (s8)) - %4(s32) = G_ANYEXT %3(s1) + %4(s32) = G_ANYEXT %3(s8) $r0 = COPY %4 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir index c60ca3672c9ad4..7e9e3d1f3a4bf7 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/add.mir @@ -342,7 +342,8 @@ body: | ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s1) into %ir.pcarry_flag) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) ; MIPS32: G_STORE [[ADD]](s32), [[COPY2]](p0) :: (store (s32) into %ir.padd) ; MIPS32: RetRA %0:_(s32) = COPY $a0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir index c5a35cb1f8808c..f5a7a48e77004c 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/mul.mir @@ -446,7 +446,8 @@ body: | ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY3]](p0) :: (store (s1) into %ir.pcarry_flag) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C1]] + ; MIPS32: G_STORE [[AND1]](s32), [[COPY3]](p0) :: (store (s8) into %ir.pcarry_flag) ; MIPS32: G_STORE [[MUL]](s32), [[COPY2]](p0) :: (store (s32) into %ir.pmul) ; MIPS32: RetRA %0:_(s32) = COPY $a0 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir index 02d01c18791be8..ed31997f0f34c3 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/truncStore_and_aExtLoad.mir @@ -62,11 +62,12 @@ body: | ; MIPS32: liveins: $a0, $a1 ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 - ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s1) from %ir.py) + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s8) from %ir.py) ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; MIPS32: G_STORE [[AND]](s32), [[COPY]](p0) :: (store (s1) into %ir.px) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; MIPS32: G_STORE [[AND1]](s32), [[COPY]](p0) :: (store (s8) into 
%ir.px) ; MIPS32: RetRA %0:_(p0) = COPY $a0 %1:_(p0) = COPY $a1 diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll index fe26837d0a3446..116b76e4ee1e8e 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll @@ -173,6 +173,7 @@ define void @uadd_with_overflow(i32 %lhs, i32 %rhs, i32* %padd, i1* %pcarry_flag ; MIPS32-NEXT: addu $1, $4, $5 ; MIPS32-NEXT: sltu $2, $1, $5 ; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: andi $2, $2, 1 ; MIPS32-NEXT: sb $2, 0($7) ; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll index d249890bcfeaaa..7195ccf4967936 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -187,6 +187,7 @@ define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag ; MIPS32-NEXT: mul $1, $4, $5 ; MIPS32-NEXT: sltu $2, $zero, $2 ; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: andi $2, $2, 1 ; MIPS32-NEXT: sb $2, 0($7) ; MIPS32-NEXT: sw $1, 0($6) ; MIPS32-NEXT: jr $ra diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll index e4f93520fe9023..1c085685f01504 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/truncStore_and_aExtLoad.ll @@ -28,6 +28,7 @@ define void @load_store_i1(i1* %px, i1* %py) { ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lbu $1, 0($5) ; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sb $1, 0($4) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp index 69362200618100..7ca4678e2c3ce3 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerInfoTest.cpp @@ -391,22 +391,22 @@ TEST(LegalizerInfoTest, MMOAlignment) { LegalizerInfo LI; auto &LegacyInfo = LI.getLegacyLegalizerInfo(); LI.getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s32, p0, 32, 32}}); + .legalForTypesWithMemDesc({{s32, p0, s32, 32}}); LegacyInfo.computeTables(); EXPECT_ACTION(Legal, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 32, AtomicOrdering::NotAtomic})); + s32, 32, AtomicOrdering::NotAtomic})); EXPECT_ACTION(Unsupported, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 16, AtomicOrdering::NotAtomic })); + s32, 16, AtomicOrdering::NotAtomic })); EXPECT_ACTION(Unsupported, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, LegalityQuery::MemDesc{ - 32, 8, AtomicOrdering::NotAtomic})); + s32, 8, AtomicOrdering::NotAtomic})); } // Test that the maximum supported alignment value isn't truncated @@ -417,18 +417,18 @@ TEST(LegalizerInfoTest, MMOAlignment) { LegalizerInfo LI; auto &LegacyInfo = LI.getLegacyLegalizerInfo(); LI.getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s32, p0, 32, MaxAlignInBits}}); + .legalForTypesWithMemDesc({{s32, p0, s32, MaxAlignInBits}}); LegacyInfo.computeTables(); EXPECT_ACTION(Legal, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, - LegalityQuery::MemDesc{32, + LegalityQuery::MemDesc{s32, MaxAlignInBits, AtomicOrdering::NotAtomic})); EXPECT_ACTION(Unsupported, 0, LLT(), LegalityQuery(G_LOAD, {s32, p0}, 
LegalityQuery::MemDesc{ - 32, 8, AtomicOrdering::NotAtomic })); + s32, 8, AtomicOrdering::NotAtomic })); } } diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp index ba6721fe6fe9be..b759b5fecd645f 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp @@ -31,10 +31,11 @@ ::testing::AssertionResult isNullMIPtr(const MachineInstr *MI) { DefineLegalizerInfo(ALegalizer, { auto p0 = LLT::pointer(0, 64); + auto s8 = LLT::scalar(8); auto v2s8 = LLT::fixed_vector(2, 8); auto v2s16 = LLT::fixed_vector(2, 16); getActionDefinitionsBuilder(G_LOAD) - .legalForTypesWithMemDesc({{s16, p0, 8, 8}}) + .legalForTypesWithMemDesc({{s16, p0, s8, 8}}) .scalarize(0) .clampScalar(0, s16, s16); getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}});
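Taken together, the unit-test updates above show the shape of the revised API: legalForTypesWithMemDesc entries and LegalityQuery::MemDesc now carry the memory type as an LLT ({MemoryTy, AlignInBits, Ordering}) rather than a raw size in bits. A hedged sketch of how a target might state and query such a rule — the function name, opcode choice, and types are illustrative, not taken from any in-tree target:

  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  #include "llvm/CodeGen/TargetOpcodes.h"
  using namespace llvm;

  static void declareLoadRules(LegalizerInfo &LI) {
    const LLT S32 = LLT::scalar(32);
    const LLT P0 = LLT::pointer(0, 32);
    // {result type, pointer type, memory type (now an LLT), align in bits}
    LI.getActionDefinitionsBuilder(TargetOpcode::G_LOAD)
        .legalForTypesWithMemDesc({{S32, P0, S32, 32}});
    // A matching query names the memory type the same way, as in the
    // EXPECT_ACTION checks above:
    //   LegalityQuery(TargetOpcode::G_LOAD, {S32, P0},
    //                 LegalityQuery::MemDesc{S32, 32,
    //                                        AtomicOrdering::NotAtomic});
  }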