diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index 4d147bf20c26a5..554bef84069893 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -652,17 +652,17 @@ bool GIMatchTableExecutor::executeMatchTable(
       MachineMemOperand *MMO =
           *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
-      unsigned Size = MRI.getType(MO.getReg()).getSizeInBits();
+      const TypeSize Size = MRI.getType(MO.getReg()).getSizeInBits();
       if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT &&
-          MMO->getSizeInBits().getValue() != Size) {
+          MMO->getSizeInBits() != Size) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       } else if (MatcherOpcode == GIM_CheckMemorySizeLessThanLLT &&
-                 MMO->getSizeInBits().getValue() >= Size) {
+                 TypeSize::isKnownGE(MMO->getSizeInBits().getValue(), Size)) {
         if (handleReject() == RejectAndGiveUp)
           return false;
       } else if (MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT &&
-                 MMO->getSizeInBits().getValue() <= Size)
+                 TypeSize::isKnownLE(MMO->getSizeInBits().getValue(), Size))
         if (handleReject() == RejectAndGiveUp)
           return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 653e7689b57743..0c886a052d059c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1080,7 +1080,8 @@ bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
   LLT Ty = MRI.getType(LdSt.getReg(0));
   LLT MemTy = LdSt.getMMO().getMemoryType();
   SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
-      {{MemTy, MemTy.getSizeInBits(), AtomicOrdering::NotAtomic}});
+      {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
+        AtomicOrdering::NotAtomic}});
   unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
   SmallVector<LLT> OpTys;
   if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6661127162e524..b14a004d5c4ac9 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1413,7 +1413,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
 
 bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
   const StoreInst &SI = cast<StoreInst>(U);
-  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
+  if (DL->getTypeStoreSize(SI.getValueOperand()->getType()).isZero())
     return true;
 
   ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c1ca78af5cda8c..93057ef87503cd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -145,6 +145,15 @@ static cl::opt<bool> EnableExtToTBL("aarch64-enable-ext-to-tbl", cl::Hidden,
 static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
                                  cl::desc("Maximum of xors"));
 
+// By turning this on, we will not fall back to DAG ISel when encountering
+// scalable vector types for any instruction, even if SVE is not yet supported
+// for some instructions.
+// See [AArch64TargetLowering::fallBackToDAGISel] for implementation details.
+static cl::opt<bool> EnableSVEGISel(
+    "aarch64-enable-gisel-sve", cl::Hidden,
+    cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
+    cl::init(false));
+
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
@@ -26376,16 +26385,22 @@ bool AArch64TargetLowering::shouldLocalize(
 }
 
 bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
-  if (Inst.getType()->isScalableTy())
-    return true;
-
-  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
-    if (Inst.getOperand(i)->getType()->isScalableTy())
+  // Fallback for scalable vectors.
+  // Note that if EnableSVEGISel is true, we allow scalable vector types for
+  // all instructions, regardless of whether they are actually supported.
+  if (!EnableSVEGISel) {
+    if (Inst.getType()->isScalableTy()) {
       return true;
+    }
 
-  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
-    if (AI->getAllocatedType()->isScalableTy())
-      return true;
+    for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
+      if (Inst.getOperand(i)->getType()->isScalableTy())
+        return true;
+
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
+      if (AI->getAllocatedType()->isScalableTy())
+        return true;
+    }
   }
 
   // Checks to allow the use of SME instructions
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterBanks.td b/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
index 615ce7d51d9ba7..2b597b8606921e 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterBanks.td
@@ -12,8 +12,8 @@
 /// General Purpose Registers: W, X.
 def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>;
 
-/// Floating Point/Vector Registers: B, H, S, D, Q.
-def FPRRegBank : RegisterBank<"FPR", [QQQQ]>;
+/// Floating Point, Vector, Scalable Vector Registers: B, H, S, D, Q, Z.
+def FPRRegBank : RegisterBank<"FPR", [QQQQ, ZPR]>;
 
 /// Conditional register: NZCV.
 def CCRegBank : RegisterBank<"CC", [CCR]>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index d4aac94d24f12a..c472fd06ba3734 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -61,6 +61,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   const LLT v2s64 = LLT::fixed_vector(2, 64);
   const LLT v2p0 = LLT::fixed_vector(2, p0);
 
+  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
+  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
+  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
+  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
+
   std::initializer_list<LLT> PackedVectorAllTypeList = {/* Begin 128bit types */
                                                         v16s8, v8s16, v4s32,
                                                         v2s64, v2p0,
@@ -329,7 +334,31 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
     return ValTy.isPointerVector() && ValTy.getAddressSpace() == 0;
   };
 
-  getActionDefinitionsBuilder(G_LOAD)
+  auto &LoadActions = getActionDefinitionsBuilder(G_LOAD);
+  auto &StoreActions = getActionDefinitionsBuilder(G_STORE);
+
+  if (ST.hasSVE()) {
+    LoadActions.legalForTypesWithMemDesc({
+        // 128 bit base sizes
+        {nxv16s8, p0, nxv16s8, 8},
+        {nxv8s16, p0, nxv8s16, 8},
+        {nxv4s32, p0, nxv4s32, 8},
+        {nxv2s64, p0, nxv2s64, 8},
+    });
+
+    // TODO: Add nxv2p0. Consider bitcastIf.
+    // See #92130
+    // https://github.com/llvm/llvm-project/pull/92130#discussion_r1616888461
+    StoreActions.legalForTypesWithMemDesc({
+        // 128 bit base sizes
+        {nxv16s8, p0, nxv16s8, 8},
+        {nxv8s16, p0, nxv8s16, 8},
+        {nxv4s32, p0, nxv4s32, 8},
+        {nxv2s64, p0, nxv2s64, 8},
+    });
+  }
+
+  LoadActions
       .customIf([=](const LegalityQuery &Query) {
         return HasRCPC3 && Query.Types[0] == s128 &&
                Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
@@ -379,7 +408,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .customIf(IsPtrVecPred)
       .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0);
 
-  getActionDefinitionsBuilder(G_STORE)
+  StoreActions
       .customIf([=](const LegalityQuery &Query) {
         return HasRCPC3 && Query.Types[0] == s128 &&
                Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index d8ca5494ba50a4..fe84d0e27189f6 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -309,6 +309,8 @@ bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
   if (!Store.isSimple())
     return false;
   LLT ValTy = MRI.getType(Store.getValueReg());
+  if (ValTy.isScalableVector())
+    return false;
   if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
     return false;
   if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
     return false;
@@ -653,6 +655,11 @@ bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
   // should only be in a single block.
   resetState();
   for (auto &MI : MBB) {
+    // Skip for scalable vectors
+    if (auto *LdSt = dyn_cast<GLoadStore>(&MI);
+        LdSt && MRI.getType(LdSt->getOperand(0).getReg()).isScalableVector())
+      continue;
+
     if (auto *St = dyn_cast<GStore>(&MI)) {
       Register PtrBaseReg;
       APInt Offset;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 44ba9f0429e671..4d2a7fd4121352 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -257,6 +257,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::QQRegClassID:
   case AArch64::QQQRegClassID:
   case AArch64::QQQQRegClassID:
+  case AArch64::ZPRRegClassID:
     return getRegBank(AArch64::FPRRegBankID);
   case AArch64::GPR32commonRegClassID:
   case AArch64::GPR32RegClassID:
@@ -740,12 +741,15 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     LLT Ty = MRI.getType(MO.getReg());
     if (!Ty.isValid())
       continue;
-    OpSize[Idx] = Ty.getSizeInBits();
+    OpSize[Idx] = Ty.getSizeInBits().getKnownMinValue();
 
-    // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
+    // As a top-level guess, vectors, including both scalable and non-scalable
+    // ones, go in FPRs; scalars and pointers in GPRs.
     // For floating-point instructions, scalars go in FPRs.
-    if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc) ||
-        Ty.getSizeInBits() > 64)
+    if (Ty.isVector())
+      OpRegBankIdx[Idx] = PMI_FirstFPR;
+    else if (isPreISelGenericFloatingPointOpcode(Opc) ||
+             Ty.getSizeInBits() > 64)
       OpRegBankIdx[Idx] = PMI_FirstFPR;
     else
       OpRegBankIdx[Idx] = PMI_FirstGPR;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-load-store.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-load-store.ll
new file mode 100644
index 00000000000000..95a5bfa4b038f1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-load-store.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -aarch64-enable-gisel-sve=true < %s | FileCheck %s
+
+define void @scalable_v16i8(ptr %l0, ptr %l1) {
+; CHECK-LABEL: scalable_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 16 x i8>, ptr %l0, align 16
+  store <vscale x 16 x i8> %l3, ptr %l1, align 16
+  ret void
+}
+
+define void @scalable_v8i16(ptr %l0, ptr %l1) {
+; CHECK-LABEL: scalable_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 8 x i16>, ptr %l0, align 16
+  store <vscale x 8 x i16> %l3, ptr %l1, align 16
+  ret void
+}
+
+define void @scalable_v4i32(ptr %l0, ptr %l1) {
+; CHECK-LABEL: scalable_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 4 x i32>, ptr %l0, align 16
+  store <vscale x 4 x i32> %l3, ptr %l1, align 16
+  ret void
+}
+
+define void @scalable_v2i64(ptr %l0, ptr %l1) {
+; CHECK-LABEL: scalable_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
+  %l3 = load <vscale x 2 x i64>, ptr %l0, align 16
+  store <vscale x 2 x i64> %l3, ptr %l1, align 16
+  ret void
+}
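
For context on the scope of the new flag, a hedged sketch follows; it is not part of the patch, the function name and the add operation are hypothetical, and the only behaviour assumed is what the patch's own comments state: with -aarch64-enable-gisel-sve left at its default (false), fallBackToDAGISel still returns true for any instruction whose result or operands are scalable, whereas with the flag set to true GlobalISel no longer takes that up-front fallback, even though only the G_LOAD/G_STORE legalization rules above are added here.

; Illustrative sketch, not from the patch: a scalable operation other than a
; plain load or store. By default this function takes the SelectionDAG
; fallback; with -aarch64-enable-gisel-sve=true the up-front fallback no
; longer triggers, although this patch only legalizes scalable G_LOAD/G_STORE.
define <vscale x 4 x i32> @add_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
  %sum = add <vscale x 4 x i32> %a, %b
  ret <vscale x 4 x i32> %sum
}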