From afbbd542745f1a98a09195d7aa497b44f7179585 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Nov 2025 21:54:08 -0800 Subject: [PATCH 01/22] [ADT] Fix a memory leak in SmallDenseMap (#168011) Tmp.moveFrom(*this); about 10 lines above leaves *this in a zombie state with a bucket array still allocated. This patch fixes the memory leak by calling deallocateBuckets(). --- llvm/include/llvm/ADT/DenseMap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index 9d61a91631fab..333bbcb9399ce 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -1122,6 +1122,7 @@ class SmallDenseMap Tmp.Small = false; Tmp.getLargeRep()->NumBuckets = 0; } else { + deallocateBuckets(); Small = false; NumTombstones = 0; *getLargeRep() = std::move(*Tmp.getLargeRep()); From 4d42a0c3f139f41fb7409e7831f21ab9bca40a0c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Nov 2025 21:54:16 -0800 Subject: [PATCH 02/22] [Scalar] Avoid deep copies of DenseMap (NFC) (#168012) Passing BlockOrder by value results in deep copies of DenseMap. This patch replaces them with const l-value references. --- llvm/lib/Transforms/Scalar/GVNSink.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp index d564e32e26526..4dddb017a98ee 100644 --- a/llvm/lib/Transforms/Scalar/GVNSink.cpp +++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -142,7 +142,7 @@ class ModelledPHI { for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)}); - auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) { + auto ComesBefore = [&](OpsType O1, OpsType O2) { return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first); }; // Sort in a deterministic order. 
@@ -167,8 +167,8 @@ class ModelledPHI { verifyModelledPHI(const DenseMap &BlockOrder) { assert(Values.size() > 1 && Blocks.size() > 1 && "Modelling PHI with less than 2 values"); - auto ComesBefore = [BlockOrder](const BasicBlock *BB1, - const BasicBlock *BB2) { + [[maybe_unused]] auto ComesBefore = [&](const BasicBlock *BB1, + const BasicBlock *BB2) { return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2); }; assert(llvm::is_sorted(Blocks, ComesBefore)); From daa1b60e2a2370c32e044ce08e443926028187ae Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Nov 2025 21:54:24 -0800 Subject: [PATCH 03/22] [MC] Remove a redundant cast (NFC) (#168013) ISD.Symbol is already of type MCSymbolMachO *. Identified with readability-redundant-casting. --- llvm/lib/MC/MachObjectWriter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index a8535dfa8a5d3..b42155d18bf0a 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -570,8 +570,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { // // FIXME: Do not hardcode. if (Asm.registerSymbol(*ISD.Symbol)) - static_cast(ISD.Symbol) - ->setReferenceTypeUndefinedLazy(true); + ISD.Symbol->setReferenceTypeUndefinedLazy(true); } } From 9efe5170bcd39686b2cb7ed0e0b80cd838060565 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Nov 2025 21:54:31 -0800 Subject: [PATCH 04/22] [Hexagon] Remove redundant declarations (NFC) (#168014) These functions are declared in Hexagon.h. Identified with readability-redundant-declaration. 
--- llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp index 6d66237730ded..c9cb4499b1ea0 100644 --- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp +++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp @@ -112,11 +112,6 @@ const std::map QFPInstMap{ {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}}; } // namespace -namespace llvm { -FunctionPass *createHexagonQFPOptimizer(); -void initializeHexagonQFPOptimizerPass(PassRegistry &); -} // namespace llvm - namespace { struct HexagonQFPOptimizer : public MachineFunctionPass { public: From 675bf80ed43c117a0968f237313eea4522b5b353 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Nov 2025 21:54:40 -0800 Subject: [PATCH 05/22] [Basic] Remove a redundant std::string::c_str (NFC) (#168015) defineMacro takes Twine, which can be constructed from const std::string &. Identified with readability-redundant-string-cstr. --- clang/lib/Basic/Targets/SystemZ.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp index 30f846cb900f8..ecd12ed34a20c 100644 --- a/clang/lib/Basic/Targets/SystemZ.cpp +++ b/clang/lib/Basic/Targets/SystemZ.cpp @@ -205,7 +205,7 @@ void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts, Librel |= V.getSubminor().value_or(0); Str += llvm::utohexstr(Librel); - Builder.defineMacro("__TARGET_LIB__", Str.c_str()); + Builder.defineMacro("__TARGET_LIB__", Str); } } From 851f8f79847d527ffe2d69995ce9edd7c7e9695c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 14 Nov 2025 14:30:12 +0800 Subject: [PATCH 06/22] [VPlan] Disable partial reductions again with EVL tail folding (#167863) VPPartialReductionRecipe doesn't yet support an EVL variant, and we guard against this by not calling convertToAbstractRecipes when we're tail folding with EVL. 
However recently some things got shuffled around which means we may detect some scaled reductions in collectScaledReductions and store them in ScaledReductionMap, where outside of convertToAbstractRecipes we may look them up and start e.g. adding a scale factor to an otherwise regular VPReductionPHI. This fixes it by skipping collectScaledReductions, and fixes #167861 --- .../Transforms/Vectorize/LoopVectorize.cpp | 4 +- .../RISCV/partial-reduce-dot-product.ll | 125 ++++++++++++++++++ 2 files changed, 128 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 83844287adc5f..9f0d6fcb237ef 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8395,7 +8395,9 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // --------------------------------------------------------------------------- VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, Builder, BlockMaskCache, LVer); - RecipeBuilder.collectScaledReductions(Range); + // TODO: Handle partial reductions with EVL tail folding. + if (!CM.foldTailWithEVL()) + RecipeBuilder.collectScaledReductions(Range); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll index 61e3a1848ceed..8d3026e63748a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll @@ -3,6 +3,7 @@ ; RUN: opt -passes=loop-vectorize -mattr=+v,+experimental-zvqdotq -prefer-predicate-over-epilogue=scalar-epilogue -S < %s | FileCheck %s --check-prefixes=CHECK,ZVQDOTQ ; RUN: opt -passes=loop-vectorize -mattr=+v -scalable-vectorization=off -prefer-predicate-over-epilogue=scalar-epilogue -S < %s | FileCheck %s --check-prefixes=FIXED,FIXED-V ; RUN: opt -passes=loop-vectorize -mattr=+v,+experimental-zvqdotq -scalable-vectorization=off -prefer-predicate-over-epilogue=scalar-epilogue -S < %s | FileCheck %s --check-prefixes=FIXED,FIXED-ZVQDOTQ +; RUN: opt -passes=loop-vectorize -mattr=+v,+experimental-zvqdotq -S < %s | FileCheck %s --check-prefixes=CHECK,TAILFOLD ; TODO: Remove -prefer-predicate-over-epilogue=scalar-epilogue when partial reductions with EVL tail folding is supported. 
@@ -147,6 +148,37 @@ define i32 @vqdot(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ: for.exit: ; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; +; TAILFOLD-LABEL: define i32 @vqdot( +; TAILFOLD-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[VECTOR_PH:%.*]] +; TAILFOLD: vector.ph: +; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] +; TAILFOLD: vector.body: +; TAILFOLD-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP2:%.*]] = sext [[VP_OP_LOAD]] to +; TAILFOLD-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP3]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP4:%.*]] = sext [[VP_OP_LOAD1]] to +; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] +; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] +; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 +; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] +; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TAILFOLD: middle.block: +; 
TAILFOLD-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP7]]) +; TAILFOLD-NEXT: br label [[FOR_EXIT:%.*]] +; TAILFOLD: for.exit: +; TAILFOLD-NEXT: ret i32 [[TMP10]] +; entry: br label %for.body @@ -309,6 +341,37 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ: for.exit: ; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; +; TAILFOLD-LABEL: define i32 @vqdotu( +; TAILFOLD-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[VECTOR_PH:%.*]] +; TAILFOLD: vector.ph: +; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] +; TAILFOLD: vector.body: +; TAILFOLD-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP2:%.*]] = zext [[VP_OP_LOAD]] to +; TAILFOLD-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP3]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP4:%.*]] = zext [[VP_OP_LOAD1]] to +; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] +; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] +; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 +; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] 
+; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; TAILFOLD: middle.block: +; TAILFOLD-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP7]]) +; TAILFOLD-NEXT: br label [[FOR_EXIT:%.*]] +; TAILFOLD: for.exit: +; TAILFOLD-NEXT: ret i32 [[TMP10]] +; entry: br label %for.body @@ -471,6 +534,37 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ: for.exit: ; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; +; TAILFOLD-LABEL: define i32 @vqdotsu( +; TAILFOLD-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[VECTOR_PH:%.*]] +; TAILFOLD: vector.ph: +; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] +; TAILFOLD: vector.body: +; TAILFOLD-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP2:%.*]] = zext [[VP_OP_LOAD]] to +; TAILFOLD-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP3]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP4:%.*]] = sext [[VP_OP_LOAD1]] to +; TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] +; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] +; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 
[[TMP0]]) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 +; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] +; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TAILFOLD: middle.block: +; TAILFOLD-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP7]]) +; TAILFOLD-NEXT: br label [[FOR_EXIT:%.*]] +; TAILFOLD: for.exit: +; TAILFOLD-NEXT: ret i32 [[TMP10]] +; entry: br label %for.body @@ -632,6 +726,37 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 { ; FIXED-ZVQDOTQ: for.exit: ; FIXED-ZVQDOTQ-NEXT: ret i32 [[TMP13]] ; +; TAILFOLD-LABEL: define i32 @vqdotsu2( +; TAILFOLD-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; TAILFOLD-NEXT: entry: +; TAILFOLD-NEXT: br label [[VECTOR_PH:%.*]] +; TAILFOLD: vector.ph: +; TAILFOLD-NEXT: br label [[VECTOR_BODY:%.*]] +; TAILFOLD: vector.body: +; TAILFOLD-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[AVL:%.*]] = phi i64 [ 1024, [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; TAILFOLD-NEXT: [[TMP0:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true) +; TAILFOLD-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP1]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP2:%.*]] = sext [[VP_OP_LOAD]] to +; TAILFOLD-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i8.p0(ptr align 1 [[TMP3]], splat (i1 true), i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP4:%.*]] = zext [[VP_OP_LOAD1]] to +; 
TAILFOLD-NEXT: [[TMP5:%.*]] = mul [[TMP4]], [[TMP2]] +; TAILFOLD-NEXT: [[TMP6:%.*]] = add [[TMP5]], [[VEC_PHI]] +; TAILFOLD-NEXT: [[TMP7]] = call @llvm.vp.merge.nxv4i32( splat (i1 true), [[TMP6]], [[VEC_PHI]], i32 [[TMP0]]) +; TAILFOLD-NEXT: [[TMP8:%.*]] = zext i32 [[TMP0]] to i64 +; TAILFOLD-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] +; TAILFOLD-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] +; TAILFOLD-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 +; TAILFOLD-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TAILFOLD: middle.block: +; TAILFOLD-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP7]]) +; TAILFOLD-NEXT: br label [[FOR_EXIT:%.*]] +; TAILFOLD: for.exit: +; TAILFOLD-NEXT: ret i32 [[TMP10]] +; entry: br label %for.body From 6a8072838c753fd91e366a96db8d597272018423 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 14 Nov 2025 08:36:48 +0100 Subject: [PATCH 07/22] [clang][bytecode][NFC] Limit pointer and array ops to integer indices (#168022) AluOpcode includes fixed point as well, which isn't possible here. 
--- clang/lib/AST/ByteCode/Opcodes.td | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/ByteCode/Opcodes.td b/clang/lib/AST/ByteCode/Opcodes.td index ddf1a8fcc98b1..a236f89dcf78b 100644 --- a/clang/lib/AST/ByteCode/Opcodes.td +++ b/clang/lib/AST/ByteCode/Opcodes.td @@ -361,8 +361,14 @@ def NarrowPtr : Opcode; // [Pointer] -> [Pointer] def ExpandPtr : Opcode; // [Pointer, Offset] -> [Pointer] -def ArrayElemPtr : AluOpcode; -def ArrayElemPtrPop : AluOpcode; +def ArrayElemPtr : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} +def ArrayElemPtrPop : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} def ArrayElemPop : Opcode { let Args = [ArgUint32]; @@ -536,9 +542,15 @@ def InitElemPop : Opcode { //===----------------------------------------------------------------------===// // [Pointer, Integral] -> [Pointer] -def AddOffset : AluOpcode; +def AddOffset : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} // [Pointer, Integral] -> [Pointer] -def SubOffset : AluOpcode; +def SubOffset : Opcode { + let Types = [IntegralTypeClass]; + let HasGroup = 1; +} // [Pointer, Pointer] -> [Integral] def SubPtr : Opcode { From 6b16b31bbd861d634f8577b00a132c18a636bc2a Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Fri, 14 Nov 2025 15:48:37 +0800 Subject: [PATCH 08/22] [llvm][RISCV] Support P extension CodeGen (#167882) This patch supports PADD_W, PSUB_W, PSADD_W, PSADDU_W, PSSUB_W, PSSUBU_W, PAADD_W and PAADDU_W --- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 14 ++ llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 176 +++++++++++++++++++++++ 2 files changed, 190 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 7637047aabf2d..126a39996c741 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1539,6 +1539,20 @@ let Predicates = [HasStdExtP, IsRV64] in { // 32-bit PLI SD node 
pattern def: Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>; + // Basic 32-bit arithmetic patterns + def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>; + + // 32-bit saturating add/sub patterns + def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_W GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_W GPR:$rs1, GPR:$rs2)>; + + // 32-bit averaging patterns + def: Pat<(v2i32 (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_W GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_W GPR:$rs1, GPR:$rs2)>; + // 32-bit averaging-sub patterns def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>; diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll index 000a95fb6e0f8..353039e9482e9 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll @@ -495,6 +495,182 @@ define void @test_extract_vector_32(ptr %ret_ptr, ptr %a_ptr) { ret void } +; Test basic add/sub operations for v2i32 (RV64 only) +define void @test_padd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_padd_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: padd.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = add <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_psub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_psub_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: 
psub.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = sub <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test saturating add operations for v2i32 (RV64 only) +define void @test_psadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_psadd_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: psadd.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_psaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_psaddu_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: psaddu.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test saturating sub operations for v2i32 (RV64 only) +define void @test_pssub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_pssub_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: pssub.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_pssubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_pssubu_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: pssubu.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + 
%res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test averaging floor signed operations for v2i32 (RV64 only) +; avgfloors pattern: (a + b) arithmetic shift right 1 +define void @test_paadd_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_paadd_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: paadd.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %ext.a = sext <2 x i32> %a to <2 x i64> + %ext.b = sext <2 x i32> %b to <2 x i64> + %add = add nsw <2 x i64> %ext.a, %ext.b + %shift = ashr <2 x i64> %add, + %res = trunc <2 x i64> %shift to <2 x i32> + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test averaging floor unsigned operations for v2i32 (RV64 only) +; avgflooru pattern: (a & b) + ((a ^ b) >> 1) +define void @test_paaddu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_paaddu_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: paaddu.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %and = and <2 x i32> %a, %b + %xor = xor <2 x i32> %a, %b + %shift = lshr <2 x i32> %xor, + %res = add <2 x i32> %and, %shift + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +; Test averaging floor subtraction signed for v2i32 (RV64 only) +; pasub pattern: (a - b) arithmetic shift right 1 +define void @test_pasub_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_pasub_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: pasub.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %a_ext = sext <2 x i32> %a to <2 x i64> + %b_ext = sext <2 x i32> %b to <2 x i64> + %sub = sub <2 x i64> %a_ext, %b_ext + 
%res = ashr <2 x i64> %sub, + %res_trunc = trunc <2 x i64> %res to <2 x i32> + store <2 x i32> %res_trunc, ptr %ret_ptr + ret void +} + +; Test averaging floor subtraction unsigned for v2i32 (RV64 only) +; pasubu pattern: (a - b) logical shift right 1 +define void @test_pasubu_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_pasubu_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: pasubu.w a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %a_ext = zext <2 x i32> %a to <2 x i64> + %b_ext = zext <2 x i32> %b to <2 x i64> + %sub = sub <2 x i64> %a_ext, %b_ext + %res = lshr <2 x i64> %sub, + %res_trunc = trunc <2 x i64> %res to <2 x i32> + store <2 x i32> %res_trunc, ptr %ret_ptr + ret void +} + ; Intrinsic declarations declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) From 9822905b38096726534d712daeb9efa7c5fef158 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 14 Nov 2025 09:54:59 +0200 Subject: [PATCH 09/22] [libcxx] [doc] Update the docs about LIBCXX_ENABLE_FILESYSTEM (#167843) Since 1939eb3dc2330af6fb9609a7c3bd5276e127c9ce, std::filesystem is enabled by default in MSVC builds too. --- libcxx/docs/VendorDocumentation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/docs/VendorDocumentation.rst b/libcxx/docs/VendorDocumentation.rst index 7eba598909a74..9ce12d565b7eb 100644 --- a/libcxx/docs/VendorDocumentation.rst +++ b/libcxx/docs/VendorDocumentation.rst @@ -162,10 +162,10 @@ General purpose options .. option:: LIBCXX_ENABLE_FILESYSTEM:BOOL - **Default**: ``ON`` except on Windows when using MSVC. + **Default**: ``ON`` This option can be used to enable or disable the filesystem components on - platforms that may not support them. For example on Windows when using MSVC. 
+ platforms that may not support them. .. option:: LIBCXX_ENABLE_WIDE_CHARACTERS:BOOL From 80ae168f221526c0f4c9543e861a773214e012cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 14 Nov 2025 09:56:10 +0200 Subject: [PATCH 10/22] [libcxx] [doc] Document the supported target versions of Windows (#167845) The llvm-mingw toolchains default to `_WIN32_WINNT=0x601`, so this configuration is covered by our CI build matrix. --- libcxx/docs/VendorDocumentation.rst | 6 ++++++ libcxx/docs/index.rst | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/libcxx/docs/VendorDocumentation.rst b/libcxx/docs/VendorDocumentation.rst index 9ce12d565b7eb..15677c7428263 100644 --- a/libcxx/docs/VendorDocumentation.rst +++ b/libcxx/docs/VendorDocumentation.rst @@ -376,6 +376,12 @@ newer (19.14) is required. Libc++ also supports being built with clang targeting MinGW environments. +Libc++ supports Windows 7 or newer. However, the minimum runtime version +of the build is determined by the ``_WIN32_WINNT`` define, which in many +SDKs defaults to the latest version. To build a version that runs on an +older version, define e.g. ``_WIN32_WINNT=0x601`` while building libc++, +to target Windows 7. 
+ CMake + Visual Studio --------------------- diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index 03dfb9d41aa1a..d006b52f24cec 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -147,7 +147,7 @@ macOS 10.13+ i386, x86_64, arm64 FreeBSD 12+ i386, x86_64, arm Linux i386, x86_64, arm, arm64 Only glibc-2.24 and later and no other libc is officially supported Android 5.0+ i386, x86_64, arm, arm64 -Windows i386, x86_64, arm64 Both MSVC and MinGW style environments, ABI in MSVC environments is :doc:`unstable ` +Windows 7+ i386, x86_64, arm64 Both MSVC and MinGW style environments, ABI in MSVC environments is :doc:`unstable ` AIX 7.2TL5+ powerpc, powerpc64 Embedded (picolibc) arm ===================== ========================= ============================ From b5c459d3a1c7dcd442ab216fc0d6d624bae78358 Mon Sep 17 00:00:00 2001 From: Abhishek Varma Date: Fri, 14 Nov 2025 14:32:23 +0530 Subject: [PATCH 11/22] [Linalg] Add basic infra to add matchers for linalg.*conv*/*pool* ops (#163724) -- This commit includes the basic infra/utilities to add matchers for linalg.*conv*/*pool* ops - such that given a `linalg.generic` op it identifies which linalg.*conv*/*pool* op it is. -- It adds a few representative linalg.*conv*/*pool* ops to demo the matchers' capability and does so as part of `linalg-specialize-generic-ops` pass. -- The goal is directed towards addressing the aim of [[RFC] Op explosion in Linalg](https://discourse.llvm.org/t/rfc-op-explosion-in-linalg/82863) iteratively for `*conv*/*pooling*` ops. -- This is part-1 of a series of PRs aimed to add matchers for Convolution ops. 
-- For further details, refer to https://github.com/llvm/llvm-project/pull/163374#pullrequestreview-3341048722 Signed-off-by: Abhishek Varma --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 11 + .../Dialect/Linalg/Transforms/Specialize.cpp | 50 ++ mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 579 ++++++++++++++++++ .../convolution/roundtrip-convolution.mlir | 119 ++++ 4 files changed, 759 insertions(+) create mode 100644 mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index de07f500a8669..9da01f30b52d2 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -102,6 +102,17 @@ GenericOp makeMemRefCopyOp(OpBuilder &b, Location loc, Value from, Value to); std::optional> getReassociationMapForFoldingUnitDims(ArrayRef mixedSizes); +//===----------------------------------------------------------------------===// +// Convolution matcher utility +//===----------------------------------------------------------------------===// + +/// Given a linalg `op` this function returns true if it is a convolution op of +/// type `ConvOpTy` and populates `dilations` and `strides` with values inferred +/// from the indexing maps. 
+template +bool isaConvolutionOpOfType(LinalgOp op, SmallVector *dilations, + SmallVector *strides); + //===----------------------------------------------------------------------===// // Fusion / Tiling utilities //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp index 40fc0d68e358f..249a74b007dce 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Specialize.cpp @@ -237,6 +237,51 @@ static FailureOr specializeLinalgContractions(RewriterBase &rewriter, return replaceWithMatmulVariant(rewriter, genericOp); } +/// Utility to specialize a `genericOp` with a convolution op of type `ConvOpTy` +/// with `dilations` and `strides`. +template +static FailureOr +specializeToConvOp(RewriterBase &rewriter, GenericOp genericOp, + ArrayRef dilations, ArrayRef strides) { + SmallVector inputs = genericOp.getDpsInputs(); + ValueRange outputs = genericOp.getDpsInits(); + SmallVector indexingMaps = genericOp.getIndexingMapsArray(); + SmallVector resultTypes = genericOp.hasPureTensorSemantics() + ? TypeRange(ValueRange(outputs)) + : TypeRange{}; + Attribute stridesAttr = rewriter.getI64TensorAttr(strides); + Attribute dilationsAttr = rewriter.getI64TensorAttr(dilations); + LinalgOp namedOp = rewriter.replaceOpWithNewOp( + genericOp, resultTypes, inputs, outputs, stridesAttr, dilationsAttr); + return namedOp; +} + +/// Converts linalg.generic to named linalg.*conv/pooling* where possible. +static FailureOr specializeLinalgConvolutions(RewriterBase &rewriter, + GenericOp genericOp) { + SmallVector dilations, strides; +#define CONV_OP_SPECIALIZER(ConvOpTy) \ + if (isaConvolutionOpOfType(genericOp, &dilations, &strides)) \ + return specializeToConvOp(rewriter, genericOp, dilations, \ + strides); \ + // ----------------------------- + // Depthwise Convolution ops. 
+ // ----------------------------- + CONV_OP_SPECIALIZER(linalg::DepthwiseConv1DNwcWcOp); + CONV_OP_SPECIALIZER(linalg::DepthwiseConv2DNchwChwOp); + CONV_OP_SPECIALIZER(linalg::DepthwiseConv3DNdhwcDhwcmOp); + // ----------------------------- + // Pooling ops. + // ----------------------------- + CONV_OP_SPECIALIZER(linalg::PoolingNhwcMaxOp); + CONV_OP_SPECIALIZER(linalg::PoolingNhwcMinOp); + CONV_OP_SPECIALIZER(linalg::PoolingNhwcSumOp); + CONV_OP_SPECIALIZER(linalg::PoolingNhwcMaxUnsignedOp); + CONV_OP_SPECIALIZER(linalg::PoolingNhwcMinUnsignedOp); +#undef CONV_OP_SPECIALIZER + return failure(); +} + } // namespace //===----------------------------------------------------------------------===// @@ -316,6 +361,11 @@ FailureOr mlir::linalg::specializeGenericOp(RewriterBase &rewriter, if (isaContractionOpInterface(genericOp)) { return specializeLinalgContractions(rewriter, genericOp); } + + // Convolution - e.g. *conv/pooling* + if (isaConvolutionOpInterface(genericOp)) { + return specializeLinalgConvolutions(rewriter, genericOp); + } return failure(); } diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index 6eeb2063e0a9e..5dd5e1b055f0d 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -235,6 +235,585 @@ bool isReductionIterator(utils::IteratorType iteratorType) { return iteratorType == utils::IteratorType::reduction; } +//===----------------------------------------------------------------------===// +// Convolution matcher utilities +//===----------------------------------------------------------------------===// + +/// Returns the BlockArgument that leads to `val`, if any. Traverses optional +/// ext* ops. 
+static BlockArgument getBlockArgumentWithOptionalExtOps(Value val) { + BlockArgument blockArg = dyn_cast(val); + if ((blockArg)) + return blockArg; + + Operation *defOp = val.getDefiningOp(); + if (!dyn_cast_if_present(defOp) && + !dyn_cast_if_present(defOp) && + !dyn_cast_if_present(defOp)) { + return nullptr; + } + return dyn_cast(defOp->getOperand(0)); +} + +/// Utility to match block body for convolution ops. +/// The body is thus expected to yield :- +/// %out + (%lhs * %rhs) +/// where: %lhs, %rhs and %out are block arguments and +/// %lhs and %rhs can have optional upcast operation. +static bool bodyMatcherForConvolutionOps(Value yieldVal, Block *body) { + Operation *addOp = yieldVal.getDefiningOp(); + if (!isa_and_present(addOp)) + return false; + + Operation *mulOp = addOp->getOperand(1).getDefiningOp(); + if (!isa_and_present(mulOp)) + return false; + + BlockArgument lhsBlockArg = + getBlockArgumentWithOptionalExtOps(mulOp->getOperand(0)); + BlockArgument rhsBlockArg = + getBlockArgumentWithOptionalExtOps(mulOp->getOperand(1)); + BlockArgument outBlockArg = + getBlockArgumentWithOptionalExtOps(addOp->getOperand(0)); + if (!lhsBlockArg || !rhsBlockArg || !outBlockArg || + lhsBlockArg.getOwner() != body || rhsBlockArg.getOwner() != body || + outBlockArg.getOwner() != body || lhsBlockArg.getArgNumber() != 0 || + rhsBlockArg.getArgNumber() != 1 || outBlockArg.getArgNumber() != 2) + return false; + return true; +} + +/// Utility to match block body for linalg.pool* ops. 
+template +static bool bodyMatcherForPoolOps(Value yieldVal, Block *body) { + Operation *defOp = yieldVal.getDefiningOp(); + if (!(isa_and_present(defOp) || ...)) + return false; + + BlockArgument lhsArg = + getBlockArgumentWithOptionalExtOps(defOp->getOperand(0)); + BlockArgument rhsArg = + getBlockArgumentWithOptionalExtOps(defOp->getOperand(1)); + if (!lhsArg || !rhsArg || lhsArg.getOwner() != body || + rhsArg.getOwner() != body || lhsArg.getArgNumber() != 2 || + rhsArg.getArgNumber() != 0) + return false; + return true; +} + +static bool bodyMatcherForMaxSignedPoolOps(Value yieldVal, Block *body) { + return bodyMatcherForPoolOps(yieldVal, + body); +} + +// max_unsigned ops should not allow float data type. +// TODO(#164800): Retire OPDSL logic. +static bool bodyMatcherForMaxUnsignedPoolOps(Value yieldVal, Block *body) { + return bodyMatcherForPoolOps(yieldVal, + body); +} + +static bool bodyMatcherForMinSignedPoolOps(Value yieldVal, Block *body) { + return bodyMatcherForPoolOps(yieldVal, + body); +} + +// min_unsigned ops should not allow float data type. +// TODO(#164800): Retire OPDSL logic. +static bool bodyMatcherForMinUnsignedPoolOps(Value yieldVal, Block *body) { + return bodyMatcherForPoolOps(yieldVal, + body); +} + +static bool bodyMatcherForSumPoolOps(Value yieldVal, Block *body) { + return bodyMatcherForPoolOps(yieldVal, body); +} + +static AffineExpr getAffineMapDim(ArrayAttr indexingMaps, uint32_t mapIndex, + uint32_t dimIndex) { + auto affineMap = cast(indexingMaps[mapIndex]).getValue(); + if (dimIndex < affineMap.getNumResults()) + return affineMap.getResult(dimIndex); + return nullptr; +} + +/// Check if `expr` is either: +/// - a dimension expr alone (implying multiplication by 1), or +/// - a multiplication of dimension expr by any positive constant != 1 +/// In both cases we will capture the dimension expression into `dim` and +/// return the constant multiplier. Returns -1 in case of a match failure. 
+static int64_t isDimTimesConstantOrDimOnly(AffineExpr expr, AffineExpr &dim) { + if ((dim = dyn_cast(expr))) + return 1; + + auto mulExpr = dyn_cast(expr); + if (!mulExpr || mulExpr.getKind() != AffineExprKind::Mul) + return -1; + + AffineExpr lhs = mulExpr.getLHS(); + AffineExpr rhs = mulExpr.getRHS(); + + AffineConstantExpr cst = nullptr; + if (((dim = dyn_cast(lhs)) && + (cst = dyn_cast(rhs))) || + ((dim = dyn_cast(rhs)) && + (cst = dyn_cast(lhs)))) + return cst.getValue(); + return -1; +} + +/// Given an array of AffineMaps `indexingMaps` verify the following +/// commutatively:- +/// indexingMaps[0].getResult(iDim) == +/// indexingMaps[1].getResult(fDim) * + +/// indexingMaps[n-1].getResult(oDim) * +/// where, +/// - c0 and c1 can be any constant, +/// - n is the size of the indexingMaps' array, +/// - 0, 1 and n-1 are input, filter and output map indices respectively, +/// - iDim, fDim and oDim are the input, filter and output dimension +/// indices in their respective indexing maps +/// Example: +/// #inputMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) +/// -> (d0, d1 * 2 + d4 * 3, d2 + d5, d6)> +/// #filterMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d4, d5, d6, d3)> +/// #outputMap = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3)> +/// +/// Here, +/// #inputMap[1] = #outputMap[1] * 2 + #filterMap[0] * 3 +/// Therefore, +/// matchConvDimAddExprPattern(indexingMaps, 1, 0, 1, dilation, stride) +/// would return true and update dilation = 3 and stride = 2 +static bool matchConvDimAddExprPattern(ArrayAttr indexingMaps, unsigned iDim, + unsigned fDim, unsigned oDim, + int64_t &dilation, int64_t &stride) { + unsigned inputMapIdx = 0, filterMapIdx = 1, + outputMapIdx = indexingMaps.size() - 1; + AffineExpr inpExpr = getAffineMapDim(indexingMaps, inputMapIdx, iDim); + auto addExpr = dyn_cast(inpExpr); + if (!addExpr || addExpr.getKind() != AffineExprKind::Add) + return false; + + AffineExpr dim0, dim1; + int64_t c0 = 
isDimTimesConstantOrDimOnly(addExpr.getLHS(), dim0); + int64_t c1 = isDimTimesConstantOrDimOnly(addExpr.getRHS(), dim1); + + if (c0 == -1 || c1 == -1) + return false; + // Pattern matched with dims and constants extracted. + AffineExpr fExpr = getAffineMapDim(indexingMaps, filterMapIdx, fDim); + AffineExpr oExpr = getAffineMapDim(indexingMaps, outputMapIdx, oDim); + if (dim0 == fExpr && dim1 == oExpr) { + dilation = c0; + stride = c1; + return true; + } + if (dim1 == fExpr && dim0 == oExpr) { + dilation = c1; + stride = c0; + return true; + } + return false; +} + +// --------------------------------------------- +// Matchers for specific convolution operation. +// --------------------------------------------- + +/// Returns true if the given indexing maps matches with the expected indexing +/// maps. +static bool convLayoutMatches(ArrayRef> mapListExpected, + ArrayAttr indexingMaps, MLIRContext *context) { + SmallVector expectedIndexingMaps = + AffineMap::inferFromExprList(mapListExpected, context); + return indexingMaps == + ArrayAttr::get( + context, llvm::to_vector<4>(llvm::map_range( + expectedIndexingMaps, [&](AffineMap m) -> Attribute { + return AffineMapAttr::get(m); + }))); +} + +// #inputMap = affine_map<(N, W, C, w) -> (N, W + w, C)> +// #filterMap = affine_map<(N, W, C, w) -> (w, C)> +// #outputMap = affine_map<(N, W, C, w) -> (N, W, C)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(1, 1); + *strides = SmallVector(1, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr W = getAffineDimExpr(1, context); + AffineExpr C = getAffineDimExpr(2, context); + AffineExpr w = getAffineDimExpr(3, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // 
Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, W * (*strides)[0] + w * (*dilations)[0], C}, + /*filterMap=*/{w, C}, + /*outputMap=*/{N, W, C}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForConvolutionOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, C, H + h, W + w)> +// #filterMap = affine_map<(N, H, W, C, h, w) -> (C, h, w)> +// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, C, H, W)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(2, 1); + *strides = SmallVector(2, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr H = getAffineDimExpr(1, context); + AffineExpr W = getAffineDimExpr(2, context); + AffineExpr C = getAffineDimExpr(3, context); + AffineExpr h = getAffineDimExpr(4, context); + AffineExpr w = getAffineDimExpr(5, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[0], (*strides)[0])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/3, /*fDim=*/2, + /*oDim=*/3, (*dilations)[1], (*strides)[1])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, C, H * (*strides)[0] + h * (*dilations)[0], + W * (*strides)[1] + w * 
(*dilations)[1]}, + /*filterMap=*/{C, h, w}, + /*outputMap=*/{N, C, H, W}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForConvolutionOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, D, H, W, CM, d, h, w, C) +// -> (N, D + d, H + h, W + w, C)> +// #filterMap = affine_map<(N, D, H, W, CM, d, h, w, C) +// -> (d, h, w, C, CM)> +// #outputMap = affine_map<(N, D, H, W, CM, d, h, w, C) +// -> (N, D, H, W, C, CM)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(3, 1); + *strides = SmallVector(3, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr D = getAffineDimExpr(1, context); + AffineExpr H = getAffineDimExpr(2, context); + AffineExpr W = getAffineDimExpr(3, context); + AffineExpr CM = getAffineDimExpr(4, context); + AffineExpr d = getAffineDimExpr(5, context); + AffineExpr h = getAffineDimExpr(6, context); + AffineExpr w = getAffineDimExpr(7, context); + AffineExpr C = getAffineDimExpr(8, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: D * stride + d * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[1], (*strides)[1])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/3, /*fDim=*/2, + /*oDim=*/3, (*dilations)[2], (*strides)[2])) + return false; + // Match expected indexing maps + if 
(!convLayoutMatches( + {/*inputMap=*/{N, D * (*strides)[0] + d * (*dilations)[0], + H * (*strides)[1] + h * (*dilations)[1], + W * (*strides)[2] + w * (*dilations)[2], C}, + /*filterMap=*/{d, h, w, C, CM}, + /*outputMap=*/{N, D, H, W, C, CM}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForConvolutionOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)> +// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)> +// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(2, 1); + *strides = SmallVector(2, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr H = getAffineDimExpr(1, context); + AffineExpr W = getAffineDimExpr(2, context); + AffineExpr C = getAffineDimExpr(3, context); + AffineExpr h = getAffineDimExpr(4, context); + AffineExpr w = getAffineDimExpr(5, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[1], (*strides)[1])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0], + W * (*strides)[1] + w * (*dilations)[1], C}, + /*filterMap=*/{h, w}, + /*outputMap=*/{N, H, W, C}}, + indexingMaps, context)) + return 
false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForMaxSignedPoolOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)> +// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)> +// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(2, 1); + *strides = SmallVector(2, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr H = getAffineDimExpr(1, context); + AffineExpr W = getAffineDimExpr(2, context); + AffineExpr C = getAffineDimExpr(3, context); + AffineExpr h = getAffineDimExpr(4, context); + AffineExpr w = getAffineDimExpr(5, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[1], (*strides)[1])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0], + W * (*strides)[1] + w * (*dilations)[1], C}, + /*filterMap=*/{h, w}, + /*outputMap=*/{N, H, W, C}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForMinSignedPoolOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)> +// 
#filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)> +// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(2, 1); + *strides = SmallVector(2, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr H = getAffineDimExpr(1, context); + AffineExpr W = getAffineDimExpr(2, context); + AffineExpr C = getAffineDimExpr(3, context); + AffineExpr h = getAffineDimExpr(4, context); + AffineExpr w = getAffineDimExpr(5, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[1], (*strides)[1])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0], + W * (*strides)[1] + w * (*dilations)[1], C}, + /*filterMap=*/{h, w}, + /*outputMap=*/{N, H, W, C}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForSumPoolOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)> +// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)> +// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + 
assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(2, 1); + *strides = SmallVector(2, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr H = getAffineDimExpr(1, context); + AffineExpr W = getAffineDimExpr(2, context); + AffineExpr C = getAffineDimExpr(3, context); + AffineExpr h = getAffineDimExpr(4, context); + AffineExpr w = getAffineDimExpr(5, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[1], (*strides)[1])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0], + W * (*strides)[1] + w * (*dilations)[1], C}, + /*filterMap=*/{h, w}, + /*outputMap=*/{N, H, W, C}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForMaxUnsignedPoolOps(yieldVal, body); +} + +// #inputMap = affine_map<(N, H, W, C, h, w) -> (N, H + h, W + w, C)> +// #filterMap = affine_map<(N, H, W, C, h, w) -> (h, w)> +// #outputMap = affine_map<(N, H, W, C, h, w) -> (N, H, W, C)> +template <> +bool isaConvolutionOpOfType( + LinalgOp op, SmallVector *dilations, + SmallVector *strides) { + if (isa(op)) + return true; + + assert(isaConvolutionOpInterface(op) && + "expected op to implement ConvolutionOpInterface"); + + *dilations = SmallVector(2, 1); + *strides = SmallVector(2, 1); + MLIRContext *context = op->getContext(); + AffineExpr N = getAffineDimExpr(0, context); + AffineExpr H = 
getAffineDimExpr(1, context); + AffineExpr W = getAffineDimExpr(2, context); + AffineExpr C = getAffineDimExpr(3, context); + AffineExpr h = getAffineDimExpr(4, context); + AffineExpr w = getAffineDimExpr(5, context); + ArrayAttr indexingMaps = op.getIndexingMaps(); + // First fetch dilations/strides :- + // Match: H * stride + h * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/1, /*fDim=*/0, + /*oDim=*/1, (*dilations)[0], (*strides)[0])) + return false; + // Match: W * stride + w * dilation + if (!matchConvDimAddExprPattern(indexingMaps, /*iDim=*/2, /*fDim=*/1, + /*oDim=*/2, (*dilations)[1], (*strides)[1])) + return false; + // Match expected indexing maps + if (!convLayoutMatches( + {/*inputMap=*/{N, H * (*strides)[0] + h * (*dilations)[0], + W * (*strides)[1] + w * (*dilations)[1], C}, + /*filterMap=*/{h, w}, + /*outputMap=*/{N, H, W, C}}, + indexingMaps, context)) + return false; + // Match body + Block *body = op.getBlock(); + auto yieldOp = cast(body->getTerminator()); + Value yieldVal = yieldOp.getOperand(0); + return bodyMatcherForMinUnsignedPoolOps(yieldVal, body); +} + Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type, Value source, Value pad, bool nofold, ValueRange typeDynDims) { diff --git a/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir b/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir new file mode 100644 index 0000000000000..8f22cc749bee9 --- /dev/null +++ b/mlir/test/Dialect/Linalg/convolution/roundtrip-convolution.mlir @@ -0,0 +1,119 @@ +// The following test examples of linalg convolution named ops lowered to linalg.generic and then +// lifted back up to named op. +// RUN: mlir-opt %s -linalg-generalize-named-ops | mlir-opt --linalg-specialize-generic-ops | FileCheck %s --implicit-check-not=linalg.generic + +// NOTE: Most tests in this file use dynamic shapes as the underlying transformations don't modify shapes. There's one exception that's added as a smoke test. 
+func.func @depthwise_conv_1d_nwc_wc_static(%input: tensor<1x25x8xi8>, %filter: tensor<3x8xi8>, %output: tensor<1x10x8xi32>) -> tensor<1x10x8xi32> { + %0 = linalg.depthwise_conv_1d_nwc_wc + {dilations = dense<3> : tensor<1xi64>, strides = dense<2> : tensor<1xi64>} + ins (%input, %filter: tensor<1x25x8xi8>, tensor<3x8xi8>) + outs (%output: tensor<1x10x8xi32>) -> tensor<1x10x8xi32> + return %0 : tensor<1x10x8xi32> +} +// CHECK: @depthwise_conv_1d_nwc_wc_static +// CHECK: linalg.depthwise_conv_1d_nwc_wc +// CHECK-SAME: dilations = dense<3> : tensor<1xi64>, strides = dense<2> : tensor<1xi64> + +// ----- + +func.func @depthwise_conv_2d_nchw_chw(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.depthwise_conv_2d_nchw_chw + {dilations = dense<[2,3]> : vector<2xi64>, strides = dense<[4,5]> : vector<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @depthwise_conv_2d_nchw_chw +// CHECK: linalg.depthwise_conv_2d_nchw_chw +// CHECK-SAME: dilations = dense<[2, 3]> : tensor<2xi64>, strides = dense<[4, 5]> : tensor<2xi64> + +// ----- + +func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.depthwise_conv_3d_ndhwc_dhwcm + {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @depthwise_conv_3d_ndhwc_dhwcm +// CHECK: linalg.depthwise_conv_3d_ndhwc_dhwcm +// CHECK-SAME: dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64> + +// ----- + +func.func @pooling_nhwc_max(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.pooling_nhwc_max + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @pooling_nhwc_max +// CHECK: 
linalg.pooling_nhwc_max +// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> + +// ----- + +func.func @pooling_nhwc_min(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.pooling_nhwc_min + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @pooling_nhwc_min +// CHECK: linalg.pooling_nhwc_min +// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> + +// ----- + +func.func @pooling_nhwc_sum(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.pooling_nhwc_sum + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @pooling_nhwc_sum +// CHECK: linalg.pooling_nhwc_sum +// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> + +// ----- + +func.func @pooling_nhwc_max_unsigned(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.pooling_nhwc_max_unsigned + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @pooling_nhwc_max_unsigned +// CHECK: linalg.pooling_nhwc_max_unsigned +// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> + +// ----- + +func.func @pooling_nhwc_min_unsigned_integer(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.pooling_nhwc_min_unsigned + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @pooling_nhwc_min_unsigned_integer +// CHECK: linalg.pooling_nhwc_min_unsigned +// CHECK-SAME: dilations = 
dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> + +// ----- + +func.func @pooling_nhwc_min_unsigned_float(%input: tensor, %filter: tensor, %output: tensor) -> tensor { + %0 = linalg.pooling_nhwc_min_unsigned + {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} + ins (%input, %filter: tensor, tensor) + outs (%output: tensor) -> tensor + return %0 : tensor +} +// CHECK: @pooling_nhwc_min_unsigned_float +// CHECK: linalg.pooling_nhwc_min +// CHECK-SAME: dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> From bf07226c6d6aaf3b8f230e4e36e8aac8e40d8c4d Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 14 Nov 2025 04:03:21 -0500 Subject: [PATCH 12/22] [libc++] Reorganize and fix the libc++ CI dockerfiles (#167530) Instead of having one large Dockerfile building multiple images with relatively confusing inheritance, explicitly have three standalone Dockerfiles each building one image. Then, tie the three images together using the docker-compose file which explicitly versions the base image used by the Android and the Github Actions images. 
--- .github/workflows/libcxx-build-containers.yml | 47 +-- libcxx/docs/Contributing.rst | 60 ++-- libcxx/utils/ci/Dockerfile | 329 ------------------ libcxx/utils/ci/docker-compose.yml | 40 --- .../ci/docker/android-builder.dockerfile | 114 ++++++ libcxx/utils/ci/docker/docker-compose.yml | 38 ++ .../ci/docker/linux-builder-base.dockerfile | 148 ++++++++ .../utils/ci/docker/linux-builder.dockerfile | 38 ++ 8 files changed, 384 insertions(+), 430 deletions(-) delete mode 100644 libcxx/utils/ci/Dockerfile delete mode 100644 libcxx/utils/ci/docker-compose.yml create mode 100644 libcxx/utils/ci/docker/android-builder.dockerfile create mode 100644 libcxx/utils/ci/docker/docker-compose.yml create mode 100644 libcxx/utils/ci/docker/linux-builder-base.dockerfile create mode 100644 libcxx/utils/ci/docker/linux-builder.dockerfile diff --git a/.github/workflows/libcxx-build-containers.yml b/.github/workflows/libcxx-build-containers.yml index 4bce86145fc0c..530e94df976c1 100644 --- a/.github/workflows/libcxx-build-containers.yml +++ b/.github/workflows/libcxx-build-containers.yml @@ -15,11 +15,11 @@ on: branches: - main paths: - - 'libcxx/utils/ci/**' + - 'libcxx/utils/ci/docker/**' - '.github/workflows/libcxx-build-containers.yml' pull_request: paths: - - 'libcxx/utils/ci/**' + - 'libcxx/utils/ci/docker/**' - '.github/workflows/libcxx-build-containers.yml' jobs: @@ -40,17 +40,18 @@ jobs: echo '{ "data-root": "/mnt/docker" }' | sudo tee /etc/docker/daemon.json sudo systemctl restart docker - - name: Build the Linux builder image - working-directory: libcxx/utils/ci - run: | - docker compose build builder-base - docker compose build actions-builder + - name: Build the base image + run: docker compose --file libcxx/utils/ci/docker/docker-compose.yml build libcxx-linux-builder-base + env: + TAG: ${{ github.sha }} + + - name: Build the Linux Github Actions image + run: docker compose --file libcxx/utils/ci/docker/docker-compose.yml build libcxx-linux-builder env: TAG: ${{ 
github.sha }} - name: Build the Android builder image - working-directory: libcxx/utils/ci - run: docker compose build android-buildkite-builder + run: docker compose --file libcxx/utils/ci/docker/docker-compose.yml build libcxx-android-builder env: TAG: ${{ github.sha }} @@ -61,19 +62,21 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Push the Linux builder image + - name: Push the images if: github.event_name == 'push' - working-directory: libcxx/utils/ci - run: | - docker compose push builder-base - docker compose push actions-builder - env: - TAG: ${{ github.sha }} + run: docker compose push libcxx-linux-builder-base libcxx-linux-builder libcxx-android-builder - - name: Push the Android builder image - if: github.event_name == 'push' - working-directory: libcxx/utils/ci + # We create tarballs with the images and upload them as artifacts, since that's useful for testing + # the images when making changes. + - name: Create image tarballs run: | - docker compose push android-buildkite-builder - env: - TAG: ${{ github.sha }} + docker image save ghcr.io/llvm/libcxx-linux-builder-base:${{ github.sha }} | gzip > libcxx-linux-builder-base.tar.gz + docker image save ghcr.io/llvm/libcxx-linux-builder:${{ github.sha }} | gzip > libcxx-linux-builder.tar.gz + docker image save ghcr.io/llvm/libcxx-android-builder:${{ github.sha }} | gzip > libcxx-android-builder.tar.gz + - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0 + with: + name: libcxx-docker-images + path: | + libcxx-linux-builder-base.tar.gz + libcxx-linux-builder.tar.gz + libcxx-android-builder.tar.gz diff --git a/libcxx/docs/Contributing.rst b/libcxx/docs/Contributing.rst index 4e9d1ba52b47e..b814ccfd0ac9a 100644 --- a/libcxx/docs/Contributing.rst +++ b/libcxx/docs/Contributing.rst @@ -269,12 +269,12 @@ Updating the CI testing container images ---------------------------------------- The libcxx linux premerge testing can run on one of three 
sets of runner -groups. The three runner group names are "llvm-premerge-libcxx-runners", -"llvm-premerge-libcxx-release-runners" and "llvm-premerge-libcxx-next-runners". -Which runner set to use is controlled by the contents of +groups. The three runner group names are ``llvm-premerge-libcxx-runners``, +``llvm-premerge-libcxx-release-runners`` and ``llvm-premerge-libcxx-next-runners``. +The runner set to use is controlled by the contents of https://github.com/llvm/llvm-project/blob/main/.github/workflows/libcxx-build-and-test.yaml. -By default, it uses "llvm-premerge-libcxx-runners". To switch to one of the -other runner sets, just replace all uses of "llvm-premerge-libcxx-runners" in +By default, it uses ``llvm-premerge-libcxx-runners``. To switch to one of the +other runner sets, just replace all uses of ``llvm-premerge-libcxx-runners`` in the yaml file with the desired runner set. Which container image is used by these three runner sets is controlled @@ -282,7 +282,7 @@ and set by the variable values in https://github.com/llvm/llvm-zorg/blob/main/premerge/premerge_resources/variables.tf. The table below shows the variable names and the runner sets to which they correspond. To see their values, follow the -link above (to variables.tf in llvm-zorg). +link above (to ``variables.tf`` in llvm-zorg). +------------------------------------+---------------------------+ |Runner Set |Variable | @@ -295,39 +295,21 @@ link above (to variables.tf in llvm-zorg). +------------------------------------+---------------------------+ -When updating the container image you can either update just the -runner binary (the part the connects to Github), or you can update -everything (tools, etc.). Whether to update just the runner or to update -everything is controlled by the value of ``ACTIONS_BASE_IMAGE``, under -``actions-builder`` in ``libcxx/utils/ci/docker-compose.yml``. 
- -To update just the runner binary, change the value of ``ACTIONS_BASE_IMAGE`` -to be a modified version of one of the libcxx runner variable images from -https://github.com/llvm/llvm-zorg/blob/main/premerge/premerge_resources/variables.tf, -as follows: Find the libcxx runner image name you want to use from the -variables.tf file. The name will be something like -``ghcr.io/llvm/libcxx-linux-builder:``. Replace -``libcxx-linux-builder`` with ``libcxx-linux-builder-base``. Use this new image -name as the value you assign to ``ACTIONS_BASE_IMAGE``. - -To update the entire container image, set the value of ``ACTIONS_BASE_IMAGE`` -to ``builder-base``. If the value is already ``builder-base`` (there -have been no just-the-runner updates since the last complete update), then you -need to find the line containing ``RUN echo "Last forced update executed on`` -in ``libcxx/utils/ci/Dockerfile`` and update the date to be the current date. - -Once you have created and merged a PR with those changes, a new image -will be created, and a link to it can be found at -https://github.com/llvm/llvm-project/pkgs/container/libcxx-linux-builder, -where the actual image name should be -``ghcr.io/llvm/libcxx-linux-builder:``. - -Lastly you need to create a PR in the llvm-zorg repository, -updating the the value of the appropriate libcxx runner variable in -the variables.tf file mentioned above to the name of your newly created -image (see above paragraph about finding the image name). Once that change -has been merged, an LLVM premerge maintainer (a Google employee) must use -terraform to apply the change to the running GKE cluster. +When updating the container image you can either update just the runner binary (the part +that connects to Github), or you can update everything (tools, etc.). To update the runner +binary, bump the value of ``GITHUB_RUNNER_VERSION`` in ``libcxx/utils/ci/docker/docker-compose.yml``. 
+To update all of the tools, bump ``BASE_IMAGE_VERSION`` to a newer version of the ``libcxx-linux-builder-base`` +image. You can see all versions of that image at https://github.com/llvm/llvm-project/pkgs/container/libcxx-linux-builder-base. + +On push to ``main``, a new version of both the ``libcxx-linux-builder`` and the ``libcxx-android-builder`` +images will be built and pushed to https://github.com/llvm/llvm-project/packages. + +You can then update the image used by the actual runners by changing the sha associated +to ``libcxx_runner_image``, ``libcxx_release_runner_image`` or ``libcxx_next_runner_image`` +in `the Terraform configuration file <https://github.com/llvm/llvm-zorg/blob/main/premerge/premerge_resources/variables.tf>`_. +To do so, you will need to create a PR in the llvm-zorg repository and wait for it to be +merged. Once that change has been merged, an LLVM premerge maintainer (a Google employee) +must use terraform to apply the change to the running GKE cluster. Monitoring premerge testing performance diff --git a/libcxx/utils/ci/Dockerfile b/libcxx/utils/ci/Dockerfile deleted file mode 100644 index d22deec4dadab..0000000000000 --- a/libcxx/utils/ci/Dockerfile +++ /dev/null @@ -1,329 +0,0 @@ -# ===----------------------------------------------------------------------===## -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# ===----------------------------------------------------------------------===## -# -# This file defines the buildkite and github actions builder images. -# This images are tagged with . You can build both images using: -# -# TAG= docker compose build -# -# Or you can select a single image to build -# -# TAG=test docker compose build actions-builder -# -# The final images can be found at -# -# ghcr.io/libcxx/libcxx-linux-builder -# ghcr.io/libcxx/android-buildkite-builder -# -# Members of the github.com/libcxx/ organizations can push new images to the CI.
-# This is done by GitHub actions in the https://github.com/libcxx/builders repo. -# -# ===----------------------------------------------------------------------===## -# Running the buildkite image -# ===----------------------------------------------------------------------===## -# -# To start a Buildkite Agent, run it as: -# $ docker run --env-file -it $(docker build -q libcxx/utils/ci) -# -# The environment variables in `` should be the ones necessary -# to run a BuildKite agent: -# -# BUILDKITE_AGENT_TOKEN= -# -# If you're only looking to run the Docker image locally for debugging a -# build bot, see the `run-buildbot-container` script located in this directory. - -ARG ACTIONS_BASE_IMAGE - -# HACK: We set the base image in the docker-compose file depending on the final target (buildkite vs github actions). -# This means we have a much slower container build, but we can use the same Dockerfile for both targets. -ARG BASE_IMAGE -FROM $BASE_IMAGE AS builder-base - -# Changing this file causes a rebuild of the image in a GitHub action. -# However, it does not cause the CI runners to switch to that image -# automatically, that must be done by updating the SHA in the Github workflow -# file. The date uses the ISO format YYYY-MM-DD. -RUN echo "Last forced update executed on 2025-04-28." - -# Make sure apt-get doesn't try to prompt for stuff like our time zone, etc. -ENV DEBIAN_FRONTEND=noninteractive - -# populated in the docker-compose file -ARG GCC_HEAD_VERSION -ENV GCC_HEAD_VERSION=${GCC_HEAD_VERSION} - -# populated in the docker-compose file -ARG LLVM_HEAD_VERSION -ENV LLVM_HEAD_VERSION=${LLVM_HEAD_VERSION} - -# HACK: The github actions runner image already has sudo and requires its use. The buildkite base image does not. -# Reconcile this. -RUN <, and ToT, which are the ones we support. -# We also install because we need to support the "latest-1" of the -# current LLVM release branch, which is effectively the of the -# tip-of-trunk LLVM. 
For example, after branching LLVM 14 but before branching -# LLVM 15, we still need to have Clang 12 in this Docker image because the LLVM -# 14 release branch CI uses it. The tip-of-trunk CI will never use Clang 12, -# though. -RUN <> /home/libcxx-builder/.buildkite-agent/buildkite-agent.cfg -EOF - -USER libcxx-builder -WORKDIR /home/libcxx-builder - -ENV PATH="${PATH}:/home/libcxx-builder/.buildkite-agent/bin" - -CMD ["buildkite-agent", "start"] - -# ===----------------------------------------------------------------------===## -# Android Buildkite Builder Image -# ===----------------------------------------------------------------------===## -# -# IMAGE: ghcr.io/libcxx/android-buildkite-builder. -# -FROM buildkite-builder AS android-buildkite-builder - -COPY --from=android-builder-base /opt/android /opt/android -COPY ./vendor/android/container-setup.sh /opt/android/container-setup.sh - -ENV PATH="/opt/android/sdk/platform-tools:${PATH}" - -USER root - -# Install Docker -RUN <> /etc/sudoers -WORKDIR /home/gha -USER gha - -ENV RUNNER_MANUALLY_TRAP_SIG=1 -ENV ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT=1 -RUN mkdir actions-runner && \ - cd actions-runner && \ - curl -O -L https://github.com/actions/runner/releases/download/v$GITHUB_RUNNER_VERSION/actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz && \ - tar xzf ./actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz && \ - rm ./actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz diff --git a/libcxx/utils/ci/docker-compose.yml b/libcxx/utils/ci/docker-compose.yml deleted file mode 100644 index 9367a8f1de6b8..0000000000000 --- a/libcxx/utils/ci/docker-compose.yml +++ /dev/null @@ -1,40 +0,0 @@ -x-versions: &compiler_versions - GCC_HEAD_VERSION: 16 - LLVM_HEAD_VERSION: 22 - -x-image-versions: &image_versions - BASE_IMAGE: docker.io/library/ubuntu:jammy - ACTIONS_BASE_IMAGE: ghcr.io/llvm/libcxx-linux-builder-base:77cb0980bcc2675b27d08141526939423fa0be76 - -services: - builder-base: - image: 
ghcr.io/llvm/libcxx-linux-builder-base:${TAG} - build: - context: . - dockerfile: Dockerfile - target: builder-base - args: - <<: [*image_versions, *compiler_versions] - - actions-builder: - image: ghcr.io/llvm/libcxx-linux-builder:${TAG} - build: - context: . - dockerfile: Dockerfile - target: actions-builder - args: - GITHUB_RUNNER_VERSION: "2.329.0" - <<: [*image_versions, *compiler_versions] - - android-buildkite-builder: - image: ghcr.io/llvm/libcxx-android-builder:${TAG} - build: - context: . - dockerfile: Dockerfile - target: android-buildkite-builder - args: - BASE_IMAGE: docker.io/library/ubuntu:noble - ANDROID_CLANG_VERSION: r563880 - ANDROID_CLANG_PREBUILTS_COMMIT: 6ae4184bb8706f9731569b9a0a82be3fcdcb951c - ANDROID_SYSROOT_COMMIT: f8b85cc5262c6e5cbc9a92c1bab2b18b32a4c63f - <<: [*image_versions, *compiler_versions] diff --git a/libcxx/utils/ci/docker/android-builder.dockerfile b/libcxx/utils/ci/docker/android-builder.dockerfile new file mode 100644 index 0000000000000..9c5d5047dbb86 --- /dev/null +++ b/libcxx/utils/ci/docker/android-builder.dockerfile @@ -0,0 +1,114 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## +# +# This file defines the image we use to run Android testing on Buildkite. +# From the root of the monorepo, this image can be built with: +# +# $ docker build --file libcxx/utils/ci/docker/android-builder.dockerfile \ +# --build-arg BASE_IMAGE_VERSION= \ +# --build-arg ANDROID_CLANG_VERSION= \ +# --build-arg ANDROID_CLANG_PREBUILTS_COMMIT= \ +# --build-arg ANDROID_SYSROOT_COMMIT= . 
+# +# This image also gets built on every push to `main` that modifies these Docker +# files, and can be found at ghcr.io/llvm/libcxx-android-builder. +# +# To run the image and start a Buildkite Agent, run it as: +# +# $ docker run --env-file -it ghcr.io/llvm/libcxx-android-builder:latest +# +# The environment variables in `` should be the ones necessary +# to run a BuildKite agent: +# +# BUILDKITE_AGENT_TOKEN= + +ARG BASE_IMAGE_VERSION +FROM ghcr.io/llvm/libcxx-linux-builder-base:${BASE_IMAGE_VERSION} + +ARG ANDROID_CLANG_VERSION +ARG ANDROID_CLANG_PREBUILTS_COMMIT +ARG ANDROID_SYSROOT_COMMIT + +# Install the Android platform tools (e.g. adb) into /opt/android/sdk. +RUN < +# + +services: + libcxx-linux-builder-base: + image: ghcr.io/llvm/libcxx-linux-builder-base:${TAG:-latest} + build: + context: ../../../.. # monorepo root + dockerfile: libcxx/utils/ci/docker/linux-builder-base.dockerfile + args: + GCC_HEAD_VERSION: 16 + LLVM_HEAD_VERSION: 22 + + libcxx-linux-builder: + image: ghcr.io/llvm/libcxx-linux-builder:${TAG:-latest} + build: + context: ../../../.. # monorepo root + dockerfile: libcxx/utils/ci/docker/linux-builder.dockerfile + args: + BASE_IMAGE_VERSION: 825943e06f840710177e5514c4f61c9e73660c44 + GITHUB_RUNNER_VERSION: 2.329.0 + + libcxx-android-builder: + image: ghcr.io/llvm/libcxx-android-builder:${TAG:-latest} + build: + context: ../../../.. 
# monorepo root + dockerfile: libcxx/utils/ci/docker/android-builder.dockerfile + args: + BASE_IMAGE_VERSION: 825943e06f840710177e5514c4f61c9e73660c44 + ANDROID_CLANG_VERSION: r563880 + ANDROID_CLANG_PREBUILTS_COMMIT: 6ae4184bb8706f9731569b9a0a82be3fcdcb951c + ANDROID_SYSROOT_COMMIT: f8b85cc5262c6e5cbc9a92c1bab2b18b32a4c63f diff --git a/libcxx/utils/ci/docker/linux-builder-base.dockerfile b/libcxx/utils/ci/docker/linux-builder-base.dockerfile new file mode 100644 index 0000000000000..af09f15a72747 --- /dev/null +++ b/libcxx/utils/ci/docker/linux-builder-base.dockerfile @@ -0,0 +1,148 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## +# +# This file defines the base image we use for Linux testing using Github Actions. +# From the root of the monorepo, this image can be built with: +# +# $ docker build --file libcxx/utils/ci/docker/linux-builder-base.dockerfile \ +# --build-arg GCC_HEAD_VERSION= \ +# --build-arg LLVM_HEAD_VERSION= . +# +# This image also gets built on every push to `main` that modifies these Docker +# files, and can be found at ghcr.io/llvm/libcxx-linux-builder-base. + +FROM docker.io/library/ubuntu:noble + +# Changing this file causes a rebuild of the image in a GitHub action. However, it does not cause +# the CI runners to switch to that image automatically, that must be done by updating the image used +# by the libc++ self-hosted runners in llvm-zorg. The date uses the ISO format YYYY-MM-DD. +RUN echo "Last forced update executed on 2025-11-11." + +# Make sure apt-get doesn't try to prompt for stuff like our time zone, etc.
+ENV DEBIAN_FRONTEND=noninteractive + +# populated in the docker-compose file +ARG GCC_HEAD_VERSION +ENV GCC_HEAD_VERSION=${GCC_HEAD_VERSION} + +# populated in the docker-compose file +ARG LLVM_HEAD_VERSION +ENV LLVM_HEAD_VERSION=${LLVM_HEAD_VERSION} + +# Install sudo and setup passwordless sudo. +RUN <, and ToT, which are the ones we support. +# We also install because we need to support the "latest-1" of the +# current LLVM release branch, which is effectively the of the +# tip-of-trunk LLVM. For example, after branching LLVM 14 but before branching +# LLVM 15, we still need to have Clang 12 in this Docker image because the LLVM +# 14 release branch CI uses it. The tip-of-trunk CI will never use Clang 12, +# though. +RUN < \ +# --build-arg GITHUB_RUNNER_VERSION= . +# +# This image also gets built on every push to `main` that modifies these Docker +# files, and can be found at ghcr.io/llvm/libcxx-linux-builder. + +ARG BASE_IMAGE_VERSION +FROM ghcr.io/llvm/libcxx-linux-builder-base:${BASE_IMAGE_VERSION} + +ARG GITHUB_RUNNER_VERSION + +# Setup the user +RUN useradd gha -u 1001 -m -s /bin/bash +RUN adduser gha sudo +RUN echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers +WORKDIR /home/gha +USER gha + +# Install the Github Actions runner +ENV RUNNER_MANUALLY_TRAP_SIG=1 +ENV ACTIONS_RUNNER_PRINT_LOG_TO_STDOUT=1 +RUN mkdir actions-runner && \ + cd actions-runner && \ + curl -O -L https://github.com/actions/runner/releases/download/v$GITHUB_RUNNER_VERSION/actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz && \ + tar xzf ./actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz && \ + rm ./actions-runner-linux-x64-$GITHUB_RUNNER_VERSION.tar.gz From 3277f6caef110359046e32983fee37932b8f9ac2 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Fri, 14 Nov 2025 17:14:07 +0800 Subject: [PATCH 13/22] [LV] Explicitly disable in-loop reductions for AnyOf and FindIV. nfc (#163541) Currently, in-loop reductions for AnyOf and FindIV are not supported. They were implicitly blocked. 
This happened because RecurrenceDescriptor::getReductionOpChain could not detect their recurrence chain. The reason is that RecurrenceDescriptor::getOpcode was set to Instruction::Or, but the recurrence chains of AnyOf and FindIV do not actually contain an Instruction::Or. This patch explicitly disables in-loop reductions for AnyOf and FindIV instead of relying on getReductionOpChain to implicitly prevent them. --- llvm/lib/Analysis/IVDescriptors.cpp | 12 +++++++----- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 ++++++- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 7 +++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 9f8ac6e8e2e0b..641850b46bbd8 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -1220,11 +1220,6 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { return Instruction::Add; case RecurKind::Mul: return Instruction::Mul; - case RecurKind::AnyOf: - case RecurKind::FindFirstIVSMin: - case RecurKind::FindFirstIVUMin: - case RecurKind::FindLastIVSMax: - case RecurKind::FindLastIVUMax: case RecurKind::Or: return Instruction::Or; case RecurKind::And: @@ -1248,6 +1243,13 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: return Instruction::FCmp; + case RecurKind::AnyOf: + case RecurKind::FindFirstIVSMin: + case RecurKind::FindFirstIVUMin: + case RecurKind::FindLastIVSMax: + case RecurKind::FindLastIVUMax: + // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions + // are supported. 
default: llvm_unreachable("Unknown recurrence operation"); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9f0d6fcb237ef..58fcab40d5894 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6591,9 +6591,14 @@ void LoopVectorizationCostModel::collectInLoopReductions() { if (RdxDesc.getRecurrenceType() != Phi->getType()) continue; + // In-loop AnyOf and FindIV reductions are not yet supported. + RecurKind Kind = RdxDesc.getRecurrenceKind(); + if (RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) || + RecurrenceDescriptor::isFindIVRecurrenceKind(Kind)) + continue; + // If the target would prefer this reduction to happen "in-loop", then we // want to record it as such. - RecurKind Kind = RdxDesc.getRecurrenceKind(); if (!PreferInLoopReductions && !useOrderedReductions(RdxDesc) && !TTI.preferInLoopReduction(Kind, Phi->getType())) continue; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index e0e6990c56ec7..aa85bd435ee9e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -821,10 +821,9 @@ Value *VPInstruction::generate(VPTransformState &State) { auto *OrigPhi = cast(PhiR->getUnderlyingValue()); Value *ReducedPartRdx = State.get(getOperand(2)); for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx) - ReducedPartRdx = Builder.CreateBinOp( - (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode( - RecurKind::AnyOf), - State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx"); + ReducedPartRdx = + Builder.CreateBinOp(Instruction::Or, State.get(getOperand(Idx)), + ReducedPartRdx, "bin.rdx"); return createAnyOfReduction(Builder, ReducedPartRdx, State.get(getOperand(1), VPLane(0)), OrigPhi); } From b2a81888f55735e9249ef25b6014a989e7f03969 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 14 Nov 2025 01:20:04 
-0800 Subject: [PATCH 14/22] Destroy tasks as they are run in the thread pool (#167852) Without this, any RAII objects held in the task's captures aren't destroyed in a similar fashion to the task being run. If those objects in turn interact with the thread pool itself, chaos ensues. This comes up quite naturally with RAII-objects used for synchronization such as RAII-powered latches or releasing a mutex, etc. A unit test is crafted that tries to very directly test that the logic of the thread pool continues to hold even with an RAII object. This isn't the only type of failure mode (a deadlock due to mutexes in the captures can also occur), but seemed the easiest to test. --- llvm/lib/Support/ThreadPool.cpp | 9 +++++-- llvm/unittests/Support/ThreadPool.cpp | 37 +++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp index 4779e673cc055..2f9ff13109b61 100644 --- a/llvm/lib/Support/ThreadPool.cpp +++ b/llvm/lib/Support/ThreadPool.cpp @@ -110,8 +110,13 @@ void StdThreadPool::processTasks(ThreadPoolTaskGroup *WaitingForGroup) { CurrentThreadTaskGroups->push_back(GroupOfTask); #endif - // Run the task we just grabbed - Task(); + // Run the task we just grabbed. This also destroys the task once run to + // release any resources held by it through RAII captured objects. + // + // It is particularly important to do this here so that we're not holding + // any lock and any further operations on the thread or `ThreadPool` take + // place here, at the same point as the task itself is executed. 
+ std::exchange(Task, {})(); #ifndef NDEBUG CurrentThreadTaskGroups->pop_back(); diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp index b5268c82e4199..7f7274740db7d 100644 --- a/llvm/unittests/Support/ThreadPool.cpp +++ b/llvm/unittests/Support/ThreadPool.cpp @@ -8,6 +8,7 @@ #include "llvm/Support/ThreadPool.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Config/llvm-config.h" // for LLVM_ENABLE_THREADS @@ -197,6 +198,42 @@ TYPED_TEST(ThreadPoolTest, AsyncMoveOnly) { ASSERT_EQ(42, f.get()); } +TYPED_TEST(ThreadPoolTest, AsyncRAIICaptures) { + CHECK_UNSUPPORTED(); + DefaultThreadPool Pool(hardware_concurrency(2)); + + // We use a task group and a non-atomic value to stress test that the chaining + // of tasks via a captured RAII object in fact chains and synchronizes within + // a group. + ThreadPoolTaskGroup Group(Pool); + int value = 0; + + // Create an RAII object that when destroyed schedules more work. This makes + // it easy to check that the RAII is resolved at the same point as a task runs + // on the thread pool. + auto schedule_next = llvm::make_scope_exit([&Group, &value] { + // We sleep before scheduling the final task to make it much more likely + // that an incorrect implementation actually exhibits a bug. Without the + // sleep, we may get "lucky" and have the second task finish before the + // assertion below fails even with an incorrect implementation. The + // sleep is making _failures_ more reliable, it is not needed for + // correctness and this test should only flakily _pass_, never flakily + // fail. + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + Group.async([&value] { value = 42; }); + }); + + // Now schedule the initial task, moving the RAII object to schedule the final + // task into its captures.
+ Group.async([schedule_next = std::move(schedule_next)]() { + // Nothing to do here, the captured RAII object does the work. + }); + + // Both tasks should complete here, synchronizing with the read of value. + Group.wait(); + ASSERT_EQ(42, value); +} + TYPED_TEST(ThreadPoolTest, GetFuture) { CHECK_UNSUPPORTED(); DefaultThreadPool Pool(hardware_concurrency(2)); From 787f677cd45e7704f389be7edfd36e00b3acfa04 Mon Sep 17 00:00:00 2001 From: moleium Date: Fri, 14 Nov 2025 12:25:56 +0300 Subject: [PATCH 15/22] [libc++] proper guarding for locale usage in filesystem on Windows (#165470) - Resolves build issues when localization support is disabled on Windows. - Resolves dependencies on localization in filesystem header implementations. Related PR #164602 Fixes #164074 --- libcxx/include/__filesystem/path.h | 8 +++++--- libcxx/include/__filesystem/u8path.h | 16 +++++++++------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h index b3f324342f800..990ab6f66735f 100644 --- a/libcxx/include/__filesystem/path.h +++ b/libcxx/include/__filesystem/path.h @@ -324,6 +324,7 @@ struct _PathCVT { } }; +# if _LIBCPP_HAS_LOCALIZATION template struct _PathExport { typedef __narrow_to_utf8 _Narrower; @@ -364,7 +365,7 @@ struct _PathExport { } }; -# if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T template <> struct _PathExport { typedef __narrow_to_utf8 _Narrower; @@ -374,8 +375,9 @@ struct _PathExport { _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); } }; -# endif // _LIBCPP_HAS_CHAR8_T -# endif /* _LIBCPP_WIN32API */ +# endif // _LIBCPP_HAS_CHAR8_T +# endif // _LIBCPP_HAS_LOCALIZATION +# endif // _LIBCPP_WIN32API class _LIBCPP_EXPORTED_FROM_ABI path { template diff --git a/libcxx/include/__filesystem/u8path.h b/libcxx/include/__filesystem/u8path.h index 885372b0d1f86..ebdd51bfc03f0 100644 --- a/libcxx/include/__filesystem/u8path.h +++ 
b/libcxx/include/__filesystem/u8path.h @@ -24,28 +24,30 @@ _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM +# if !defined(_LIBCPP_WIN32API) || _LIBCPP_HAS_LOCALIZATION template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _InputIt __l) { static_assert( -# if _LIBCPP_HAS_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || -# endif +# endif is_same::__char_type, char>::value, "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" " or 'char8_t'"); -# if defined(_LIBCPP_WIN32API) +# if defined(_LIBCPP_WIN32API) string __tmp(__f, __l); using _CVT = __widen_from_utf8; std::wstring __w; __w.reserve(__tmp.size()); _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); return path(__w); -# else +# else return path(__f, __l); -# endif /* !_LIBCPP_WIN32API */ +# endif // defined(_LIBCPP_WIN32API) } +# endif // !defined(_LIBCPP_WIN32API) || _LIBCPP_HAS_LOCALIZATION -# if defined(_LIBCPP_WIN32API) +# if defined(_LIBCPP_WIN32API) && _LIBCPP_HAS_LOCALIZATION template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _NullSentinel) { static_assert( @@ -65,7 +67,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); return path(__w); } -# endif /* _LIBCPP_WIN32API */ +# endif // defined(_LIBCPP_WIN32API) && _LIBCPP_HAS_LOCALIZATION template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& __s) { From 31b7f1fa0b8c24a7549d60d67faa882d5bf2eaae Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Fri, 14 Nov 2025 10:34:38 +0100 Subject: [PATCH 16/22] [GlobalISel] Add support for value/constants as inline asm memory operand (#161501) InlineAsmLowering rejected inline assembly with memory reference inputs if the values passed to the inline asm weren't pointers. 
The DAG lowering however handled them just fine. This patch updates InlineAsmLowering to store such values on the stack, and then use the stack pointer as the "indirect" version of the operand. --- .../CodeGen/GlobalISel/InlineAsmLowering.cpp | 53 ++++++++--- .../AArch64/GlobalISel/arm64-fallback.ll | 9 -- .../GlobalISel/irtranslator-inline-asm.ll | 93 ++++++++++++++++++ .../GlobalISel/irtranslator-inline-asm.ll | 94 +++++++++++++++++++ 4 files changed, 227 insertions(+), 22 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp index b4e64d7416d86..a8661ce629a4f 100644 --- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -454,26 +455,52 @@ bool InlineAsmLowering::lowerInlineAsm( } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { - - if (!OpInfo.isIndirect) { - LLVM_DEBUG(dbgs() - << "Cannot indirectify memory input operands yet\n"); - return false; - } - - assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - const InlineAsm::ConstraintCode ConstraintID = TLI->getInlineAsmMemConstraint(OpInfo.ConstraintCode); InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1); OpFlags.setMemConstraint(ConstraintID); Inst.addImm(OpFlags); + + if (OpInfo.isIndirect) { + // already indirect + ArrayRef SourceRegs = + GetOrCreateVRegs(*OpInfo.CallOperandVal); + if (SourceRegs.size() != 1) { + LLVM_DEBUG(dbgs() << "Expected the memory input to fit into a " + "single virtual register " + "for constraint '" + << OpInfo.ConstraintCode << "'\n"); + return false; + } + Inst.addReg(SourceRegs[0]); + break; + } + + // Needs to be made indirect. 
Store the value on the stack and use + // a pointer to it. + Value *OpVal = OpInfo.CallOperandVal; + unsigned Bytes = DL.getTypeStoreSize(OpVal->getType()); + Align Alignment = DL.getPrefTypeAlign(OpVal->getType()); + int FrameIdx = + MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false); + + unsigned AddrSpace = DL.getAllocaAddrSpace(); + LLT FramePtrTy = + LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); + auto Ptr = MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx).getReg(0); ArrayRef SourceRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal); - assert( - SourceRegs.size() == 1 && - "Expected the memory input to fit into a single virtual register"); - Inst.addReg(SourceRegs[0]); + if (SourceRegs.size() != 1) { + LLVM_DEBUG(dbgs() << "Expected the memory input to fit into a single " + "virtual register " + "for constraint '" + << OpInfo.ConstraintCode << "'\n"); + return false; + } + MIRBuilder.buildStore(SourceRegs[0], Ptr, + MachinePointerInfo::getFixedStack(MF, FrameIdx), + Alignment); + Inst.addReg(Ptr); break; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 29c320da6c0a7..f8cd868a4c755 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -37,15 +37,6 @@ define i64 @strict_align_feature(ptr %p) #0 { attributes #0 = { "target-features"="+strict-align" } -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to translate instruction: call -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for direct_mem -; FALLBACK-WITH-REPORT-OUT-LABEL: direct_mem -define void @direct_mem(i32 %x, i32 %y) { -entry: - tail call void asm sideeffect "", "imr,imr,~{memory}"(i32 %x, i32 %y) - ret void -} - ; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to lower function{{.*}}scalable_arg ; FALLBACK-WITH-REPORT-OUT-LABEL: scalable_arg define @scalable_arg( %pred, ptr %addr) #1 { diff 
--git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll index 42f6570047fc7..8597ceb9ed87a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -258,3 +258,96 @@ define i64 @test_input_with_matching_constraint_to_physical_register() { %1 = tail call i64 asm "", "={x2},0"(i64 0) ret i64 %1 } + +define void @test_indirectify_i32_value(i32 %x, i32 %y) { + ; CHECK-LABEL: name: test_indirectify_i32_value + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %stack.0) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %stack.1) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, {{[0-9]+}} /* mem:m */, [[FRAME_INDEX]](p0), 262158 /* mem:m */, [[FRAME_INDEX1]](p0) + ; CHECK-NEXT: RET_ReallyLR +entry: + tail call void asm sideeffect "", "imr,imr,~{memory}"(i32 %x, i32 %y) + ret void +} + +define void @test_indirectify_i32_constant() { + ; CHECK-LABEL: name: test_indirectify_i32_constant + ; CHECK: bb.1.entry: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX]](p0) :: (store (s32) into %stack.0) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1 + ; CHECK-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p0) :: (store (s32) into %stack.1) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload 
maystore attdialect */, {{[0-9]+}} /* mem:m */, [[FRAME_INDEX]](p0), 262158 /* mem:m */, [[FRAME_INDEX1]](p0) + ; CHECK-NEXT: RET_ReallyLR +entry: + tail call void asm sideeffect "", "imr,imr,~{memory}"(i32 42, i32 0) + ret void +} + +define void @test_indirectify_i16_value(i16 %val) { + ; CHECK-LABEL: name: test_indirectify_i16_value + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[FRAME_INDEX]](p0) :: (store (s16) into %stack.0) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, {{[0-9]+}} /* mem:m */, [[FRAME_INDEX]](p0) + ; CHECK-NEXT: RET_ReallyLR +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i16 %val) + ret void +} + +define void @test_indirectify_i16_constant() { + ; CHECK-LABEL: name: test_indirectify_i16_constant + ; CHECK: bb.1.entry: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[C]](s16), [[FRAME_INDEX]](p0) :: (store (s16) into %stack.0) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, {{[0-9]+}} /* mem:m */, [[FRAME_INDEX]](p0) + ; CHECK-NEXT: RET_ReallyLR +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i16 42) + ret void +} + +define void @test_indirectify_i64_value(i64 %val) { + ; CHECK-LABEL: name: test_indirectify_i64_value + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %stack.0) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, {{[0-9]+}} /* mem:m */, 
[[FRAME_INDEX]](p0) + ; CHECK-NEXT: RET_ReallyLR +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i64 %val) + ret void +} + +define void @test_indirectify_i64_constant() { + ; CHECK-LABEL: name: test_indirectify_i64_constant + ; CHECK: bb.1.entry: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX]](p0) :: (store (s64) into %stack.0) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, {{[0-9]+}} /* mem:m */, [[FRAME_INDEX]](p0) + ; CHECK-NEXT: RET_ReallyLR +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i64 42) + ret void +} + +; TODO: add more types diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 70ff92f8eda92..b290c314f1154 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -331,6 +331,100 @@ define amdgpu_kernel void @asm_constraint_n_n() { ret void } +define void @test_indirectify_i32_value(i32 %x, i32 %y) { + ; CHECK-LABEL: name: test_indirectify_i32_value + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1 + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[FRAME_INDEX1]](p5) :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 262158 /* mem:m */, [[FRAME_INDEX]](p5), 262158 /* mem:m */, [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: SI_RETURN +entry: + tail 
call void asm sideeffect "", "imr,imr,~{memory}"(i32 %x, i32 %y) + ret void +} + +define void @test_indirectify_i32_constant() { + ; CHECK-LABEL: name: test_indirectify_i32_constant + ; CHECK: bb.1.entry: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[C]](s32), [[FRAME_INDEX]](p5) :: (store (s32) into %stack.0, addrspace 5) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1 + ; CHECK-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX1]](p5) :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 262158 /* mem:m */, [[FRAME_INDEX]](p5), 262158 /* mem:m */, [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: SI_RETURN +entry: + tail call void asm sideeffect "", "imr,imr,~{memory}"(i32 42, i32 0) + ret void +} + + +define void @test_indirectify_i16_value(i16 %val) { + ; CHECK-LABEL: name: test_indirectify_i16_value + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[FRAME_INDEX]](p5) :: (store (s16) into %stack.0, addrspace 5) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 262158 /* mem:m */, [[FRAME_INDEX]](p5) + ; CHECK-NEXT: SI_RETURN +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i16 %val) + ret void +} + +define void @test_indirectify_i16_constant() { + ; CHECK-LABEL: name: test_indirectify_i16_constant + ; CHECK: bb.1.entry: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 42 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[C]](s16), [[FRAME_INDEX]](p5) :: (store (s16) into %stack.0, 
addrspace 5) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 262158 /* mem:m */, [[FRAME_INDEX]](p5) + ; CHECK-NEXT: SI_RETURN +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i16 42) + ret void +} + +define void @test_indirectify_i64_value(i64 %val) { + ; CHECK-LABEL: name: test_indirectify_i64_value + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[MV]](s64), [[FRAME_INDEX]](p5) :: (store (s64) into %stack.0, addrspace 5) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 262158 /* mem:m */, [[FRAME_INDEX]](p5) + ; CHECK-NEXT: SI_RETURN +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i64 %val) + ret void +} + +define void @test_indirectify_i64_constant() { + ; CHECK-LABEL: name: test_indirectify_i64_constant + ; CHECK: bb.1.entry: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: G_STORE [[C]](s64), [[FRAME_INDEX]](p5) :: (store (s64) into %stack.0, addrspace 5) + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 262158 /* mem:m */, [[FRAME_INDEX]](p5) + ; CHECK-NEXT: SI_RETURN +entry: + tail call void asm sideeffect "", "imr,~{memory}"(i64 42) + ret void +} + !llvm.module.flags = !{!1} !0 = !{i32 70} !1 = !{i32 1, !"amdhsa_code_object_version", i32 500} From 8723fe5606de6dfb344afacd667c20f55bb2f5e0 Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Fri, 14 Nov 2025 09:55:45 +0000 Subject: [PATCH 17/22] [mlir][tosa] Allow int64 index tensors in gather/scatter (#167894) This commit ensures that gather and scatter operations with int64 index 
tensors can be created. This aligns with the EXT_INT64 extension. --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 4 ++-- .../mlir/Dialect/Tosa/IR/TosaTypesBase.td | 6 ++--- mlir/test/Dialect/Tosa/ops.mlir | 22 +++++++++++++++---- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 2b36b2c5113e1..bb8faf01802fa 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -2271,7 +2271,7 @@ def Tosa_GatherOp : Tosa_InferShapedTypeOp<"gather"> { let arguments = (ins Tosa_Tensor3D:$values, - Tosa_Int32Tensor2D:$indices + Tosa_IndexTensor2D:$indices ); let results = (outs @@ -2308,7 +2308,7 @@ def Tosa_ScatterOp : Tosa_InferShapedTypeOp<"scatter"> { let arguments = (ins Tosa_Tensor3D:$values_in, - Tosa_Int32Tensor2D:$indices, + Tosa_IndexTensor2D:$indices, Tosa_Tensor3D:$input ); diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td index 414b51bf4b135..266a9e3a7d946 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaTypesBase.td @@ -202,10 +202,8 @@ def Tosa_Tensor1Dto6D : AnyTypeOf<[ def Tosa_TensorUpto4D : AnyTypeOf<[ Tosa_UnrankedTensor, TosaTensorRankOf<[Tosa_AnyNumber], [0,1,2,3,4]>]>; -def Tosa_Int32TensorUpto4D : AnyTypeOf<[ - Tosa_UnrankedTensor, TosaTensorRankOf<[Tosa_Int32], [0,1,2,3,4]>]>; -def Tosa_Int32Tensor2D : AnyTypeOf<[ - Tosa_UnrankedTensor, TosaTensorRankOf<[Tosa_Int32], [2]>]>; +def Tosa_IndexTensor2D : AnyTypeOf<[ + Tosa_UnrankedTensor, TosaTensorRankOf<[Tosa_Int32, Tosa_Int64], [2]>]>; def Tosa_TensorAtLeast1D : AnyTypeOf<[ Tosa_UnrankedTensor, TosaRankedTensorOf<[Tosa_AnyNumber], [AtLeastRankOne]>], "tosa-conformant tensor of at least rank 1", "::mlir::TensorType">; diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 
b2a71ab882230..a4591f7ffd393 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -757,10 +757,10 @@ func.func @test_gather(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x26xi32>) -> } // ----- -// CHECK-LABEL: scatter -func.func @test_scatter(%arg0: tensor<13x52x3xf32>, %arg1: tensor<13x26xi32>, %arg2: tensor<13x26x3xf32>) -> tensor<13x52x3xf32> { - %0 = tosa.scatter %arg0, %arg1, %arg2 : (tensor<13x52x3xf32>, tensor<13x26xi32>, tensor<13x26x3xf32>) -> tensor<13x52x3xf32> - return %0 : tensor<13x52x3xf32> +// CHECK-LABEL: gather_int64 +func.func @test_gather_int64(%arg0: tensor<13x21x3xf32>, %arg1: tensor<13x26xi64>) -> tensor<13x26x3xf32> { + %0 = tosa.gather %arg0, %arg1 : (tensor<13x21x3xf32>, tensor<13x26xi64>) -> tensor<13x26x3xf32> + return %0 : tensor<13x26x3xf32> } // ----- @@ -770,6 +770,20 @@ func.func @test_gather_unranked_indices(%arg0: tensor<13x21x3xf32>, %arg1: tenso return %0 : tensor<13x26x3xf32> } +// ----- +// CHECK-LABEL: scatter +func.func @test_scatter(%arg0: tensor<13x52x3xf32>, %arg1: tensor<13x26xi32>, %arg2: tensor<13x26x3xf32>) -> tensor<13x52x3xf32> { + %0 = tosa.scatter %arg0, %arg1, %arg2 : (tensor<13x52x3xf32>, tensor<13x26xi32>, tensor<13x26x3xf32>) -> tensor<13x52x3xf32> + return %0 : tensor<13x52x3xf32> +} + +// ----- +// CHECK-LABEL: scatter_int64 +func.func @test_scatter_int64(%arg0: tensor<13x52x3xf32>, %arg1: tensor<13x26xi64>, %arg2: tensor<13x26x3xf32>) -> tensor<13x52x3xf32> { + %0 = tosa.scatter %arg0, %arg1, %arg2 : (tensor<13x52x3xf32>, tensor<13x26xi64>, tensor<13x26x3xf32>) -> tensor<13x52x3xf32> + return %0 : tensor<13x52x3xf32> +} + // ----- // CHECK-LABEL: scatter_unranked_indices func.func @test_scatter_unranked_indices(%arg0: tensor<13x21x3xf32>, %arg1: tensor<*xi32>, %arg2: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { From 40a9e3482a7641d2e6783cbf762ac676f1ae8019 Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Fri, 14 Nov 2025 10:24:26 +0000 Subject: [PATCH 18/22] 
[AArch64][llvm] Add support for Permission Overlay Extension 2 (FEAT_S1POE2) (#164912) Add assembly/disassembly support for AArch64 `FEAT_S1POE2` (Stage 1 Permission Overlay Extension 2), as blogged about here: * https://developer.arm.com/community/arm-community-blogs/b/architectures-and-processors-blog/posts/future-architecture-technologies-poe2-and-vmte and as documented here: * https://developer.arm.com/documentation/109697/2025_09/Future-Architecture-Technologies Co-authored-by: Rodolfo Wottrich --- clang/test/Driver/aarch64-vfat.c | 12 + .../print-supported-extensions-aarch64.c | 3 + llvm/docs/ReleaseNotes.md | 3 +- llvm/lib/Target/AArch64/AArch64Features.td | 9 + .../lib/Target/AArch64/AArch64InstrFormats.td | 96 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 27 + .../Target/AArch64/AArch64SystemOperands.td | 176 + .../AArch64/AsmParser/AArch64AsmParser.cpp | 80 +- .../MCTargetDesc/AArch64InstPrinter.cpp | 25 + .../AArch64/MCTargetDesc/AArch64InstPrinter.h | 3 + .../Target/AArch64/Utils/AArch64BaseInfo.cpp | 20 +- .../Target/AArch64/Utils/AArch64BaseInfo.h | 16 + llvm/test/MC/AArch64/arm-btie.s | 26 + .../MC/AArch64/arm-poe2-tlbid-diagnostics.s | 72 + llvm/test/MC/AArch64/arm-poe2-tlbid.s | 87 + llvm/test/MC/AArch64/arm-poe2.s | 3263 +++++++++++++++++ llvm/test/MC/AArch64/arm-tev.s | 41 + .../TargetParser/TargetParserTest.cpp | 8 + 18 files changed, 3961 insertions(+), 6 deletions(-) create mode 100644 llvm/test/MC/AArch64/arm-btie.s create mode 100644 llvm/test/MC/AArch64/arm-poe2-tlbid-diagnostics.s create mode 100644 llvm/test/MC/AArch64/arm-poe2-tlbid.s create mode 100644 llvm/test/MC/AArch64/arm-poe2.s create mode 100644 llvm/test/MC/AArch64/arm-tev.s diff --git a/clang/test/Driver/aarch64-vfat.c b/clang/test/Driver/aarch64-vfat.c index bd5eed275489f..047869fe816c0 100644 --- a/clang/test/Driver/aarch64-vfat.c +++ b/clang/test/Driver/aarch64-vfat.c @@ -5,3 +5,15 @@ // RUN: %clang -target aarch64 -march=armv9.7a+mops-go -### -c %s 2>&1 | FileCheck 
-check-prefix=VFAT-MOPS-GO %s // RUN: %clang -target aarch64 -march=armv9.7-a+mops-go -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-MOPS-GO %s // VFAT-MOPS-GO: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+mops-go" + +// RUN: %clang -target aarch64 -march=armv9.7a+poe2 -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-POE2 %s +// RUN: %clang -target aarch64 -march=armv9.7-a+poe2 -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-POE2 %s +// VFAT-POE2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+poe2" + +// RUN: %clang -target aarch64 -march=armv9.7a+tev -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-TEV %s +// RUN: %clang -target aarch64 -march=armv9.7-a+tev -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-TEV %s +// VFAT-TEV: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+tev" + +// RUN: %clang -target aarch64 -march=armv9.7a+btie -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-BTIE %s +// RUN: %clang -target aarch64 -march=armv9.7-a+btie -### -c %s 2>&1 | FileCheck -check-prefix=VFAT-BTIE %s +// VFAT-BTIE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.7a"{{.*}} "-target-feature" "+btie" diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c index f2da680b68d70..93373f41ad2cf 100644 --- a/clang/test/Driver/print-supported-extensions-aarch64.c +++ b/clang/test/Driver/print-supported-extensions-aarch64.c @@ -8,6 +8,7 @@ // CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension // CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension // CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: btie FEAT_BTIE Enable Enhanced Branch Target Identification extension // CHECK-NEXT: cmh FEAT_CMH Enable Armv9.7-A Contention Management Hints 
// CHECK-NEXT: cmpbr FEAT_CMPBR Enable Armv9.6-A base compare and branch instructions // CHECK-NEXT: fcma FEAT_FCMA Enable Armv8.3-A Floating-point complex number support @@ -59,6 +60,7 @@ // CHECK-NEXT: pauth-lr FEAT_PAuth_LR Enable Armv9.5-A PAC enhancements // CHECK-NEXT: pcdphint FEAT_PCDPHINT Enable Armv9.6-A Producer Consumer Data Placement hints // CHECK-NEXT: pmuv3 FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension +// CHECK-NEXT: poe2 FEAT_S1POE2 Enable Stage 1 Permission Overlays Extension 2 instructions // CHECK-NEXT: pops FEAT_PoPS Enable Armv9.6-A Point Of Physical Storage (PoPS) DC instructions // CHECK-NEXT: predres FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions // CHECK-NEXT: rng FEAT_RNG Enable Random Number generation instructions @@ -113,6 +115,7 @@ // CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions // CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Armv9.6-A Scalable Vector Extension 2.2 instructions // CHECK-NEXT: sve2p3 FEAT_SVE2p3 Enable Armv9.7-A Scalable Vector Extension 2.3 instructions +// CHECK-NEXT: tev FEAT_TEV Enable TIndex Exception-like Vector instructions // CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension // CHECK-NEXT: tlbid FEAT_TLBID Enable Armv9.7-A TLBI Domains extension // CHECK-NEXT: tlbiw FEAT_TLBIW Enable Armv9.5-A TLBI VMALL for Dirty State diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 6e30b20f55b6b..cf9b519dfad41 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -105,7 +105,8 @@ Changes to the AArch64 Backend architecture extensions. * Assembler/disassembler support has been added for 'Virtual Tagging - Extension (vMTE)' Future Architecture Technologies extension. + Extension (vMTE)' and 'Permission Overlay Extension version 2 (POE2)' + Future Architecture Technologies extensions. 
Changes to the AMDGPU Backend ----------------------------- diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index 7fd5254dfa536..58783d556324d 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -632,6 +632,15 @@ def FeatureF16F32MM : ExtensionWithMArch<"f16f32mm", "F16F32MM", "FEAT_F16F32MM" def FeatureMOPS_GO: ExtensionWithMArch<"mops-go", "MOPS_GO", "FEAT_MOPS_GO", "Enable memset acceleration granule only">; +def FeatureBTIE: ExtensionWithMArch<"btie", "BTIE", "FEAT_BTIE", + "Enable Enhanced Branch Target Identification extension">; + +def FeatureS1POE2: ExtensionWithMArch<"poe2", "POE2", "FEAT_S1POE2", + "Enable Stage 1 Permission Overlays Extension 2 instructions">; + +def FeatureTEV: ExtensionWithMArch<"tev", "TEV", "FEAT_TEV", + "Enable TIndex Exception-like Vector instructions">; + //===----------------------------------------------------------------------===// // Other Features //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 2bce5c89f8ba6..88c8d18c19320 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1909,6 +1909,21 @@ def CMHPriorityHint_op : Operand { }]; } +def TIndexHintOperand : AsmOperandClass { + let Name = "TIndexHint"; + let ParserMethod = "tryParseTIndexHint"; +} + +def TIndexhint_op : Operand { + let ParserMatchClass = TIndexHintOperand; + let PrintMethod = "printTIndexHintOp"; + let MCOperandPredicate = [{ + if (!MCOp.isImm()) + return false; + return AArch64TIndexHint::lookupTIndexByEncoding(MCOp.getImm()) != nullptr; + }]; +} + class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), "mrs", "\t$Rt, $systemreg"> { bits<16> systemreg; @@ -13366,3 +13381,84 @@ class STCPHInst : I< let Inst{7-5} = 0b100; let 
Inst{4-0} = 0b11111; } + +//--- +// Permission Overlays Extension 2 (FEAT_S1POE2) +//--- + +class TCHANGERegInst : I< + (outs GPR64:$Xd), + (ins GPR64:$Xn, TIndexhint_op:$nb), + asm, "\t$Xd, $Xn, $nb", "", []>, Sched<[]> { + bits<5> Xd; + bits<5> Xn; + bits<1> nb; + let Inst{31-19} = 0b1101010110000; + let Inst{18} = isB; + let Inst{17} = nb; + let Inst{16-10} = 0b0000000; + let Inst{9-5} = Xn; + let Inst{4-0} = Xd; +} + +class TCHANGEImmInst : I< + (outs GPR64:$Xd), + (ins imm0_127:$imm, TIndexhint_op:$nb), + asm, "\t$Xd, $imm, $nb", "", []>, Sched<[]> { + bits<5> Xd; + bits<7> imm; + bits<1> nb; + let Inst{31-19} = 0b1101010110010; + let Inst{18} = isB; + let Inst{17} = nb; + let Inst{16-12} = 0b00000; + let Inst{11-5} = imm; + let Inst{4-0} = Xd; +} + +class TENTERInst : I< + (outs), + (ins imm0_127:$imm, TIndexhint_op:$nb), + asm, "\t$imm, $nb", "", []>, Sched<[]> { + bits<7> imm; + bits<1> nb; + let Inst{31-18} = 0b11010100111000; + let Inst{17} = nb; + let Inst{16-12} = 0b00000; + let Inst{11-5} = imm; + let Inst{4-0} = 0b00000; +} + +class TEXITInst : I< + (outs), + (ins TIndexhint_op:$nb), + asm, "\t$nb", "", []>, Sched<[]> { + bits<1> nb; + let Inst{31-11} = 0b110101101111111100000; + let Inst{10} = nb; + let Inst{9-0} = 0b1111100000; +} + + +multiclass TCHANGEReg { + def NAME : TCHANGERegInst; + def : InstAlias(NAME) GPR64:$Xd, GPR64:$Xn, 0), 1>; +} + +multiclass TCHANGEImm { + def NAME : TCHANGEImmInst; + def : InstAlias(NAME) GPR64:$Xd, imm0_127:$imm, 0), 1>; +} + +multiclass TENTER { + def NAME : TENTERInst; + def : InstAlias(NAME) imm0_127:$imm, 0), 1>; +} + +multiclass TEXIT { + def NAME : TEXITInst; + def : InstAlias(NAME) 0), 1>; +} diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 34a20f09d2806..6366624d4499b 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -346,6 +346,8 @@ def HasCCDP : Predicate<"Subtarget->hasCCDP()">, 
AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">; def HasBTI : Predicate<"Subtarget->hasBTI()">, AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">; +def HasBTIE : Predicate<"Subtarget->hasBTIE()">, + AssemblerPredicateWithAll<(all_of FeatureBTIE), "btie">; def HasMTE : Predicate<"Subtarget->hasMTE()">, AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">; def HasTME : Predicate<"Subtarget->hasTME()">, @@ -407,6 +409,10 @@ def HasGCIE : Predicate<"Subtarget->hasGCIE()">, AssemblerPredicateWithAll<(all_of FeatureGCIE), "gcie">; def HasMOPS_GO : Predicate<"Subtarget->hasMOPS_GO()">, AssemblerPredicateWithAll<(all_of FeatureMOPS_GO), "mops-go">; +def HasS1POE2 : Predicate<"Subtarget->hasS1POE2()">, + AssemblerPredicateWithAll<(all_of FeatureS1POE2), "poe2">; +def HasTEV : Predicate<"Subtarget->hasTEV()">, + AssemblerPredicateWithAll<(all_of FeatureTEV), "tev">; def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; @@ -1542,6 +1548,7 @@ let Predicates = [HasPCDPHINT] in { // should not emit these mnemonics unless BTI is enabled. 
def : InstAlias<"bti", (HINT 32), 0>; def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>; +def : InstAlias<"bti r", (HINT 32)>, Requires<[HasBTIE]>; def : InstAlias<"bti", (HINT 32)>, Requires<[HasBTI]>; def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>; @@ -11444,6 +11451,26 @@ let Predicates = [HasCMH] in { def STCPH : STCPHInst<"stcph">; // Store Concurrent Priority Hint instruction } +//===----------------------------------------------------------------------===// +// Permission Overlays Extension 2 (FEAT_S1POE2) +//===----------------------------------------------------------------------===// + +let Predicates = [HasS1POE2] in { + defm TCHANGEBrr : TCHANGEReg<"tchangeb", true>; + defm TCHANGEFrr : TCHANGEReg<"tchangef", false>; + defm TCHANGEBri : TCHANGEImm<"tchangeb", true>; + defm TCHANGEFri : TCHANGEImm<"tchangef", false>; +} + +//===----------------------------------------------------------------------===// +// TIndex Exception-like Vector (FEAT_TEV) +//===----------------------------------------------------------------------===// + +let Predicates = [HasTEV] in { + defm TENTER : TENTER<"tenter">; + defm TEXIT : TEXIT<"texit">; +} + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" include "AArch64SMEInstrInfo.td" diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index ae46d717d0cb1..1dd132e9a7301 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -814,6 +814,7 @@ def lookupBTIByName : SearchIndex { let Key = ["Name"]; } +def : BTI<"r", 0b000>; def : BTI<"c", 0b010>; def : BTI<"j", 0b100>; def : BTI<"jc", 0b110>; @@ -833,6 +834,23 @@ class CMHPriorityHint encoding> : SearchableTable { def : CMHPriorityHint<"ph", 0b1>; + +//===----------------------------------------------------------------------===// +// TIndex instruction options. 
+//===----------------------------------------------------------------------===// + +class TIndex encoding> : SearchableTable { + let SearchableFields = ["Name", "Encoding"]; + let EnumValueField = "Encoding"; + + string Name = name; + bits<1> Encoding; + let Encoding = encoding; +} + +def : TIndex<"nb", 0b1>; + + //===----------------------------------------------------------------------===// // TLBI (translation lookaside buffer invalidate) instruction options. //===----------------------------------------------------------------------===// @@ -2694,3 +2712,161 @@ def : GIC<"ldhm", 0b110, 0b1100, 0b0010, 0b001>; def : GIC<"ldpend", 0b110, 0b1100, 0b0001, 0b100>; def : GIC<"ldpri", 0b110, 0b1100, 0b0001, 0b010>; def : GIC<"ldrcfg", 0b110, 0b1100, 0b0001, 0b101>; + + +// Stage 1 Permission Overlays Extension 2 (FEAT_S1POE2). +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"DPOTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b110>; +def : RWSysReg<"DPOTBR0_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b110>; +def : RWSysReg<"DPOTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b111>; +def : RWSysReg<"DPOTBR1_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b111>; +def : RWSysReg<"DPOTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b110>; +def : RWSysReg<"DPOTBR1_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b111>; +def : RWSysReg<"DPOTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b110>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"IRTBRU_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b100>; +def : RWSysReg<"IRTBRU_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b100>; +def : RWSysReg<"IRTBRP_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b101>; +def : RWSysReg<"IRTBRP_EL12", 0b11, 0b101, 0b0010, 0b0000, 0b101>; +def : RWSysReg<"IRTBRU_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b100>; +def : RWSysReg<"IRTBRP_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b101>; +def : RWSysReg<"IRTBRP_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b101>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TTTBRU_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b110>; +def : RWSysReg<"TTTBRU_EL12", 0b11, 0b101, 0b1010, 0b0010, 0b110>; 
+def : RWSysReg<"TTTBRP_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b111>; +def : RWSysReg<"TTTBRP_EL12", 0b11, 0b101, 0b1010, 0b0010, 0b111>; +def : RWSysReg<"TTTBRU_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b110>; +def : RWSysReg<"TTTBRP_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b111>; +def : RWSysReg<"TTTBRP_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b111>; + +foreach n = 0-15 in { + defvar nb = !cast>(n); + // Op0 Op1 CRn CRm Op2 + def : RWSysReg<"FGDTP"#n#"_EL1", 0b11, 0b000, 0b0011, {0b001,nb{3}}, nb{2-0}>; + def : RWSysReg<"FGDTP"#n#"_EL2", 0b11, 0b100, 0b0011, {0b001,nb{3}}, nb{2-0}>; + def : RWSysReg<"FGDTP"#n#"_EL12", 0b11, 0b101, 0b0011, {0b001,nb{3}}, nb{2-0}>; + def : RWSysReg<"FGDTP"#n#"_EL3", 0b11, 0b110, 0b0011, {0b001,nb{3}}, nb{2-0}>; + + def : RWSysReg<"FGDTU"#n#"_EL1", 0b11, 0b000, 0b0011, {0b010,nb{3}}, nb{2-0}>; + def : RWSysReg<"FGDTU"#n#"_EL2", 0b11, 0b100, 0b0011, {0b010,nb{3}}, nb{2-0}>; + def : RWSysReg<"FGDTU"#n#"_EL12", 0b11, 0b101, 0b0011, {0b010,nb{3}}, nb{2-0}>; +} + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"LDSTT_EL1", 0b11, 0b000, 0b0010, 0b0001, 0b111>; +def : RWSysReg<"LDSTT_EL12", 0b11, 0b101, 0b0010, 0b0001, 0b111>; +def : RWSysReg<"LDSTT_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b111>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TINDEX_EL0", 0b11, 0b011, 0b0100, 0b0000, 0b011>; +def : RWSysReg<"TINDEX_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b011>; +def : RWSysReg<"TINDEX_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b011>; +def : RWSysReg<"TINDEX_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b011>; +def : RWSysReg<"TINDEX_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b011>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"STINDEX_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b010>; +def : RWSysReg<"STINDEX_EL2", 0b11, 0b100, 0b0100, 0b0000, 0b010>; +def : RWSysReg<"STINDEX_EL12", 0b11, 0b101, 0b0100, 0b0000, 0b010>; +def : RWSysReg<"STINDEX_EL3", 0b11, 0b110, 0b0100, 0b0000, 0b010>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"TPIDR3_EL0", 0b11, 0b011, 0b1101, 0b0000, 0b000>; +def : RWSysReg<"TPIDR3_EL1", 0b11, 
0b000, 0b1101, 0b0000, 0b000>; +def : RWSysReg<"TPIDR3_EL12", 0b11, 0b101, 0b1101, 0b0000, 0b000>; +def : RWSysReg<"TPIDR3_EL2", 0b11, 0b100, 0b1101, 0b0000, 0b000>; +def : RWSysReg<"TPIDR3_EL3", 0b11, 0b110, 0b1101, 0b0000, 0b000>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"VNCCR_EL2", 0b11, 0b100, 0b0010, 0b0010, 0b001>; + +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"DPOCR_EL0", 0b11, 0b011, 0b0100, 0b0101, 0b010>; + +foreach n = 0-15 in { + defvar nb = !cast>(n); + // Op0 Op1 CRn CRm Op2 + def : RWSysReg<"AFGDTP"#n#"_EL1", 0b11, 0b000, 0b0011, {0b011,nb{3}}, nb{2-0}>; + def : RWSysReg<"AFGDTU"#n#"_EL1", 0b11, 0b000, 0b0011, {0b100,nb{3}}, nb{2-0}>; + def : RWSysReg<"AFGDTP"#n#"_EL2", 0b11, 0b100, 0b0011, {0b011,nb{3}}, nb{2-0}>; + def : RWSysReg<"AFGDTU"#n#"_EL2", 0b11, 0b100, 0b0011, {0b100,nb{3}}, nb{2-0}>; + def : RWSysReg<"AFGDTP"#n#"_EL12", 0b11, 0b101, 0b0011, {0b011,nb{3}}, nb{2-0}>; + def : RWSysReg<"AFGDTU"#n#"_EL12", 0b11, 0b101, 0b0011, {0b100,nb{3}}, nb{2-0}>; + def : RWSysReg<"AFGDTP"#n#"_EL3", 0b11, 0b110, 0b0011, {0b011,nb{3}}, nb{2-0}>; +} + +// Extra S1POE2 Hypervisor Configuration Registers +// Op0 Op1 CRn CRm Op2 +def : RWSysReg<"HCRMASK_EL2", 0b11, 0b100, 0b0001, 0b0101, 0b110>; +def : RWSysReg<"HCRXMASK_EL2", 0b11, 0b100, 0b0001, 0b0101, 0b111>; +def : RWSysReg<"NVHCR_EL2", 0b11, 0b100, 0b0001, 0b0101, 0b000>; +def : RWSysReg<"NVHCRX_EL2", 0b11, 0b100, 0b0001, 0b0101, 0b001>; +def : RWSysReg<"NVHCRMASK_EL2", 0b11, 0b100, 0b0001, 0b0101, 0b100>; +def : RWSysReg<"NVHCRXMASK_EL2", 0b11, 0b100, 0b0001, 0b0101, 0b101>; + +// S1POE2 Thread private state extension (FEAT_TPS/TPSP). 
+foreach n = 0-1 in { + defvar nb = !cast>(n); + // Op0 Op1 CRn CRm Op2 + def : RWSysReg<"TPMIN"#n#"_EL0", 0b11, 0b011, 0b0010, 0b0010, {0b1,nb,0}>; + def : RWSysReg<"TPMAX"#n#"_EL0", 0b11, 0b011, 0b0010, 0b0010, {0b1,nb,1}>; + def : RWSysReg<"TPMIN"#n#"_EL1", 0b11, 0b000, 0b0010, 0b0010, {0b1,nb,0}>; + def : RWSysReg<"TPMAX"#n#"_EL1", 0b11, 0b000, 0b0010, 0b0010, {0b1,nb,1}>; + def : RWSysReg<"TPMIN"#n#"_EL2", 0b11, 0b100, 0b0010, 0b0010, {0b1,nb,0}>; + def : RWSysReg<"TPMAX"#n#"_EL2", 0b11, 0b100, 0b0010, 0b0010, {0b1,nb,1}>; + def : RWSysReg<"TPMIN"#n#"_EL12", 0b11, 0b101, 0b0010, 0b0010, {0b1,nb,0}>; + def : RWSysReg<"TPMAX"#n#"_EL12", 0b11, 0b101, 0b0010, 0b0010, {0b1,nb,1}>; +} + +class PLBIEntry op1, bits<4> crn, bits<4> crm, bits<3> op2, string name, + bit needsreg, bit optionalreg> { + string Name = name; + bits<14> Encoding; + let Encoding{13-11} = op1; + let Encoding{10-7} = crn; + let Encoding{6-3} = crm; + let Encoding{2-0} = op2; + bit NeedsReg = needsreg; + bit OptionalReg = optionalreg; + string RequiresStr = [{ {AArch64::FeatureS1POE2} }]; +} + +def PLBITable : GenericTable { + let FilterClass = "PLBIEntry"; + let CppTypeName = "PLBI"; + let Fields = ["Name", "Encoding", "NeedsReg", "OptionalReg", "RequiresStr"]; + + let PrimaryKey = ["Encoding"]; + let PrimaryKeyName = "lookupPLBIByEncoding"; +} + +def lookupPLBIByName : SearchIndex { + let Table = PLBITable; + let Key = ["Name"]; +} + +multiclass PLBI op1, bits<4> crn, bits<3> op2, + bit needsreg, bit optreg> { + // Entries containing "IS" or "OS" allow optional regs when +tlbid enabled + def : PLBIEntry; + def : PLBIEntry; + def : PLBIEntry; + def : PLBIEntry; + def : PLBIEntry; + def : PLBIEntry; +} + +// CRm defines above six variants of each instruction. It is omitted here. 
+// Op1 CRn Op2 nr optreg +defm : PLBI<"ALLE3", 0b110, 0b1010, 0b000, 0, 0>; +defm : PLBI<"ALLE2", 0b100, 0b1010, 0b000, 0, 1>; +defm : PLBI<"ALLE1", 0b100, 0b1010, 0b100, 0, 1>; +defm : PLBI<"VMALLE1", 0b000, 0b1010, 0b000, 0, 1>; +defm : PLBI<"ASIDE1", 0b000, 0b1010, 0b010, 1, 0>; +defm : PLBI<"PERME3", 0b110, 0b1010, 0b001, 1, 0>; +defm : PLBI<"PERME2", 0b100, 0b1010, 0b001, 1, 0>; +defm : PLBI<"PERME1", 0b000, 0b1010, 0b001, 1, 0>; +defm : PLBI<"PERMAE1", 0b000, 0b1010, 0b011, 1, 0>; diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 2730833ba06d9..2b7b6ff41af12 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -268,6 +268,7 @@ class AArch64AsmParser : public MCTargetAsmParser { ParseStatus tryParsePSBHint(OperandVector &Operands); ParseStatus tryParseBTIHint(OperandVector &Operands); ParseStatus tryParseCMHPriorityHint(OperandVector &Operands); + ParseStatus tryParseTIndexHint(OperandVector &Operands); ParseStatus tryParseAdrpLabel(OperandVector &Operands); ParseStatus tryParseAdrLabel(OperandVector &Operands); template @@ -373,6 +374,7 @@ class AArch64Operand : public MCParsedAsmOperand { k_PHint, k_BTIHint, k_CMHPriorityHint, + k_TIndexHint, } Kind; SMLoc StartLoc, EndLoc; @@ -507,6 +509,11 @@ class AArch64Operand : public MCParsedAsmOperand { unsigned Length; unsigned Val; }; + struct TIndexHintOp { + const char *Data; + unsigned Length; + unsigned Val; + }; struct SVCROp { const char *Data; @@ -534,6 +541,7 @@ class AArch64Operand : public MCParsedAsmOperand { struct PHintOp PHint; struct BTIHintOp BTIHint; struct CMHPriorityHintOp CMHPriorityHint; + struct TIndexHintOp TIndexHint; struct ShiftExtendOp ShiftExtend; struct SVCROp SVCR; }; @@ -607,6 +615,9 @@ class AArch64Operand : public MCParsedAsmOperand { case k_CMHPriorityHint: CMHPriorityHint = o.CMHPriorityHint; break; + case k_TIndexHint: + 
TIndexHint = o.TIndexHint; + break; case k_ShiftExtend: ShiftExtend = o.ShiftExtend; break; @@ -791,6 +802,16 @@ class AArch64Operand : public MCParsedAsmOperand { return StringRef(CMHPriorityHint.Data, CMHPriorityHint.Length); } + unsigned getTIndexHint() const { + assert(Kind == k_TIndexHint && "Invalid access!"); + return TIndexHint.Val; + } + + StringRef getTIndexHintName() const { + assert(Kind == k_TIndexHint && "Invalid access!"); + return StringRef(TIndexHint.Data, TIndexHint.Length); + } + StringRef getSVCR() const { assert(Kind == k_SVCR && "Invalid access!"); return StringRef(SVCR.Data, SVCR.Length); @@ -1534,6 +1555,7 @@ class AArch64Operand : public MCParsedAsmOperand { bool isPHint() const { return Kind == k_PHint; } bool isBTIHint() const { return Kind == k_BTIHint; } bool isCMHPriorityHint() const { return Kind == k_CMHPriorityHint; } + bool isTIndexHint() const { return Kind == k_TIndexHint; } bool isShiftExtend() const { return Kind == k_ShiftExtend; } bool isShifter() const { if (!isShiftExtend()) @@ -2224,6 +2246,11 @@ class AArch64Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::createImm(getCMHPriorityHint())); } + void addTIndexHintOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getTIndexHint())); + } + void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = @@ -2582,6 +2609,17 @@ class AArch64Operand : public MCParsedAsmOperand { return Op; } + static std::unique_ptr + CreateTIndexHint(unsigned Val, StringRef Str, SMLoc S, MCContext &Ctx) { + auto Op = std::make_unique(k_TIndexHint, Ctx); + Op->TIndexHint.Val = Val; + Op->TIndexHint.Data = Str.data(); + Op->TIndexHint.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static std::unique_ptr CreateMatrixRegister(MCRegister Reg, unsigned ElementWidth, MatrixKind Kind, SMLoc S, SMLoc E, MCContext &Ctx) { 
@@ -2695,6 +2733,9 @@ void AArch64Operand::print(raw_ostream &OS, const MCAsmInfo &MAI) const { case k_CMHPriorityHint: OS << getCMHPriorityHintName(); break; + case k_TIndexHint: + OS << getTIndexHintName(); + break; case k_MatrixRegister: OS << ""; break; @@ -3336,6 +3377,23 @@ ParseStatus AArch64AsmParser::tryParseCMHPriorityHint(OperandVector &Operands) { return ParseStatus::Success; } +/// tryParseTIndexHint - Try to parse a TIndex operand +ParseStatus AArch64AsmParser::tryParseTIndexHint(OperandVector &Operands) { + SMLoc S = getLoc(); + const AsmToken &Tok = getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return TokError("invalid operand for instruction"); + + auto TIndex = AArch64TIndexHint::lookupTIndexByName(Tok.getString()); + if (!TIndex) + return TokError("invalid operand for instruction"); + + Operands.push_back(AArch64Operand::CreateTIndexHint( + TIndex->Encoding, Tok.getString(), S, getContext())); + Lex(); // Eat identifier token. + return ParseStatus::Success; +} + /// tryParseAdrpLabel - Parse and validate a source label for the ADRP /// instruction. 
ParseStatus AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) { @@ -3894,6 +3952,9 @@ static const struct Extension { {"f16f32dot", {AArch64::FeatureF16F32DOT}}, {"f16f32mm", {AArch64::FeatureF16F32MM}}, {"mops-go", {AArch64::FeatureMOPS_GO}}, + {"poe2", {AArch64::FeatureS1POE2}}, + {"tev", {AArch64::FeatureTEV}}, + {"btie", {AArch64::FeatureBTIE}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { @@ -3983,6 +4044,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, bool ExpectRegister = true; bool OptionalRegister = false; bool hasAll = getSTI().hasFeature(AArch64::FeatureAll); + bool hasTLBID = getSTI().hasFeature(AArch64::FeatureTLBID); if (Mnemonic == "ic") { const AArch64IC::IC *IC = AArch64IC::lookupICByName(Op); @@ -4063,6 +4125,20 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, } ExpectRegister = false; createSysAlias(GSB->Encoding, Operands, S); + } else if (Mnemonic == "plbi") { + const AArch64PLBI::PLBI *PLBI = AArch64PLBI::lookupPLBIByName(Op); + if (!PLBI) + return TokError("invalid operand for PLBI instruction"); + else if (!PLBI->haveFeatures(getSTI().getFeatureBits())) { + std::string Str("PLBI " + std::string(PLBI->Name) + " requires: "); + setRequiredFeatureString(PLBI->getRequiredFeatures(), Str); + return TokError(Str); + } + ExpectRegister = PLBI->NeedsReg; + if (hasAll || hasTLBID) { + OptionalRegister = PLBI->OptionalReg; + } + createSysAlias(PLBI->Encoding, Operands, S); } else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp" || Mnemonic == "cosp") { @@ -5437,11 +5513,11 @@ bool AArch64AsmParser::parseInstruction(ParseInstructionInfo &Info, size_t Start = 0, Next = Name.find('.'); StringRef Head = Name.slice(Start, Next); - // IC, DC, AT, TLBI, MLBI, GIC{R}, GSB and Prediction invalidation + // IC, DC, AT, TLBI, MLBI, PLBI, GIC{R}, GSB and Prediction invalidation // instructions are aliases for the SYS instruction. 
if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi" || Head == "cfp" || Head == "dvp" || Head == "cpp" || Head == "cosp" || - Head == "mlbi" || Head == "gic" || Head == "gsb") + Head == "mlbi" || Head == "plbi" || Head == "gic" || Head == "gsb") return parseSysAlias(Head, NameLoc, Operands); // GICR instructions are aliases for the SYSL instruction. diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 4cd51d6701d97..bbc34ad35296c 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1047,6 +1047,18 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, Ins = "gsb\t"; Name = std::string(GSB->Name); } + } else if (CnVal == 10) { + // PLBI aliases + const AArch64PLBI::PLBI *PLBI = AArch64PLBI::lookupPLBIByEncoding(Encoding); + if (!PLBI || !PLBI->haveFeatures(STI.getFeatureBits())) + return false; + + NeedsReg = PLBI->NeedsReg; + if (STI.hasFeature(AArch64::FeatureAll) || + STI.hasFeature(AArch64::FeatureTLBID)) + OptionalReg = PLBI->OptionalReg; + Ins = "plbi\t"; + Name = std::string(PLBI->Name); } else return false; @@ -1608,6 +1620,19 @@ void AArch64InstPrinter::printCMHPriorityHintOp(const MCInst *MI, AArch64CMHPriorityHint::lookupCMHPriorityHintByEncoding(priorityhint_op); if (PHint) O << PHint->Name; + else + markup(O, Markup::Immediate) << '#' << formatImm(priorityhint_op); +} + +void AArch64InstPrinter::printTIndexHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned tindexhintop = MI->getOperand(OpNum).getImm(); + auto TIndex = AArch64TIndexHint::lookupTIndexByEncoding(tindexhintop); + if (TIndex) + O << TIndex->Name; + else + markup(O, Markup::Immediate) << '#' << formatImm(tindexhintop); } void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, diff --git 
a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index 307402d920d32..3f7a3b4b0667b 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -156,6 +156,9 @@ class AArch64InstPrinter : public MCInstPrinter { void printCMHPriorityHintOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + void printTIndexHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index 268a22968f8ab..556d2c32569b4 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -145,6 +145,13 @@ namespace AArch64CMHPriorityHint { } // namespace AArch64CMHPriorityHint } // namespace llvm +namespace llvm { +namespace AArch64TIndexHint { +#define GET_TINDEX_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TIndexHint +} // namespace llvm + namespace llvm { namespace AArch64SysReg { #define GET_SysRegsList_IMPL @@ -186,11 +193,18 @@ std::string AArch64SysReg::genericRegisterString(uint32_t Bits) { } namespace llvm { - namespace AArch64TLBI { +namespace AArch64TLBI { #define GET_TLBITable_IMPL #include "AArch64GenSystemOperands.inc" - } -} +} // namespace AArch64TLBI +} // namespace llvm + +namespace llvm { +namespace AArch64PLBI { +#define GET_PLBITable_IMPL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64PLBI +} // namespace llvm namespace llvm { namespace AArch64TLBIP { diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 27812e94a3516..83157b5513da2 100644 --- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h 
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -695,6 +695,14 @@ struct CMHPriorityHint : SysAlias { #include "AArch64GenSystemOperands.inc" } // namespace AArch64CMHPriorityHint +namespace AArch64TIndexHint { +struct TIndex : SysAlias { + using SysAlias::SysAlias; +}; +#define GET_TINDEX_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64TIndexHint + namespace AArch64SME { enum ToggleCondition : unsigned { Always, @@ -853,6 +861,14 @@ struct GSB : SysAlias { #include "AArch64GenSystemOperands.inc" } // namespace AArch64GSB +namespace AArch64PLBI { +struct PLBI : SysAliasOptionalReg { + using SysAliasOptionalReg::SysAliasOptionalReg; +}; +#define GET_PLBITable_DECL +#include "AArch64GenSystemOperands.inc" +} // namespace AArch64PLBI + namespace AArch64II { /// Target Operand Flag enum. enum TOF { diff --git a/llvm/test/MC/AArch64/arm-btie.s b/llvm/test/MC/AArch64/arm-btie.s new file mode 100644 index 0000000000000..889b6b94063dc --- /dev/null +++ b/llvm/test/MC/AArch64/arm-btie.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+btie < %s | FileCheck %s +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding -mattr=+bti < %s | FileCheck %s --check-prefix=NOBTIE +// RUN: llvm-mc -triple aarch64-none-linux-gnu -show-encoding < %s | FileCheck %s --check-prefix=HINT + +// "bti r" is the preferred encoding when +btie or +poe2 is specified. +// Otherwise default back to plain "bti". They are aliases of each other. +// Check that disassembly when `btie` is not specified causes plain +// "bti" to be emitted. 
+ +bti +bti r + +// CHECK: bti r // encoding: [0x1f,0x24,0x03,0xd5] +// CHECK: bti r // encoding: [0x1f,0x24,0x03,0xd5] + +// NOBTIE: bti // encoding: [0x1f,0x24,0x03,0xd5] +// NOBTIE: bti // encoding: [0x1f,0x24,0x03,0xd5] + +// HINT: hint #32 // encoding: [0x1f,0x24,0x03,0xd5] +// HINT: hint #32 // encoding: [0x1f,0x24,0x03,0xd5] + +hint #32 + +// CHECK: bti r // encoding: [0x1f,0x24,0x03,0xd5] +// NOBTIE: bti // encoding: [0x1f,0x24,0x03,0xd5] +// HINT: hint #32 // encoding: [0x1f,0x24,0x03,0xd5] diff --git a/llvm/test/MC/AArch64/arm-poe2-tlbid-diagnostics.s b/llvm/test/MC/AArch64/arm-poe2-tlbid-diagnostics.s new file mode 100644 index 0000000000000..c96978b7cf47e --- /dev/null +++ b/llvm/test/MC/AArch64/arm-poe2-tlbid-diagnostics.s @@ -0,0 +1,72 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+poe2 < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+poe2,+tlbid < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-NO-REGISTER + +// Test without using +tlbid - no optional register operand allowed + +plbi alle2is, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle2os, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle1is, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle1os, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi vmalle1is, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi vmalle1os, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle2isnxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle2osnxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle1isnxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi alle1osnxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi 
vmalle1isnxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +plbi vmalle1osnxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register + +// Tests where no optional register operand allowed +plbi alle2, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + +plbi alle1, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + +plbi vmalle1, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + +plbi alle2nxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + +plbi alle1nxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + +plbi vmalle1nxs, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + +plbi alle3, x0 +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-NO-REGISTER: error: specified plbi op does not use a register + diff --git a/llvm/test/MC/AArch64/arm-poe2-tlbid.s b/llvm/test/MC/AArch64/arm-poe2-tlbid.s new file mode 100644 index 0000000000000..e2d4c5ca99a58 --- /dev/null +++ b/llvm/test/MC/AArch64/arm-poe2-tlbid.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+tlbid,+poe2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+poe2 < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+tlbid,+poe2 < %s \ +// RUN: | llvm-objdump -d --mattr=+tlbid,+poe2 --no-print-imm-hex - | FileCheck %s 
--check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+tlbid,+poe2 < %s \ +// RUN: | llvm-objdump -d --mattr=-tlbid,-poe2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+tlbid,+poe2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+tlbid,+poe2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// FEAT_TLBID and POE2 combined + +plbi alle2is, x0 +// CHECK-INST: plbi alle2is, x0 +// CHECK-ENCODING: encoding: [0x00,0xa3,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50ca300 sys #4, c10, c3, #0, x0 + +plbi alle2os, x0 +// CHECK-INST: plbi alle2os, x0 +// CHECK-ENCODING: encoding: [0x00,0xa1,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50ca100 sys #4, c10, c1, #0, x0 + +plbi alle1is, x0 +// CHECK-INST: plbi alle1is, x0 +// CHECK-ENCODING: encoding: [0x80,0xa3,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50ca380 sys #4, c10, c3, #4, x0 + +plbi alle1os, x0 +// CHECK-INST: plbi alle1os, x0 +// CHECK-ENCODING: encoding: [0x80,0xa1,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50ca180 sys #4, c10, c1, #4, x0 + +plbi vmalle1is, x0 +// CHECK-INST: plbi vmalle1is, x0 +// CHECK-ENCODING: encoding: [0x00,0xa3,0x08,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d508a300 sys #0, c10, c3, #0, x0 + +plbi vmalle1os, x0 +// CHECK-INST: plbi vmalle1os, x0 +// CHECK-ENCODING: encoding: [0x00,0xa1,0x08,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d508a100 sys #0, c10, c1, #0, x0 + +plbi alle2isnxs, x0 +// CHECK-INST: plbi 
alle2isnxs, x0 +// CHECK-ENCODING: encoding: [0x00,0xab,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50cab00 sys #4, c10, c11, #0, x0 + +plbi alle2osnxs, x0 +// CHECK-INST: plbi alle2osnxs, x0 +// CHECK-ENCODING: encoding: [0x00,0xa9,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50ca900 sys #4, c10, c9, #0, x0 + +plbi alle1isnxs, x0 +// CHECK-INST: plbi alle1isnxs, x0 +// CHECK-ENCODING: encoding: [0x80,0xab,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50cab80 sys #4, c10, c11, #4, x0 + +plbi alle1osnxs, x0 +// CHECK-INST: plbi alle1osnxs, x0 +// CHECK-ENCODING: encoding: [0x80,0xa9,0x0c,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d50ca980 sys #4, c10, c9, #4, x0 + +plbi vmalle1isnxs, x0 +// CHECK-INST: plbi vmalle1isnxs, x0 +// CHECK-ENCODING: encoding: [0x00,0xab,0x08,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d508ab00 sys #0, c10, c11, #0, x0 + +plbi vmalle1osnxs, x0 +// CHECK-INST: plbi vmalle1osnxs, x0 +// CHECK-ENCODING: encoding: [0x00,0xa9,0x08,0xd5] +// CHECK-ERROR: error: specified plbi op does not use a register +// CHECK-UNKNOWN: d508a900 sys #0, c10, c9, #0, x0 diff --git a/llvm/test/MC/AArch64/arm-poe2.s b/llvm/test/MC/AArch64/arm-poe2.s new file mode 100644 index 0000000000000..b9aa734cf5b68 --- /dev/null +++ b/llvm/test/MC/AArch64/arm-poe2.s @@ -0,0 +1,3263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+poe2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+poe2 < %s \ +// RUN: | llvm-objdump -d --mattr=+poe2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+poe2 < %s \ +// RUN: | llvm-objdump -d --mattr=-poe2 --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+poe2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+poe2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +//------------------------------------------------------------------------------ +// Stage 1 Permission Overlays Extension 2 (FEAT_S1POE2). +//------------------------------------------------------------------------------ + + +mrs x0, TPIDR3_EL0 +// CHECK-INST: mrs x0, TPIDR3_EL0 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x3b,0xd5] +// CHECK-UNKNOWN: d53bd000 mrs x0, TPIDR3_EL0 + +mrs x0, TPIDR3_EL1 +// CHECK-INST: mrs x0, TPIDR3_EL1 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x38,0xd5] +// CHECK-UNKNOWN: d538d000 mrs x0, TPIDR3_EL1 + +mrs x0, TPIDR3_EL12 +// CHECK-INST: mrs x0, TPIDR3_EL12 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x3d,0xd5] +// CHECK-UNKNOWN: d53dd000 mrs x0, TPIDR3_EL12 + +mrs x0, TPIDR3_EL2 +// CHECK-INST: mrs x0, TPIDR3_EL2 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x3c,0xd5] +// CHECK-UNKNOWN: d53cd000 mrs x0, TPIDR3_EL2 + +mrs x0, TPIDR3_EL3 +// CHECK-INST: mrs x0, TPIDR3_EL3 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x3e,0xd5] +// CHECK-UNKNOWN: d53ed000 mrs x0, TPIDR3_EL3 + +mrs x0, VNCCR_EL2 +// CHECK-INST: mrs x0, VNCCR_EL2 +// CHECK-ENCODING: encoding: [0x20,0x22,0x3c,0xd5] +// CHECK-UNKNOWN: d53c2220 mrs x0, VNCCR_EL2 + +mrs x0, DPOCR_EL0 +// CHECK-INST: mrs x0, DPOCR_EL0 +// CHECK-ENCODING: encoding: [0x40,0x45,0x3b,0xd5] +// CHECK-UNKNOWN: d53b4540 mrs x0, DPOCR_EL0 + +mrs x0, HCRMASK_EL2 +// CHECK-INST: mrs x0, HCRMASK_EL2 +// CHECK-ENCODING: encoding: [0xc0,0x15,0x3c,0xd5] +// CHECK-UNKNOWN: d53c15c0 mrs x0, HCRMASK_EL2 + +mrs x0, HCRXMASK_EL2 +// CHECK-INST: 
mrs x0, HCRXMASK_EL2 +// CHECK-ENCODING: encoding: [0xe0,0x15,0x3c,0xd5] +// CHECK-UNKNOWN: d53c15e0 mrs x0, HCRXMASK_EL2 + +mrs x0, HCR_EL2 +// CHECK-INST: mrs x0, HCR_EL2 +// CHECK-ENCODING: encoding: [0x00,0x11,0x3c,0xd5] +// CHECK-UNKNOWN: d53c1100 mrs x0, HCR_EL2 + +mrs x0, NVHCR_EL2 +// CHECK-INST: mrs x0, NVHCR_EL2 +// CHECK-ENCODING: encoding: [0x00,0x15,0x3c,0xd5] +// CHECK-UNKNOWN: d53c1500 mrs x0, NVHCR_EL2 + +mrs x0, NVHCRX_EL2 +// CHECK-INST: mrs x0, NVHCRX_EL2 +// CHECK-ENCODING: encoding: [0x20,0x15,0x3c,0xd5] +// CHECK-UNKNOWN: d53c1520 mrs x0, NVHCRX_EL2 + +mrs x0, NVHCRMASK_EL2 +// CHECK-INST: mrs x0, NVHCRMASK_EL2 +// CHECK-ENCODING: encoding: [0x80,0x15,0x3c,0xd5] +// CHECK-UNKNOWN: d53c1580 mrs x0, NVHCRMASK_EL2 + +mrs x0, NVHCRXMASK_EL2 +// CHECK-INST: mrs x0, NVHCRXMASK_EL2 +// CHECK-ENCODING: encoding: [0xa0,0x15,0x3c,0xd5] +// CHECK-UNKNOWN: d53c15a0 mrs x0, NVHCRXMASK_EL2 + +mrs x3, DPOTBR0_EL1 +// CHECK-INST: mrs x3, DPOTBR0_EL1 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x38,0xd5] +// CHECK-UNKNOWN: d53820c3 mrs x3, DPOTBR0_EL1 + +mrs x3, DPOTBR0_EL12 +// CHECK-INST: mrs x3, DPOTBR0_EL12 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x3d,0xd5] +// CHECK-UNKNOWN: d53d20c3 mrs x3, DPOTBR0_EL12 + +mrs x3, DPOTBR1_EL1 +// CHECK-INST: mrs x3, DPOTBR1_EL1 +// CHECK-ENCODING: encoding: [0xe3,0x20,0x38,0xd5] +// CHECK-UNKNOWN: d53820e3 mrs x3, DPOTBR1_EL1 + +mrs x3, DPOTBR1_EL12 +// CHECK-INST: mrs x3, DPOTBR1_EL12 +// CHECK-ENCODING: encoding: [0xe3,0x20,0x3d,0xd5] +// CHECK-UNKNOWN: d53d20e3 mrs x3, DPOTBR1_EL12 + +mrs x3, DPOTBR0_EL2 +// CHECK-INST: mrs x3, DPOTBR0_EL2 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x3c,0xd5] +// CHECK-UNKNOWN: d53c20c3 mrs x3, DPOTBR0_EL2 + +mrs x3, DPOTBR1_EL2 +// CHECK-INST: mrs x3, DPOTBR1_EL2 +// CHECK-ENCODING: encoding: [0xe3,0x20,0x3c,0xd5] +// CHECK-UNKNOWN: d53c20e3 mrs x3, DPOTBR1_EL2 + +mrs x3, DPOTBR0_EL3 +// CHECK-INST: mrs x3, DPOTBR0_EL3 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x3e,0xd5] +// 
CHECK-UNKNOWN: d53e20c3 mrs x3, DPOTBR0_EL3 + +mrs x3, IRTBRU_EL1 +// CHECK-INST: mrs x3, IRTBRU_EL1 +// CHECK-ENCODING: encoding: [0x83,0x20,0x38,0xd5] +// CHECK-UNKNOWN: d5382083 mrs x3, IRTBRU_EL1 + +mrs x3, IRTBRU_EL12 +// CHECK-INST: mrs x3, IRTBRU_EL12 +// CHECK-ENCODING: encoding: [0x83,0x20,0x3d,0xd5] +// CHECK-UNKNOWN: d53d2083 mrs x3, IRTBRU_EL12 + +mrs x3, IRTBRP_EL1 +// CHECK-INST: mrs x3, IRTBRP_EL1 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x38,0xd5] +// CHECK-UNKNOWN: d53820a3 mrs x3, IRTBRP_EL1 + +mrs x3, IRTBRP_EL12 +// CHECK-INST: mrs x3, IRTBRP_EL12 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x3d,0xd5] +// CHECK-UNKNOWN: d53d20a3 mrs x3, IRTBRP_EL12 + +mrs x3, IRTBRU_EL2 +// CHECK-INST: mrs x3, IRTBRU_EL2 +// CHECK-ENCODING: encoding: [0x83,0x20,0x3c,0xd5] +// CHECK-UNKNOWN: d53c2083 mrs x3, IRTBRU_EL2 + +mrs x3, IRTBRP_EL2 +// CHECK-INST: mrs x3, IRTBRP_EL2 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x3c,0xd5] +// CHECK-UNKNOWN: d53c20a3 mrs x3, IRTBRP_EL2 + +mrs x3, IRTBRP_EL3 +// CHECK-INST: mrs x3, IRTBRP_EL3 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x3e,0xd5] +// CHECK-UNKNOWN: d53e20a3 mrs x3, IRTBRP_EL3 + +mrs x3, TTTBRU_EL1 +// CHECK-INST: mrs x3, TTTBRU_EL1 +// CHECK-ENCODING: encoding: [0xc3,0xa2,0x38,0xd5] +// CHECK-UNKNOWN: d538a2c3 mrs x3, TTTBRU_EL1 + +mrs x3, TTTBRU_EL12 +// CHECK-INST: mrs x3, TTTBRU_EL12 +// CHECK-ENCODING: encoding: [0xc3,0xa2,0x3d,0xd5] +// CHECK-UNKNOWN: d53da2c3 mrs x3, TTTBRU_EL12 + +mrs x3, TTTBRP_EL1 +// CHECK-INST: mrs x3, TTTBRP_EL1 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x38,0xd5] +// CHECK-UNKNOWN: d538a2e3 mrs x3, TTTBRP_EL1 + +mrs x3, TTTBRP_EL12 +// CHECK-INST: mrs x3, TTTBRP_EL12 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x3d,0xd5] +// CHECK-UNKNOWN: d53da2e3 mrs x3, TTTBRP_EL12 + +mrs x3, TTTBRU_EL2 +// CHECK-INST: mrs x3, TTTBRU_EL2 +// CHECK-ENCODING: encoding: [0xc3,0xa2,0x3c,0xd5] +// CHECK-UNKNOWN: d53ca2c3 mrs x3, TTTBRU_EL2 + +mrs x3, TTTBRP_EL2 +// CHECK-INST: mrs x3, TTTBRP_EL2 +// 
CHECK-ENCODING: encoding: [0xe3,0xa2,0x3c,0xd5] +// CHECK-UNKNOWN: d53ca2e3 mrs x3, TTTBRP_EL2 + +mrs x3, TTTBRP_EL3 +// CHECK-INST: mrs x3, TTTBRP_EL3 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x3e,0xd5] +// CHECK-UNKNOWN: d53ea2e3 mrs x3, TTTBRP_EL3 + +mrs x3, LDSTT_EL1 +// CHECK-INST: mrs x3, LDSTT_EL1 +// CHECK-ENCODING: encoding: [0xe3,0x21,0x38,0xd5] +// CHECK-UNKNOWN: d53821e3 mrs x3, LDSTT_EL1 + +mrs x3, LDSTT_EL12 +// CHECK-INST: mrs x3, LDSTT_EL12 +// CHECK-ENCODING: encoding: [0xe3,0x21,0x3d,0xd5] +// CHECK-UNKNOWN: d53d21e3 mrs x3, LDSTT_EL12 + +mrs x3, LDSTT_EL2 +// CHECK-INST: mrs x3, LDSTT_EL2 +// CHECK-ENCODING: encoding: [0xe3,0x21,0x3c,0xd5] +// CHECK-UNKNOWN: d53c21e3 mrs x3, LDSTT_EL2 + +mrs x3, TINDEX_EL0 +// CHECK-INST: mrs x3, TINDEX_EL0 +// CHECK-ENCODING: encoding: [0x63,0x40,0x3b,0xd5] +// CHECK-UNKNOWN: d53b4063 mrs x3, TINDEX_EL0 + +mrs x3, TINDEX_EL1 +// CHECK-INST: mrs x3, TINDEX_EL1 +// CHECK-ENCODING: encoding: [0x63,0x40,0x38,0xd5] +// CHECK-UNKNOWN: d5384063 mrs x3, TINDEX_EL1 + +mrs x3, TINDEX_EL12 +// CHECK-INST: mrs x3, TINDEX_EL12 +// CHECK-ENCODING: encoding: [0x63,0x40,0x3d,0xd5] +// CHECK-UNKNOWN: d53d4063 mrs x3, TINDEX_EL12 + +mrs x3, TINDEX_EL2 +// CHECK-INST: mrs x3, TINDEX_EL2 +// CHECK-ENCODING: encoding: [0x63,0x40,0x3c,0xd5] +// CHECK-UNKNOWN: d53c4063 mrs x3, TINDEX_EL2 + +mrs x3, TINDEX_EL3 +// CHECK-INST: mrs x3, TINDEX_EL3 +// CHECK-ENCODING: encoding: [0x63,0x40,0x3e,0xd5] +// CHECK-UNKNOWN: d53e4063 mrs x3, TINDEX_EL3 + +mrs x3, STINDEX_EL1 +// CHECK-INST: mrs x3, STINDEX_EL1 +// CHECK-ENCODING: encoding: [0x43,0x40,0x38,0xd5] +// CHECK-UNKNOWN: d5384043 mrs x3, STINDEX_EL1 + +mrs x3, STINDEX_EL12 +// CHECK-INST: mrs x3, STINDEX_EL12 +// CHECK-ENCODING: encoding: [0x43,0x40,0x3d,0xd5] +// CHECK-UNKNOWN: d53d4043 mrs x3, STINDEX_EL12 + +mrs x3, STINDEX_EL2 +// CHECK-INST: mrs x3, STINDEX_EL2 +// CHECK-ENCODING: encoding: [0x43,0x40,0x3c,0xd5] +// CHECK-UNKNOWN: d53c4043 mrs x3, STINDEX_EL2 + +mrs x3, STINDEX_EL3 
+// CHECK-INST: mrs x3, STINDEX_EL3 +// CHECK-ENCODING: encoding: [0x43,0x40,0x3e,0xd5] +// CHECK-UNKNOWN: d53e4043 mrs x3, STINDEX_EL3 + +mrs x3, FGDTP0_EL1 +// CHECK-INST: mrs x3, FGDTP0_EL1 +// CHECK-ENCODING: encoding: [0x03,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d5383203 mrs x3, FGDTP0_EL1 + +mrs x3, FGDTU0_EL1 +// CHECK-INST: mrs x3, FGDTU0_EL1 +// CHECK-ENCODING: encoding: [0x03,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d5383403 mrs x3, FGDTU0_EL1 + +mrs x3, FGDTP0_EL2 +// CHECK-INST: mrs x3, FGDTP0_EL2 +// CHECK-ENCODING: encoding: [0x03,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3203 mrs x3, FGDTP0_EL2 + +mrs x3, FGDTU0_EL2 +// CHECK-INST: mrs x3, FGDTU0_EL2 +// CHECK-ENCODING: encoding: [0x03,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3403 mrs x3, FGDTU0_EL2 + +mrs x3, FGDTP0_EL12 +// CHECK-INST: mrs x3, FGDTP0_EL12 +// CHECK-ENCODING: encoding: [0x03,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3203 mrs x3, FGDTP0_EL12 + +mrs x3, FGDTU0_EL12 +// CHECK-INST: mrs x3, FGDTU0_EL12 +// CHECK-ENCODING: encoding: [0x03,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3403 mrs x3, FGDTU0_EL12 + +mrs x3, FGDTP0_EL3 +// CHECK-INST: mrs x3, FGDTP0_EL3 +// CHECK-ENCODING: encoding: [0x03,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3203 mrs x3, FGDTP0_EL3 + +mrs x3, FGDTP1_EL1 +// CHECK-INST: mrs x3, FGDTP1_EL1 +// CHECK-ENCODING: encoding: [0x23,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d5383223 mrs x3, FGDTP1_EL1 + +mrs x3, FGDTU1_EL1 +// CHECK-INST: mrs x3, FGDTU1_EL1 +// CHECK-ENCODING: encoding: [0x23,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d5383423 mrs x3, FGDTU1_EL1 + +mrs x3, FGDTP1_EL2 +// CHECK-INST: mrs x3, FGDTP1_EL2 +// CHECK-ENCODING: encoding: [0x23,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3223 mrs x3, FGDTP1_EL2 + +mrs x3, FGDTU1_EL2 +// CHECK-INST: mrs x3, FGDTU1_EL2 +// CHECK-ENCODING: encoding: [0x23,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3423 mrs x3, FGDTU1_EL2 + +mrs x3, FGDTP1_EL12 +// CHECK-INST: mrs x3, FGDTP1_EL12 +// CHECK-ENCODING: encoding: [0x23,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3223 
mrs x3, FGDTP1_EL12 + +mrs x3, FGDTU1_EL12 +// CHECK-INST: mrs x3, FGDTU1_EL12 +// CHECK-ENCODING: encoding: [0x23,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3423 mrs x3, FGDTU1_EL12 + +mrs x3, FGDTP1_EL3 +// CHECK-INST: mrs x3, FGDTP1_EL3 +// CHECK-ENCODING: encoding: [0x23,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3223 mrs x3, FGDTP1_EL3 + +mrs x3, FGDTP2_EL1 +// CHECK-INST: mrs x3, FGDTP2_EL1 +// CHECK-ENCODING: encoding: [0x43,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d5383243 mrs x3, FGDTP2_EL1 + +mrs x3, FGDTU2_EL1 +// CHECK-INST: mrs x3, FGDTU2_EL1 +// CHECK-ENCODING: encoding: [0x43,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d5383443 mrs x3, FGDTU2_EL1 + +mrs x3, FGDTP2_EL2 +// CHECK-INST: mrs x3, FGDTP2_EL2 +// CHECK-ENCODING: encoding: [0x43,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3243 mrs x3, FGDTP2_EL2 + +mrs x3, FGDTU2_EL2 +// CHECK-INST: mrs x3, FGDTU2_EL2 +// CHECK-ENCODING: encoding: [0x43,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3443 mrs x3, FGDTU2_EL2 + +mrs x3, FGDTP2_EL12 +// CHECK-INST: mrs x3, FGDTP2_EL12 +// CHECK-ENCODING: encoding: [0x43,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3243 mrs x3, FGDTP2_EL12 + +mrs x3, FGDTU2_EL12 +// CHECK-INST: mrs x3, FGDTU2_EL12 +// CHECK-ENCODING: encoding: [0x43,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3443 mrs x3, FGDTU2_EL12 + +mrs x3, FGDTP2_EL3 +// CHECK-INST: mrs x3, FGDTP2_EL3 +// CHECK-ENCODING: encoding: [0x43,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3243 mrs x3, FGDTP2_EL3 + +mrs x3, FGDTP3_EL1 +// CHECK-INST: mrs x3, FGDTP3_EL1 +// CHECK-ENCODING: encoding: [0x63,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d5383263 mrs x3, FGDTP3_EL1 + +mrs x3, FGDTU3_EL1 +// CHECK-INST: mrs x3, FGDTU3_EL1 +// CHECK-ENCODING: encoding: [0x63,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d5383463 mrs x3, FGDTU3_EL1 + +mrs x3, FGDTP3_EL2 +// CHECK-INST: mrs x3, FGDTP3_EL2 +// CHECK-ENCODING: encoding: [0x63,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3263 mrs x3, FGDTP3_EL2 + +mrs x3, FGDTU3_EL2 +// CHECK-INST: mrs x3, FGDTU3_EL2 +// CHECK-ENCODING: encoding: 
[0x63,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3463 mrs x3, FGDTU3_EL2 + +mrs x3, FGDTP3_EL12 +// CHECK-INST: mrs x3, FGDTP3_EL12 +// CHECK-ENCODING: encoding: [0x63,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3263 mrs x3, FGDTP3_EL12 + +mrs x3, FGDTU3_EL12 +// CHECK-INST: mrs x3, FGDTU3_EL12 +// CHECK-ENCODING: encoding: [0x63,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3463 mrs x3, FGDTU3_EL12 + +mrs x3, FGDTP3_EL3 +// CHECK-INST: mrs x3, FGDTP3_EL3 +// CHECK-ENCODING: encoding: [0x63,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3263 mrs x3, FGDTP3_EL3 + +mrs x3, FGDTP4_EL1 +// CHECK-INST: mrs x3, FGDTP4_EL1 +// CHECK-ENCODING: encoding: [0x83,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d5383283 mrs x3, FGDTP4_EL1 + +mrs x3, FGDTU4_EL1 +// CHECK-INST: mrs x3, FGDTU4_EL1 +// CHECK-ENCODING: encoding: [0x83,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d5383483 mrs x3, FGDTU4_EL1 + +mrs x3, FGDTP4_EL2 +// CHECK-INST: mrs x3, FGDTP4_EL2 +// CHECK-ENCODING: encoding: [0x83,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3283 mrs x3, FGDTP4_EL2 + +mrs x3, FGDTU4_EL2 +// CHECK-INST: mrs x3, FGDTU4_EL2 +// CHECK-ENCODING: encoding: [0x83,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3483 mrs x3, FGDTU4_EL2 + +mrs x3, FGDTP4_EL12 +// CHECK-INST: mrs x3, FGDTP4_EL12 +// CHECK-ENCODING: encoding: [0x83,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3283 mrs x3, FGDTP4_EL12 + +mrs x3, FGDTU4_EL12 +// CHECK-INST: mrs x3, FGDTU4_EL12 +// CHECK-ENCODING: encoding: [0x83,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3483 mrs x3, FGDTU4_EL12 + +mrs x3, FGDTP4_EL3 +// CHECK-INST: mrs x3, FGDTP4_EL3 +// CHECK-ENCODING: encoding: [0x83,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3283 mrs x3, FGDTP4_EL3 + +mrs x3, FGDTP5_EL1 +// CHECK-INST: mrs x3, FGDTP5_EL1 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d53832a3 mrs x3, FGDTP5_EL1 + +mrs x3, FGDTU5_EL1 +// CHECK-INST: mrs x3, FGDTU5_EL1 +// CHECK-ENCODING: encoding: [0xa3,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d53834a3 mrs x3, FGDTU5_EL1 + +mrs x3, FGDTP5_EL2 +// CHECK-INST: mrs x3, 
FGDTP5_EL2 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c32a3 mrs x3, FGDTP5_EL2 + +mrs x3, FGDTU5_EL2 +// CHECK-INST: mrs x3, FGDTU5_EL2 +// CHECK-ENCODING: encoding: [0xa3,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c34a3 mrs x3, FGDTU5_EL2 + +mrs x3, FGDTP5_EL12 +// CHECK-INST: mrs x3, FGDTP5_EL12 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d32a3 mrs x3, FGDTP5_EL12 + +mrs x3, FGDTU5_EL12 +// CHECK-INST: mrs x3, FGDTU5_EL12 +// CHECK-ENCODING: encoding: [0xa3,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d34a3 mrs x3, FGDTU5_EL12 + +mrs x3, FGDTP5_EL3 +// CHECK-INST: mrs x3, FGDTP5_EL3 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e32a3 mrs x3, FGDTP5_EL3 + +mrs x3, FGDTP6_EL1 +// CHECK-INST: mrs x3, FGDTP6_EL1 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d53832c3 mrs x3, FGDTP6_EL1 + +mrs x3, FGDTU6_EL1 +// CHECK-INST: mrs x3, FGDTU6_EL1 +// CHECK-ENCODING: encoding: [0xc3,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d53834c3 mrs x3, FGDTU6_EL1 + +mrs x3, FGDTP6_EL2 +// CHECK-INST: mrs x3, FGDTP6_EL2 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c32c3 mrs x3, FGDTP6_EL2 + +mrs x3, FGDTU6_EL2 +// CHECK-INST: mrs x3, FGDTU6_EL2 +// CHECK-ENCODING: encoding: [0xc3,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c34c3 mrs x3, FGDTU6_EL2 + +mrs x3, FGDTP6_EL12 +// CHECK-INST: mrs x3, FGDTP6_EL12 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d32c3 mrs x3, FGDTP6_EL12 + +mrs x3, FGDTU6_EL12 +// CHECK-INST: mrs x3, FGDTU6_EL12 +// CHECK-ENCODING: encoding: [0xc3,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d34c3 mrs x3, FGDTU6_EL12 + +mrs x3, FGDTP6_EL3 +// CHECK-INST: mrs x3, FGDTP6_EL3 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e32c3 mrs x3, FGDTP6_EL3 + +mrs x3, FGDTP7_EL1 +// CHECK-INST: mrs x3, FGDTP7_EL1 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x38,0xd5] +// CHECK-UNKNOWN: d53832e3 mrs x3, FGDTP7_EL1 + 
+mrs x3, FGDTU7_EL1 +// CHECK-INST: mrs x3, FGDTU7_EL1 +// CHECK-ENCODING: encoding: [0xe3,0x34,0x38,0xd5] +// CHECK-UNKNOWN: d53834e3 mrs x3, FGDTU7_EL1 + +mrs x3, FGDTP7_EL2 +// CHECK-INST: mrs x3, FGDTP7_EL2 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x3c,0xd5] +// CHECK-UNKNOWN: d53c32e3 mrs x3, FGDTP7_EL2 + +mrs x3, FGDTU7_EL2 +// CHECK-INST: mrs x3, FGDTU7_EL2 +// CHECK-ENCODING: encoding: [0xe3,0x34,0x3c,0xd5] +// CHECK-UNKNOWN: d53c34e3 mrs x3, FGDTU7_EL2 + +mrs x3, FGDTP7_EL12 +// CHECK-INST: mrs x3, FGDTP7_EL12 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x3d,0xd5] +// CHECK-UNKNOWN: d53d32e3 mrs x3, FGDTP7_EL12 + +mrs x3, FGDTU7_EL12 +// CHECK-INST: mrs x3, FGDTU7_EL12 +// CHECK-ENCODING: encoding: [0xe3,0x34,0x3d,0xd5] +// CHECK-UNKNOWN: d53d34e3 mrs x3, FGDTU7_EL12 + +mrs x3, FGDTP7_EL3 +// CHECK-INST: mrs x3, FGDTP7_EL3 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x3e,0xd5] +// CHECK-UNKNOWN: d53e32e3 mrs x3, FGDTP7_EL3 + +mrs x3, FGDTP8_EL1 +// CHECK-INST: mrs x3, FGDTP8_EL1 +// CHECK-ENCODING: encoding: [0x03,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d5383303 mrs x3, FGDTP8_EL1 + +mrs x3, FGDTU8_EL1 +// CHECK-INST: mrs x3, FGDTU8_EL1 +// CHECK-ENCODING: encoding: [0x03,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d5383503 mrs x3, FGDTU8_EL1 + +mrs x3, FGDTP8_EL2 +// CHECK-INST: mrs x3, FGDTP8_EL2 +// CHECK-ENCODING: encoding: [0x03,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3303 mrs x3, FGDTP8_EL2 + +mrs x3, FGDTU8_EL2 +// CHECK-INST: mrs x3, FGDTU8_EL2 +// CHECK-ENCODING: encoding: [0x03,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3503 mrs x3, FGDTU8_EL2 + +mrs x3, FGDTP8_EL12 +// CHECK-INST: mrs x3, FGDTP8_EL12 +// CHECK-ENCODING: encoding: [0x03,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3303 mrs x3, FGDTP8_EL12 + +mrs x3, FGDTU8_EL12 +// CHECK-INST: mrs x3, FGDTU8_EL12 +// CHECK-ENCODING: encoding: [0x03,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3503 mrs x3, FGDTU8_EL12 + +mrs x3, FGDTP8_EL3 +// CHECK-INST: mrs x3, FGDTP8_EL3 +// CHECK-ENCODING: encoding: [0x03,0x33,0x3e,0xd5] +// 
CHECK-UNKNOWN: d53e3303 mrs x3, FGDTP8_EL3 + +mrs x3, FGDTP9_EL1 +// CHECK-INST: mrs x3, FGDTP9_EL1 +// CHECK-ENCODING: encoding: [0x23,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d5383323 mrs x3, FGDTP9_EL1 + +mrs x3, FGDTU9_EL1 +// CHECK-INST: mrs x3, FGDTU9_EL1 +// CHECK-ENCODING: encoding: [0x23,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d5383523 mrs x3, FGDTU9_EL1 + +mrs x3, FGDTP9_EL2 +// CHECK-INST: mrs x3, FGDTP9_EL2 +// CHECK-ENCODING: encoding: [0x23,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3323 mrs x3, FGDTP9_EL2 + +mrs x3, FGDTU9_EL2 +// CHECK-INST: mrs x3, FGDTU9_EL2 +// CHECK-ENCODING: encoding: [0x23,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3523 mrs x3, FGDTU9_EL2 + +mrs x3, FGDTP9_EL12 +// CHECK-INST: mrs x3, FGDTP9_EL12 +// CHECK-ENCODING: encoding: [0x23,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3323 mrs x3, FGDTP9_EL12 + +mrs x3, FGDTU9_EL12 +// CHECK-INST: mrs x3, FGDTU9_EL12 +// CHECK-ENCODING: encoding: [0x23,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3523 mrs x3, FGDTU9_EL12 + +mrs x3, FGDTP9_EL3 +// CHECK-INST: mrs x3, FGDTP9_EL3 +// CHECK-ENCODING: encoding: [0x23,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3323 mrs x3, FGDTP9_EL3 + +mrs x3, FGDTP10_EL1 +// CHECK-INST: mrs x3, FGDTP10_EL1 +// CHECK-ENCODING: encoding: [0x43,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d5383343 mrs x3, FGDTP10_EL1 + +mrs x3, FGDTU10_EL1 +// CHECK-INST: mrs x3, FGDTU10_EL1 +// CHECK-ENCODING: encoding: [0x43,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d5383543 mrs x3, FGDTU10_EL1 + +mrs x3, FGDTP10_EL2 +// CHECK-INST: mrs x3, FGDTP10_EL2 +// CHECK-ENCODING: encoding: [0x43,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3343 mrs x3, FGDTP10_EL2 + +mrs x3, FGDTU10_EL2 +// CHECK-INST: mrs x3, FGDTU10_EL2 +// CHECK-ENCODING: encoding: [0x43,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3543 mrs x3, FGDTU10_EL2 + +mrs x3, FGDTP10_EL12 +// CHECK-INST: mrs x3, FGDTP10_EL12 +// CHECK-ENCODING: encoding: [0x43,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3343 mrs x3, FGDTP10_EL12 + +mrs x3, FGDTU10_EL12 +// CHECK-INST: mrs x3, FGDTU10_EL12 
+// CHECK-ENCODING: encoding: [0x43,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3543 mrs x3, FGDTU10_EL12 + +mrs x3, FGDTP10_EL3 +// CHECK-INST: mrs x3, FGDTP10_EL3 +// CHECK-ENCODING: encoding: [0x43,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3343 mrs x3, FGDTP10_EL3 + +mrs x3, FGDTP11_EL1 +// CHECK-INST: mrs x3, FGDTP11_EL1 +// CHECK-ENCODING: encoding: [0x63,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d5383363 mrs x3, FGDTP11_EL1 + +mrs x3, FGDTU11_EL1 +// CHECK-INST: mrs x3, FGDTU11_EL1 +// CHECK-ENCODING: encoding: [0x63,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d5383563 mrs x3, FGDTU11_EL1 + +mrs x3, FGDTP11_EL2 +// CHECK-INST: mrs x3, FGDTP11_EL2 +// CHECK-ENCODING: encoding: [0x63,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3363 mrs x3, FGDTP11_EL2 + +mrs x3, FGDTU11_EL2 +// CHECK-INST: mrs x3, FGDTU11_EL2 +// CHECK-ENCODING: encoding: [0x63,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3563 mrs x3, FGDTU11_EL2 + +mrs x3, FGDTP11_EL12 +// CHECK-INST: mrs x3, FGDTP11_EL12 +// CHECK-ENCODING: encoding: [0x63,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3363 mrs x3, FGDTP11_EL12 + +mrs x3, FGDTU11_EL12 +// CHECK-INST: mrs x3, FGDTU11_EL12 +// CHECK-ENCODING: encoding: [0x63,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3563 mrs x3, FGDTU11_EL12 + +mrs x3, FGDTP11_EL3 +// CHECK-INST: mrs x3, FGDTP11_EL3 +// CHECK-ENCODING: encoding: [0x63,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3363 mrs x3, FGDTP11_EL3 + +mrs x3, FGDTP12_EL1 +// CHECK-INST: mrs x3, FGDTP12_EL1 +// CHECK-ENCODING: encoding: [0x83,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d5383383 mrs x3, FGDTP12_EL1 + +mrs x3, FGDTU12_EL1 +// CHECK-INST: mrs x3, FGDTU12_EL1 +// CHECK-ENCODING: encoding: [0x83,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d5383583 mrs x3, FGDTU12_EL1 + +mrs x3, FGDTP12_EL2 +// CHECK-INST: mrs x3, FGDTP12_EL2 +// CHECK-ENCODING: encoding: [0x83,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3383 mrs x3, FGDTP12_EL2 + +mrs x3, FGDTU12_EL2 +// CHECK-INST: mrs x3, FGDTU12_EL2 +// CHECK-ENCODING: encoding: [0x83,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3583 mrs 
x3, FGDTU12_EL2 + +mrs x3, FGDTP12_EL12 +// CHECK-INST: mrs x3, FGDTP12_EL12 +// CHECK-ENCODING: encoding: [0x83,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3383 mrs x3, FGDTP12_EL12 + +mrs x3, FGDTU12_EL12 +// CHECK-INST: mrs x3, FGDTU12_EL12 +// CHECK-ENCODING: encoding: [0x83,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3583 mrs x3, FGDTU12_EL12 + +mrs x3, FGDTP12_EL3 +// CHECK-INST: mrs x3, FGDTP12_EL3 +// CHECK-ENCODING: encoding: [0x83,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3383 mrs x3, FGDTP12_EL3 + +mrs x3, FGDTP13_EL1 +// CHECK-INST: mrs x3, FGDTP13_EL1 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d53833a3 mrs x3, FGDTP13_EL1 + +mrs x3, FGDTU13_EL1 +// CHECK-INST: mrs x3, FGDTU13_EL1 +// CHECK-ENCODING: encoding: [0xa3,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d53835a3 mrs x3, FGDTU13_EL1 + +mrs x3, FGDTP13_EL2 +// CHECK-INST: mrs x3, FGDTP13_EL2 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c33a3 mrs x3, FGDTP13_EL2 + +mrs x3, FGDTU13_EL2 +// CHECK-INST: mrs x3, FGDTU13_EL2 +// CHECK-ENCODING: encoding: [0xa3,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c35a3 mrs x3, FGDTU13_EL2 + +mrs x3, FGDTP13_EL12 +// CHECK-INST: mrs x3, FGDTP13_EL12 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d33a3 mrs x3, FGDTP13_EL12 + +mrs x3, FGDTU13_EL12 +// CHECK-INST: mrs x3, FGDTU13_EL12 +// CHECK-ENCODING: encoding: [0xa3,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d35a3 mrs x3, FGDTU13_EL12 + +mrs x3, FGDTP13_EL3 +// CHECK-INST: mrs x3, FGDTP13_EL3 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e33a3 mrs x3, FGDTP13_EL3 + +mrs x3, FGDTP14_EL1 +// CHECK-INST: mrs x3, FGDTP14_EL1 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d53833c3 mrs x3, FGDTP14_EL1 + +mrs x3, FGDTU14_EL1 +// CHECK-INST: mrs x3, FGDTU14_EL1 +// CHECK-ENCODING: encoding: [0xc3,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d53835c3 mrs x3, FGDTU14_EL1 + +mrs x3, FGDTP14_EL2 +// CHECK-INST: mrs x3, FGDTP14_EL2 +// 
CHECK-ENCODING: encoding: [0xc3,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c33c3 mrs x3, FGDTP14_EL2 + +mrs x3, FGDTU14_EL2 +// CHECK-INST: mrs x3, FGDTU14_EL2 +// CHECK-ENCODING: encoding: [0xc3,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c35c3 mrs x3, FGDTU14_EL2 + +mrs x3, FGDTP14_EL12 +// CHECK-INST: mrs x3, FGDTP14_EL12 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d33c3 mrs x3, FGDTP14_EL12 + +mrs x3, FGDTU14_EL12 +// CHECK-INST: mrs x3, FGDTU14_EL12 +// CHECK-ENCODING: encoding: [0xc3,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d35c3 mrs x3, FGDTU14_EL12 + +mrs x3, FGDTP14_EL3 +// CHECK-INST: mrs x3, FGDTP14_EL3 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e33c3 mrs x3, FGDTP14_EL3 + +mrs x3, FGDTP15_EL1 +// CHECK-INST: mrs x3, FGDTP15_EL1 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x38,0xd5] +// CHECK-UNKNOWN: d53833e3 mrs x3, FGDTP15_EL1 + +mrs x3, FGDTU15_EL1 +// CHECK-INST: mrs x3, FGDTU15_EL1 +// CHECK-ENCODING: encoding: [0xe3,0x35,0x38,0xd5] +// CHECK-UNKNOWN: d53835e3 mrs x3, FGDTU15_EL1 + +mrs x3, FGDTP15_EL2 +// CHECK-INST: mrs x3, FGDTP15_EL2 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x3c,0xd5] +// CHECK-UNKNOWN: d53c33e3 mrs x3, FGDTP15_EL2 + +mrs x3, FGDTU15_EL2 +// CHECK-INST: mrs x3, FGDTU15_EL2 +// CHECK-ENCODING: encoding: [0xe3,0x35,0x3c,0xd5] +// CHECK-UNKNOWN: d53c35e3 mrs x3, FGDTU15_EL2 + +mrs x3, FGDTP15_EL12 +// CHECK-INST: mrs x3, FGDTP15_EL12 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x3d,0xd5] +// CHECK-UNKNOWN: d53d33e3 mrs x3, FGDTP15_EL12 + +mrs x3, FGDTU15_EL12 +// CHECK-INST: mrs x3, FGDTU15_EL12 +// CHECK-ENCODING: encoding: [0xe3,0x35,0x3d,0xd5] +// CHECK-UNKNOWN: d53d35e3 mrs x3, FGDTU15_EL12 + +mrs x3, FGDTP15_EL3 +// CHECK-INST: mrs x3, FGDTP15_EL3 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x3e,0xd5] +// CHECK-UNKNOWN: d53e33e3 mrs x3, FGDTP15_EL3 + +mrs x0, AFGDTP0_EL1 +// CHECK-INST: mrs x0, AFGDTP0_EL1 +// CHECK-ENCODING: encoding: [0x00,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d5383600 mrs 
x0, AFGDTP0_EL1 + +mrs x0, AFGDTU0_EL1 +// CHECK-INST: mrs x0, AFGDTU0_EL1 +// CHECK-ENCODING: encoding: [0x00,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d5383800 mrs x0, AFGDTU0_EL1 + +mrs x0, AFGDTP0_EL2 +// CHECK-INST: mrs x0, AFGDTP0_EL2 +// CHECK-ENCODING: encoding: [0x00,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3600 mrs x0, AFGDTP0_EL2 + +mrs x0, AFGDTU0_EL2 +// CHECK-INST: mrs x0, AFGDTU0_EL2 +// CHECK-ENCODING: encoding: [0x00,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3800 mrs x0, AFGDTU0_EL2 + +mrs x0, AFGDTP0_EL12 +// CHECK-INST: mrs x0, AFGDTP0_EL12 +// CHECK-ENCODING: encoding: [0x00,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3600 mrs x0, AFGDTP0_EL12 + +mrs x0, AFGDTU0_EL12 +// CHECK-INST: mrs x0, AFGDTU0_EL12 +// CHECK-ENCODING: encoding: [0x00,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3800 mrs x0, AFGDTU0_EL12 + +mrs x0, AFGDTP0_EL3 +// CHECK-INST: mrs x0, AFGDTP0_EL3 +// CHECK-ENCODING: encoding: [0x00,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3600 mrs x0, AFGDTP0_EL3 + +mrs x0, AFGDTP1_EL1 +// CHECK-INST: mrs x0, AFGDTP1_EL1 +// CHECK-ENCODING: encoding: [0x20,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d5383620 mrs x0, AFGDTP1_EL1 + +mrs x0, AFGDTU1_EL1 +// CHECK-INST: mrs x0, AFGDTU1_EL1 +// CHECK-ENCODING: encoding: [0x20,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d5383820 mrs x0, AFGDTU1_EL1 + +mrs x0, AFGDTP1_EL2 +// CHECK-INST: mrs x0, AFGDTP1_EL2 +// CHECK-ENCODING: encoding: [0x20,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3620 mrs x0, AFGDTP1_EL2 + +mrs x0, AFGDTU1_EL2 +// CHECK-INST: mrs x0, AFGDTU1_EL2 +// CHECK-ENCODING: encoding: [0x20,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3820 mrs x0, AFGDTU1_EL2 + +mrs x0, AFGDTP1_EL12 +// CHECK-INST: mrs x0, AFGDTP1_EL12 +// CHECK-ENCODING: encoding: [0x20,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3620 mrs x0, AFGDTP1_EL12 + +mrs x0, AFGDTU1_EL12 +// CHECK-INST: mrs x0, AFGDTU1_EL12 +// CHECK-ENCODING: encoding: [0x20,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3820 mrs x0, AFGDTU1_EL12 + +mrs x0, AFGDTP1_EL3 +// CHECK-INST: mrs x0, AFGDTP1_EL3 +// 
CHECK-ENCODING: encoding: [0x20,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3620 mrs x0, AFGDTP1_EL3 + +mrs x0, AFGDTP2_EL1 +// CHECK-INST: mrs x0, AFGDTP2_EL1 +// CHECK-ENCODING: encoding: [0x40,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d5383640 mrs x0, AFGDTP2_EL1 + +mrs x0, AFGDTU2_EL1 +// CHECK-INST: mrs x0, AFGDTU2_EL1 +// CHECK-ENCODING: encoding: [0x40,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d5383840 mrs x0, AFGDTU2_EL1 + +mrs x0, AFGDTP2_EL2 +// CHECK-INST: mrs x0, AFGDTP2_EL2 +// CHECK-ENCODING: encoding: [0x40,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3640 mrs x0, AFGDTP2_EL2 + +mrs x0, AFGDTU2_EL2 +// CHECK-INST: mrs x0, AFGDTU2_EL2 +// CHECK-ENCODING: encoding: [0x40,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3840 mrs x0, AFGDTU2_EL2 + +mrs x0, AFGDTP2_EL12 +// CHECK-INST: mrs x0, AFGDTP2_EL12 +// CHECK-ENCODING: encoding: [0x40,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3640 mrs x0, AFGDTP2_EL12 + +mrs x0, AFGDTU2_EL12 +// CHECK-INST: mrs x0, AFGDTU2_EL12 +// CHECK-ENCODING: encoding: [0x40,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3840 mrs x0, AFGDTU2_EL12 + +mrs x0, AFGDTP2_EL3 +// CHECK-INST: mrs x0, AFGDTP2_EL3 +// CHECK-ENCODING: encoding: [0x40,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3640 mrs x0, AFGDTP2_EL3 + +mrs x0, AFGDTP3_EL1 +// CHECK-INST: mrs x0, AFGDTP3_EL1 +// CHECK-ENCODING: encoding: [0x60,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d5383660 mrs x0, AFGDTP3_EL1 + +mrs x0, AFGDTU3_EL1 +// CHECK-INST: mrs x0, AFGDTU3_EL1 +// CHECK-ENCODING: encoding: [0x60,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d5383860 mrs x0, AFGDTU3_EL1 + +mrs x0, AFGDTP3_EL2 +// CHECK-INST: mrs x0, AFGDTP3_EL2 +// CHECK-ENCODING: encoding: [0x60,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3660 mrs x0, AFGDTP3_EL2 + +mrs x0, AFGDTU3_EL2 +// CHECK-INST: mrs x0, AFGDTU3_EL2 +// CHECK-ENCODING: encoding: [0x60,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3860 mrs x0, AFGDTU3_EL2 + +mrs x0, AFGDTP3_EL12 +// CHECK-INST: mrs x0, AFGDTP3_EL12 +// CHECK-ENCODING: encoding: [0x60,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3660 mrs x0, 
AFGDTP3_EL12 + +mrs x0, AFGDTU3_EL12 +// CHECK-INST: mrs x0, AFGDTU3_EL12 +// CHECK-ENCODING: encoding: [0x60,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3860 mrs x0, AFGDTU3_EL12 + +mrs x0, AFGDTP3_EL3 +// CHECK-INST: mrs x0, AFGDTP3_EL3 +// CHECK-ENCODING: encoding: [0x60,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3660 mrs x0, AFGDTP3_EL3 + +mrs x0, AFGDTP4_EL1 +// CHECK-INST: mrs x0, AFGDTP4_EL1 +// CHECK-ENCODING: encoding: [0x80,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d5383680 mrs x0, AFGDTP4_EL1 + +mrs x0, AFGDTU4_EL1 +// CHECK-INST: mrs x0, AFGDTU4_EL1 +// CHECK-ENCODING: encoding: [0x80,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d5383880 mrs x0, AFGDTU4_EL1 + +mrs x0, AFGDTP4_EL2 +// CHECK-INST: mrs x0, AFGDTP4_EL2 +// CHECK-ENCODING: encoding: [0x80,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3680 mrs x0, AFGDTP4_EL2 + +mrs x0, AFGDTU4_EL2 +// CHECK-INST: mrs x0, AFGDTU4_EL2 +// CHECK-ENCODING: encoding: [0x80,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3880 mrs x0, AFGDTU4_EL2 + +mrs x0, AFGDTP4_EL12 +// CHECK-INST: mrs x0, AFGDTP4_EL12 +// CHECK-ENCODING: encoding: [0x80,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3680 mrs x0, AFGDTP4_EL12 + +mrs x0, AFGDTU4_EL12 +// CHECK-INST: mrs x0, AFGDTU4_EL12 +// CHECK-ENCODING: encoding: [0x80,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3880 mrs x0, AFGDTU4_EL12 + +mrs x0, AFGDTP4_EL3 +// CHECK-INST: mrs x0, AFGDTP4_EL3 +// CHECK-ENCODING: encoding: [0x80,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3680 mrs x0, AFGDTP4_EL3 + +mrs x0, AFGDTP5_EL1 +// CHECK-INST: mrs x0, AFGDTP5_EL1 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d53836a0 mrs x0, AFGDTP5_EL1 + +mrs x0, AFGDTU5_EL1 +// CHECK-INST: mrs x0, AFGDTU5_EL1 +// CHECK-ENCODING: encoding: [0xa0,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d53838a0 mrs x0, AFGDTU5_EL1 + +mrs x0, AFGDTP5_EL2 +// CHECK-INST: mrs x0, AFGDTP5_EL2 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c36a0 mrs x0, AFGDTP5_EL2 + +mrs x0, AFGDTU5_EL2 +// CHECK-INST: mrs x0, AFGDTU5_EL2 +// 
CHECK-ENCODING: encoding: [0xa0,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c38a0 mrs x0, AFGDTU5_EL2 + +mrs x0, AFGDTP5_EL12 +// CHECK-INST: mrs x0, AFGDTP5_EL12 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d36a0 mrs x0, AFGDTP5_EL12 + +mrs x0, AFGDTU5_EL12 +// CHECK-INST: mrs x0, AFGDTU5_EL12 +// CHECK-ENCODING: encoding: [0xa0,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d38a0 mrs x0, AFGDTU5_EL12 + +mrs x0, AFGDTP5_EL3 +// CHECK-INST: mrs x0, AFGDTP5_EL3 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e36a0 mrs x0, AFGDTP5_EL3 + +mrs x0, AFGDTP6_EL1 +// CHECK-INST: mrs x0, AFGDTP6_EL1 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d53836c0 mrs x0, AFGDTP6_EL1 + +mrs x0, AFGDTU6_EL1 +// CHECK-INST: mrs x0, AFGDTU6_EL1 +// CHECK-ENCODING: encoding: [0xc0,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d53838c0 mrs x0, AFGDTU6_EL1 + +mrs x0, AFGDTP6_EL2 +// CHECK-INST: mrs x0, AFGDTP6_EL2 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c36c0 mrs x0, AFGDTP6_EL2 + +mrs x0, AFGDTU6_EL2 +// CHECK-INST: mrs x0, AFGDTU6_EL2 +// CHECK-ENCODING: encoding: [0xc0,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c38c0 mrs x0, AFGDTU6_EL2 + +mrs x0, AFGDTP6_EL12 +// CHECK-INST: mrs x0, AFGDTP6_EL12 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d36c0 mrs x0, AFGDTP6_EL12 + +mrs x0, AFGDTU6_EL12 +// CHECK-INST: mrs x0, AFGDTU6_EL12 +// CHECK-ENCODING: encoding: [0xc0,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d38c0 mrs x0, AFGDTU6_EL12 + +mrs x0, AFGDTP6_EL3 +// CHECK-INST: mrs x0, AFGDTP6_EL3 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e36c0 mrs x0, AFGDTP6_EL3 + +mrs x0, AFGDTP7_EL1 +// CHECK-INST: mrs x0, AFGDTP7_EL1 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x38,0xd5] +// CHECK-UNKNOWN: d53836e0 mrs x0, AFGDTP7_EL1 + +mrs x0, AFGDTU7_EL1 +// CHECK-INST: mrs x0, AFGDTU7_EL1 +// CHECK-ENCODING: encoding: [0xe0,0x38,0x38,0xd5] +// CHECK-UNKNOWN: d53838e0 mrs 
x0, AFGDTU7_EL1 + +mrs x0, AFGDTP7_EL2 +// CHECK-INST: mrs x0, AFGDTP7_EL2 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x3c,0xd5] +// CHECK-UNKNOWN: d53c36e0 mrs x0, AFGDTP7_EL2 + +mrs x0, AFGDTU7_EL2 +// CHECK-INST: mrs x0, AFGDTU7_EL2 +// CHECK-ENCODING: encoding: [0xe0,0x38,0x3c,0xd5] +// CHECK-UNKNOWN: d53c38e0 mrs x0, AFGDTU7_EL2 + +mrs x0, AFGDTP7_EL12 +// CHECK-INST: mrs x0, AFGDTP7_EL12 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x3d,0xd5] +// CHECK-UNKNOWN: d53d36e0 mrs x0, AFGDTP7_EL12 + +mrs x0, AFGDTU7_EL12 +// CHECK-INST: mrs x0, AFGDTU7_EL12 +// CHECK-ENCODING: encoding: [0xe0,0x38,0x3d,0xd5] +// CHECK-UNKNOWN: d53d38e0 mrs x0, AFGDTU7_EL12 + +mrs x0, AFGDTP7_EL3 +// CHECK-INST: mrs x0, AFGDTP7_EL3 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x3e,0xd5] +// CHECK-UNKNOWN: d53e36e0 mrs x0, AFGDTP7_EL3 + +mrs x0, AFGDTP8_EL1 +// CHECK-INST: mrs x0, AFGDTP8_EL1 +// CHECK-ENCODING: encoding: [0x00,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d5383700 mrs x0, AFGDTP8_EL1 + +mrs x0, AFGDTU8_EL1 +// CHECK-INST: mrs x0, AFGDTU8_EL1 +// CHECK-ENCODING: encoding: [0x00,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d5383900 mrs x0, AFGDTU8_EL1 + +mrs x0, AFGDTP8_EL2 +// CHECK-INST: mrs x0, AFGDTP8_EL2 +// CHECK-ENCODING: encoding: [0x00,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3700 mrs x0, AFGDTP8_EL2 + +mrs x0, AFGDTU8_EL2 +// CHECK-INST: mrs x0, AFGDTU8_EL2 +// CHECK-ENCODING: encoding: [0x00,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3900 mrs x0, AFGDTU8_EL2 + +mrs x0, AFGDTP8_EL12 +// CHECK-INST: mrs x0, AFGDTP8_EL12 +// CHECK-ENCODING: encoding: [0x00,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3700 mrs x0, AFGDTP8_EL12 + +mrs x0, AFGDTU8_EL12 +// CHECK-INST: mrs x0, AFGDTU8_EL12 +// CHECK-ENCODING: encoding: [0x00,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3900 mrs x0, AFGDTU8_EL12 + +mrs x0, AFGDTP8_EL3 +// CHECK-INST: mrs x0, AFGDTP8_EL3 +// CHECK-ENCODING: encoding: [0x00,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3700 mrs x0, AFGDTP8_EL3 + +mrs x0, AFGDTP9_EL1 +// CHECK-INST: mrs x0, AFGDTP9_EL1 +// 
CHECK-ENCODING: encoding: [0x20,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d5383720 mrs x0, AFGDTP9_EL1 + +mrs x0, AFGDTU9_EL1 +// CHECK-INST: mrs x0, AFGDTU9_EL1 +// CHECK-ENCODING: encoding: [0x20,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d5383920 mrs x0, AFGDTU9_EL1 + +mrs x0, AFGDTP9_EL2 +// CHECK-INST: mrs x0, AFGDTP9_EL2 +// CHECK-ENCODING: encoding: [0x20,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3720 mrs x0, AFGDTP9_EL2 + +mrs x0, AFGDTU9_EL2 +// CHECK-INST: mrs x0, AFGDTU9_EL2 +// CHECK-ENCODING: encoding: [0x20,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3920 mrs x0, AFGDTU9_EL2 + +mrs x0, AFGDTP9_EL12 +// CHECK-INST: mrs x0, AFGDTP9_EL12 +// CHECK-ENCODING: encoding: [0x20,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3720 mrs x0, AFGDTP9_EL12 + +mrs x0, AFGDTU9_EL12 +// CHECK-INST: mrs x0, AFGDTU9_EL12 +// CHECK-ENCODING: encoding: [0x20,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3920 mrs x0, AFGDTU9_EL12 + +mrs x0, AFGDTP9_EL3 +// CHECK-INST: mrs x0, AFGDTP9_EL3 +// CHECK-ENCODING: encoding: [0x20,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3720 mrs x0, AFGDTP9_EL3 + +mrs x0, AFGDTP10_EL1 +// CHECK-INST: mrs x0, AFGDTP10_EL1 +// CHECK-ENCODING: encoding: [0x40,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d5383740 mrs x0, AFGDTP10_EL1 + +mrs x0, AFGDTU10_EL1 +// CHECK-INST: mrs x0, AFGDTU10_EL1 +// CHECK-ENCODING: encoding: [0x40,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d5383940 mrs x0, AFGDTU10_EL1 + +mrs x0, AFGDTP10_EL2 +// CHECK-INST: mrs x0, AFGDTP10_EL2 +// CHECK-ENCODING: encoding: [0x40,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3740 mrs x0, AFGDTP10_EL2 + +mrs x0, AFGDTU10_EL2 +// CHECK-INST: mrs x0, AFGDTU10_EL2 +// CHECK-ENCODING: encoding: [0x40,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3940 mrs x0, AFGDTU10_EL2 + +mrs x0, AFGDTP10_EL12 +// CHECK-INST: mrs x0, AFGDTP10_EL12 +// CHECK-ENCODING: encoding: [0x40,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3740 mrs x0, AFGDTP10_EL12 + +mrs x0, AFGDTU10_EL12 +// CHECK-INST: mrs x0, AFGDTU10_EL12 +// CHECK-ENCODING: encoding: [0x40,0x39,0x3d,0xd5] +// 
CHECK-UNKNOWN: d53d3940 mrs x0, AFGDTU10_EL12 + +mrs x0, AFGDTP10_EL3 +// CHECK-INST: mrs x0, AFGDTP10_EL3 +// CHECK-ENCODING: encoding: [0x40,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3740 mrs x0, AFGDTP10_EL3 + +mrs x0, AFGDTP11_EL1 +// CHECK-INST: mrs x0, AFGDTP11_EL1 +// CHECK-ENCODING: encoding: [0x60,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d5383760 mrs x0, AFGDTP11_EL1 + +mrs x0, AFGDTU11_EL1 +// CHECK-INST: mrs x0, AFGDTU11_EL1 +// CHECK-ENCODING: encoding: [0x60,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d5383960 mrs x0, AFGDTU11_EL1 + +mrs x0, AFGDTP11_EL2 +// CHECK-INST: mrs x0, AFGDTP11_EL2 +// CHECK-ENCODING: encoding: [0x60,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3760 mrs x0, AFGDTP11_EL2 + +mrs x0, AFGDTU11_EL2 +// CHECK-INST: mrs x0, AFGDTU11_EL2 +// CHECK-ENCODING: encoding: [0x60,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3960 mrs x0, AFGDTU11_EL2 + +mrs x0, AFGDTP11_EL12 +// CHECK-INST: mrs x0, AFGDTP11_EL12 +// CHECK-ENCODING: encoding: [0x60,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3760 mrs x0, AFGDTP11_EL12 + +mrs x0, AFGDTU11_EL12 +// CHECK-INST: mrs x0, AFGDTU11_EL12 +// CHECK-ENCODING: encoding: [0x60,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3960 mrs x0, AFGDTU11_EL12 + +mrs x0, AFGDTP11_EL3 +// CHECK-INST: mrs x0, AFGDTP11_EL3 +// CHECK-ENCODING: encoding: [0x60,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3760 mrs x0, AFGDTP11_EL3 + +mrs x0, AFGDTP12_EL1 +// CHECK-INST: mrs x0, AFGDTP12_EL1 +// CHECK-ENCODING: encoding: [0x80,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d5383780 mrs x0, AFGDTP12_EL1 + +mrs x0, AFGDTU12_EL1 +// CHECK-INST: mrs x0, AFGDTU12_EL1 +// CHECK-ENCODING: encoding: [0x80,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d5383980 mrs x0, AFGDTU12_EL1 + +mrs x0, AFGDTP12_EL2 +// CHECK-INST: mrs x0, AFGDTP12_EL2 +// CHECK-ENCODING: encoding: [0x80,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3780 mrs x0, AFGDTP12_EL2 + +mrs x0, AFGDTU12_EL2 +// CHECK-INST: mrs x0, AFGDTU12_EL2 +// CHECK-ENCODING: encoding: [0x80,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c3980 mrs x0, AFGDTU12_EL2 + 
+mrs x0, AFGDTP12_EL12 +// CHECK-INST: mrs x0, AFGDTP12_EL12 +// CHECK-ENCODING: encoding: [0x80,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3780 mrs x0, AFGDTP12_EL12 + +mrs x0, AFGDTU12_EL12 +// CHECK-INST: mrs x0, AFGDTU12_EL12 +// CHECK-ENCODING: encoding: [0x80,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d3980 mrs x0, AFGDTU12_EL12 + +mrs x0, AFGDTP12_EL3 +// CHECK-INST: mrs x0, AFGDTP12_EL3 +// CHECK-ENCODING: encoding: [0x80,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e3780 mrs x0, AFGDTP12_EL3 + +mrs x0, AFGDTP13_EL1 +// CHECK-INST: mrs x0, AFGDTP13_EL1 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d53837a0 mrs x0, AFGDTP13_EL1 + +mrs x0, AFGDTU13_EL1 +// CHECK-INST: mrs x0, AFGDTU13_EL1 +// CHECK-ENCODING: encoding: [0xa0,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d53839a0 mrs x0, AFGDTU13_EL1 + +mrs x0, AFGDTP13_EL2 +// CHECK-INST: mrs x0, AFGDTP13_EL2 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c37a0 mrs x0, AFGDTP13_EL2 + +mrs x0, AFGDTU13_EL2 +// CHECK-INST: mrs x0, AFGDTU13_EL2 +// CHECK-ENCODING: encoding: [0xa0,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c39a0 mrs x0, AFGDTU13_EL2 + +mrs x0, AFGDTP13_EL12 +// CHECK-INST: mrs x0, AFGDTP13_EL12 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d37a0 mrs x0, AFGDTP13_EL12 + +mrs x0, AFGDTU13_EL12 +// CHECK-INST: mrs x0, AFGDTU13_EL12 +// CHECK-ENCODING: encoding: [0xa0,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d39a0 mrs x0, AFGDTU13_EL12 + +mrs x0, AFGDTP13_EL3 +// CHECK-INST: mrs x0, AFGDTP13_EL3 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e37a0 mrs x0, AFGDTP13_EL3 + +mrs x0, AFGDTP14_EL1 +// CHECK-INST: mrs x0, AFGDTP14_EL1 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d53837c0 mrs x0, AFGDTP14_EL1 + +mrs x0, AFGDTU14_EL1 +// CHECK-INST: mrs x0, AFGDTU14_EL1 +// CHECK-ENCODING: encoding: [0xc0,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d53839c0 mrs x0, AFGDTU14_EL1 + +mrs x0, AFGDTP14_EL2 +// CHECK-INST: mrs 
x0, AFGDTP14_EL2 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c37c0 mrs x0, AFGDTP14_EL2 + +mrs x0, AFGDTU14_EL2 +// CHECK-INST: mrs x0, AFGDTU14_EL2 +// CHECK-ENCODING: encoding: [0xc0,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c39c0 mrs x0, AFGDTU14_EL2 + +mrs x0, AFGDTP14_EL12 +// CHECK-INST: mrs x0, AFGDTP14_EL12 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d37c0 mrs x0, AFGDTP14_EL12 + +mrs x0, AFGDTU14_EL12 +// CHECK-INST: mrs x0, AFGDTU14_EL12 +// CHECK-ENCODING: encoding: [0xc0,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d39c0 mrs x0, AFGDTU14_EL12 + +mrs x0, AFGDTP14_EL3 +// CHECK-INST: mrs x0, AFGDTP14_EL3 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e37c0 mrs x0, AFGDTP14_EL3 + +mrs x0, AFGDTP15_EL1 +// CHECK-INST: mrs x0, AFGDTP15_EL1 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x38,0xd5] +// CHECK-UNKNOWN: d53837e0 mrs x0, AFGDTP15_EL1 + +mrs x0, AFGDTU15_EL1 +// CHECK-INST: mrs x0, AFGDTU15_EL1 +// CHECK-ENCODING: encoding: [0xe0,0x39,0x38,0xd5] +// CHECK-UNKNOWN: d53839e0 mrs x0, AFGDTU15_EL1 + +mrs x0, AFGDTP15_EL2 +// CHECK-INST: mrs x0, AFGDTP15_EL2 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x3c,0xd5] +// CHECK-UNKNOWN: d53c37e0 mrs x0, AFGDTP15_EL2 + +mrs x0, AFGDTU15_EL2 +// CHECK-INST: mrs x0, AFGDTU15_EL2 +// CHECK-ENCODING: encoding: [0xe0,0x39,0x3c,0xd5] +// CHECK-UNKNOWN: d53c39e0 mrs x0, AFGDTU15_EL2 + +mrs x0, AFGDTP15_EL12 +// CHECK-INST: mrs x0, AFGDTP15_EL12 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x3d,0xd5] +// CHECK-UNKNOWN: d53d37e0 mrs x0, AFGDTP15_EL12 + +mrs x0, AFGDTU15_EL12 +// CHECK-INST: mrs x0, AFGDTU15_EL12 +// CHECK-ENCODING: encoding: [0xe0,0x39,0x3d,0xd5] +// CHECK-UNKNOWN: d53d39e0 mrs x0, AFGDTU15_EL12 + +mrs x0, AFGDTP15_EL3 +// CHECK-INST: mrs x0, AFGDTP15_EL3 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x3e,0xd5] +// CHECK-UNKNOWN: d53e37e0 mrs x0, AFGDTP15_EL3 + +mrs x2, TPMIN0_EL0 +// CHECK-INST: mrs x2, TPMIN0_EL0 +// CHECK-ENCODING: encoding: 
[0x82,0x22,0x3b,0xd5] +// CHECK-UNKNOWN: d53b2282 mrs x2, TPMIN0_EL0 + +mrs x2, TPMAX0_EL0 +// CHECK-INST: mrs x2, TPMAX0_EL0 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x3b,0xd5] +// CHECK-UNKNOWN: d53b22a2 mrs x2, TPMAX0_EL0 + +mrs x2, TPMIN1_EL0 +// CHECK-INST: mrs x2, TPMIN1_EL0 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x3b,0xd5] +// CHECK-UNKNOWN: d53b22c2 mrs x2, TPMIN1_EL0 + +mrs x2, TPMAX1_EL0 +// CHECK-INST: mrs x2, TPMAX1_EL0 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x3b,0xd5] +// CHECK-UNKNOWN: d53b22e2 mrs x2, TPMAX1_EL0 + +mrs x2, TPMIN0_EL1 +// CHECK-INST: mrs x2, TPMIN0_EL1 +// CHECK-ENCODING: encoding: [0x82,0x22,0x38,0xd5] +// CHECK-UNKNOWN: d5382282 mrs x2, TPMIN0_EL1 + +mrs x2, TPMAX0_EL1 +// CHECK-INST: mrs x2, TPMAX0_EL1 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x38,0xd5] +// CHECK-UNKNOWN: d53822a2 mrs x2, TPMAX0_EL1 + +mrs x2, TPMIN1_EL1 +// CHECK-INST: mrs x2, TPMIN1_EL1 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x38,0xd5] +// CHECK-UNKNOWN: d53822c2 mrs x2, TPMIN1_EL1 + +mrs x2, TPMAX1_EL1 +// CHECK-INST: mrs x2, TPMAX1_EL1 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x38,0xd5] +// CHECK-UNKNOWN: d53822e2 mrs x2, TPMAX1_EL1 + +mrs x2, TPMIN0_EL2 +// CHECK-INST: mrs x2, TPMIN0_EL2 +// CHECK-ENCODING: encoding: [0x82,0x22,0x3c,0xd5] +// CHECK-UNKNOWN: d53c2282 mrs x2, TPMIN0_EL2 + +mrs x2, TPMAX0_EL2 +// CHECK-INST: mrs x2, TPMAX0_EL2 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x3c,0xd5] +// CHECK-UNKNOWN: d53c22a2 mrs x2, TPMAX0_EL2 + +mrs x2, TPMIN1_EL2 +// CHECK-INST: mrs x2, TPMIN1_EL2 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x3c,0xd5] +// CHECK-UNKNOWN: d53c22c2 mrs x2, TPMIN1_EL2 + +mrs x2, TPMAX1_EL2 +// CHECK-INST: mrs x2, TPMAX1_EL2 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x3c,0xd5] +// CHECK-UNKNOWN: d53c22e2 mrs x2, TPMAX1_EL2 + +mrs x2, TPMIN0_EL12 +// CHECK-INST: mrs x2, TPMIN0_EL12 +// CHECK-ENCODING: encoding: [0x82,0x22,0x3d,0xd5] +// CHECK-UNKNOWN: d53d2282 mrs x2, TPMIN0_EL12 + +mrs x2, TPMAX0_EL12 +// CHECK-INST: mrs x2, 
TPMAX0_EL12 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x3d,0xd5] +// CHECK-UNKNOWN: d53d22a2 mrs x2, TPMAX0_EL12 + +mrs x2, TPMIN1_EL12 +// CHECK-INST: mrs x2, TPMIN1_EL12 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x3d,0xd5] +// CHECK-UNKNOWN: d53d22c2 mrs x2, TPMIN1_EL12 + +mrs x2, TPMAX1_EL12 +// CHECK-INST: mrs x2, TPMAX1_EL12 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x3d,0xd5] +// CHECK-UNKNOWN: d53d22e2 mrs x2, TPMAX1_EL12 + + +msr TPIDR3_EL0, x0 +// CHECK-INST: msr TPIDR3_EL0, x0 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x1b,0xd5] +// CHECK-UNKNOWN: d51bd000 msr TPIDR3_EL0, x0 + +msr TPIDR3_EL1, x0 +// CHECK-INST: msr TPIDR3_EL1, x0 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x18,0xd5] +// CHECK-UNKNOWN: d518d000 msr TPIDR3_EL1, x0 + +msr TPIDR3_EL12, x0 +// CHECK-INST: msr TPIDR3_EL12, x0 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x1d,0xd5] +// CHECK-UNKNOWN: d51dd000 msr TPIDR3_EL12, x0 + +msr TPIDR3_EL2, x0 +// CHECK-INST: msr TPIDR3_EL2, x0 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x1c,0xd5] +// CHECK-UNKNOWN: d51cd000 msr TPIDR3_EL2, x0 + +msr TPIDR3_EL3, x0 +// CHECK-INST: msr TPIDR3_EL3, x0 +// CHECK-ENCODING: encoding: [0x00,0xd0,0x1e,0xd5] +// CHECK-UNKNOWN: d51ed000 msr TPIDR3_EL3, x0 + +msr VNCCR_EL2, x0 +// CHECK-INST: msr VNCCR_EL2, x0 +// CHECK-ENCODING: encoding: [0x20,0x22,0x1c,0xd5] +// CHECK-UNKNOWN: d51c2220 msr VNCCR_EL2, x0 + +msr DPOCR_EL0, x0 +// CHECK-INST: msr DPOCR_EL0, x0 +// CHECK-ENCODING: encoding: [0x40,0x45,0x1b,0xd5] +// CHECK-UNKNOWN: d51b4540 msr DPOCR_EL0, x0 + +msr HCRMASK_EL2, x0 +// CHECK-INST: msr HCRMASK_EL2, x0 +// CHECK-ENCODING: encoding: [0xc0,0x15,0x1c,0xd5] +// CHECK-UNKNOWN: d51c15c0 msr HCRMASK_EL2, x0 + +msr HCRXMASK_EL2, x0 +// CHECK-INST: msr HCRXMASK_EL2, x0 +// CHECK-ENCODING: encoding: [0xe0,0x15,0x1c,0xd5] +// CHECK-UNKNOWN: d51c15e0 msr HCRXMASK_EL2, x0 + +msr HCR_EL2, x0 +// CHECK-INST: msr HCR_EL2, x0 +// CHECK-ENCODING: encoding: [0x00,0x11,0x1c,0xd5] +// CHECK-UNKNOWN: d51c1100 msr HCR_EL2, x0 + +msr 
NVHCR_EL2, x0 +// CHECK-INST: msr NVHCR_EL2, x0 +// CHECK-ENCODING: encoding: [0x00,0x15,0x1c,0xd5] +// CHECK-UNKNOWN: d51c1500 msr NVHCR_EL2, x0 + +msr NVHCRX_EL2, x0 +// CHECK-INST: msr NVHCRX_EL2, x0 +// CHECK-ENCODING: encoding: [0x20,0x15,0x1c,0xd5] +// CHECK-UNKNOWN: d51c1520 msr NVHCRX_EL2, x0 + +msr NVHCRMASK_EL2, x0 +// CHECK-INST: msr NVHCRMASK_EL2, x0 +// CHECK-ENCODING: encoding: [0x80,0x15,0x1c,0xd5] +// CHECK-UNKNOWN: d51c1580 msr NVHCRMASK_EL2, x0 + +msr NVHCRXMASK_EL2, x0 +// CHECK-INST: msr NVHCRXMASK_EL2, x0 +// CHECK-ENCODING: encoding: [0xa0,0x15,0x1c,0xd5] +// CHECK-UNKNOWN: d51c15a0 msr NVHCRXMASK_EL2, x0 + +msr DPOTBR0_EL1, x3 +// CHECK-INST: msr DPOTBR0_EL1, x3 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x18,0xd5] +// CHECK-UNKNOWN: d51820c3 msr DPOTBR0_EL1, x3 + +msr DPOTBR0_EL12, x3 +// CHECK-INST: msr DPOTBR0_EL12, x3 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x1d,0xd5] +// CHECK-UNKNOWN: d51d20c3 msr DPOTBR0_EL12, x3 + +msr DPOTBR1_EL1, x3 +// CHECK-INST: msr DPOTBR1_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0x20,0x18,0xd5] +// CHECK-UNKNOWN: d51820e3 msr DPOTBR1_EL1, x3 + +msr DPOTBR1_EL12, x3 +// CHECK-INST: msr DPOTBR1_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0x20,0x1d,0xd5] +// CHECK-UNKNOWN: d51d20e3 msr DPOTBR1_EL12, x3 + +msr DPOTBR0_EL2, x3 +// CHECK-INST: msr DPOTBR0_EL2, x3 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x1c,0xd5] +// CHECK-UNKNOWN: d51c20c3 msr DPOTBR0_EL2, x3 + +msr DPOTBR1_EL2, x3 +// CHECK-INST: msr DPOTBR1_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0x20,0x1c,0xd5] +// CHECK-UNKNOWN: d51c20e3 msr DPOTBR1_EL2, x3 + +msr DPOTBR0_EL3, x3 +// CHECK-INST: msr DPOTBR0_EL3, x3 +// CHECK-ENCODING: encoding: [0xc3,0x20,0x1e,0xd5] +// CHECK-UNKNOWN: d51e20c3 msr DPOTBR0_EL3, x3 + +msr IRTBRU_EL1, x3 +// CHECK-INST: msr IRTBRU_EL1, x3 +// CHECK-ENCODING: encoding: [0x83,0x20,0x18,0xd5] +// CHECK-UNKNOWN: d5182083 msr IRTBRU_EL1, x3 + +msr IRTBRU_EL12, x3 +// CHECK-INST: msr IRTBRU_EL12, x3 +// CHECK-ENCODING: encoding: 
[0x83,0x20,0x1d,0xd5] +// CHECK-UNKNOWN: d51d2083 msr IRTBRU_EL12, x3 + +msr IRTBRP_EL1, x3 +// CHECK-INST: msr IRTBRP_EL1, x3 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x18,0xd5] +// CHECK-UNKNOWN: d51820a3 msr IRTBRP_EL1, x3 + +msr IRTBRP_EL12, x3 +// CHECK-INST: msr IRTBRP_EL12, x3 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x1d,0xd5] +// CHECK-UNKNOWN: d51d20a3 msr IRTBRP_EL12, x3 + +msr IRTBRU_EL2, x3 +// CHECK-INST: msr IRTBRU_EL2, x3 +// CHECK-ENCODING: encoding: [0x83,0x20,0x1c,0xd5] +// CHECK-UNKNOWN: d51c2083 msr IRTBRU_EL2, x3 + +msr IRTBRP_EL2, x3 +// CHECK-INST: msr IRTBRP_EL2, x3 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x1c,0xd5] +// CHECK-UNKNOWN: d51c20a3 msr IRTBRP_EL2, x3 + +msr IRTBRP_EL3, x3 +// CHECK-INST: msr IRTBRP_EL3, x3 +// CHECK-ENCODING: encoding: [0xa3,0x20,0x1e,0xd5] +// CHECK-UNKNOWN: d51e20a3 msr IRTBRP_EL3, x3 + +msr TTTBRU_EL1, x3 +// CHECK-INST: msr TTTBRU_EL1, x3 +// CHECK-ENCODING: encoding: [0xc3,0xa2,0x18,0xd5] +// CHECK-UNKNOWN: d518a2c3 msr TTTBRU_EL1, x3 + +msr TTTBRU_EL12, x3 +// CHECK-INST: msr TTTBRU_EL12, x3 +// CHECK-ENCODING: encoding: [0xc3,0xa2,0x1d,0xd5] +// CHECK-UNKNOWN: d51da2c3 msr TTTBRU_EL12, x3 + +msr TTTBRP_EL1, x3 +// CHECK-INST: msr TTTBRP_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x18,0xd5] +// CHECK-UNKNOWN: d518a2e3 msr TTTBRP_EL1, x3 + +msr TTTBRP_EL12, x3 +// CHECK-INST: msr TTTBRP_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x1d,0xd5] +// CHECK-UNKNOWN: d51da2e3 msr TTTBRP_EL12, x3 + +msr TTTBRU_EL2, x3 +// CHECK-INST: msr TTTBRU_EL2, x3 +// CHECK-ENCODING: encoding: [0xc3,0xa2,0x1c,0xd5] +// CHECK-UNKNOWN: d51ca2c3 msr TTTBRU_EL2, x3 + +msr TTTBRP_EL2, x3 +// CHECK-INST: msr TTTBRP_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x1c,0xd5] +// CHECK-UNKNOWN: d51ca2e3 msr TTTBRP_EL2, x3 + +msr TTTBRP_EL3, x3 +// CHECK-INST: msr TTTBRP_EL3, x3 +// CHECK-ENCODING: encoding: [0xe3,0xa2,0x1e,0xd5] +// CHECK-UNKNOWN: d51ea2e3 msr TTTBRP_EL3, x3 + +msr LDSTT_EL1, x3 +// CHECK-INST: msr 
LDSTT_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0x21,0x18,0xd5] +// CHECK-UNKNOWN: d51821e3 msr LDSTT_EL1, x3 + +msr LDSTT_EL12, x3 +// CHECK-INST: msr LDSTT_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0x21,0x1d,0xd5] +// CHECK-UNKNOWN: d51d21e3 msr LDSTT_EL12, x3 + +msr LDSTT_EL2, x3 +// CHECK-INST: msr LDSTT_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0x21,0x1c,0xd5] +// CHECK-UNKNOWN: d51c21e3 msr LDSTT_EL2, x3 + +msr TINDEX_EL0, x3 +// CHECK-INST: msr TINDEX_EL0, x3 +// CHECK-ENCODING: encoding: [0x63,0x40,0x1b,0xd5] +// CHECK-UNKNOWN: d51b4063 msr TINDEX_EL0, x3 + +msr TINDEX_EL1, x3 +// CHECK-INST: msr TINDEX_EL1, x3 +// CHECK-ENCODING: encoding: [0x63,0x40,0x18,0xd5] +// CHECK-UNKNOWN: d5184063 msr TINDEX_EL1, x3 + +msr TINDEX_EL2, x3 +// CHECK-INST: msr TINDEX_EL2, x3 +// CHECK-ENCODING: encoding: [0x63,0x40,0x1c,0xd5] +// CHECK-UNKNOWN: d51c4063 msr TINDEX_EL2, x3 + +msr TINDEX_EL12, x3 +// CHECK-INST: msr TINDEX_EL12, x3 +// CHECK-ENCODING: encoding: [0x63,0x40,0x1d,0xd5] +// CHECK-UNKNOWN: d51d4063 msr TINDEX_EL12, x3 + +msr TINDEX_EL3, x3 +// CHECK-INST: msr TINDEX_EL3, x3 +// CHECK-ENCODING: encoding: [0x63,0x40,0x1e,0xd5] +// CHECK-UNKNOWN: d51e4063 msr TINDEX_EL3, x3 + +msr STINDEX_EL1, x3 +// CHECK-INST: msr STINDEX_EL1, x3 +// CHECK-ENCODING: encoding: [0x43,0x40,0x18,0xd5] +// CHECK-UNKNOWN: d5184043 msr STINDEX_EL1, x3 + +msr STINDEX_EL2, x3 +// CHECK-INST: msr STINDEX_EL2, x3 +// CHECK-ENCODING: encoding: [0x43,0x40,0x1c,0xd5] +// CHECK-UNKNOWN: d51c4043 msr STINDEX_EL2, x3 + +msr STINDEX_EL12, x3 +// CHECK-INST: msr STINDEX_EL12, x3 +// CHECK-ENCODING: encoding: [0x43,0x40,0x1d,0xd5] +// CHECK-UNKNOWN: d51d4043 msr STINDEX_EL12, x3 + +msr STINDEX_EL3, x3 +// CHECK-INST: msr STINDEX_EL3, x3 +// CHECK-ENCODING: encoding: [0x43,0x40,0x1e,0xd5] +// CHECK-UNKNOWN: d51e4043 msr STINDEX_EL3, x3 + +msr FGDTP0_EL1, x3 +// CHECK-INST: msr FGDTP0_EL1, x3 +// CHECK-ENCODING: encoding: [0x03,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d5183203 msr FGDTP0_EL1, x3 
+ +msr FGDTU0_EL1, x3 +// CHECK-INST: msr FGDTU0_EL1, x3 +// CHECK-ENCODING: encoding: [0x03,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d5183403 msr FGDTU0_EL1, x3 + +msr FGDTP0_EL2, x3 +// CHECK-INST: msr FGDTP0_EL2, x3 +// CHECK-ENCODING: encoding: [0x03,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3203 msr FGDTP0_EL2, x3 + +msr FGDTU0_EL2, x3 +// CHECK-INST: msr FGDTU0_EL2, x3 +// CHECK-ENCODING: encoding: [0x03,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3403 msr FGDTU0_EL2, x3 + +msr FGDTP0_EL12, x3 +// CHECK-INST: msr FGDTP0_EL12, x3 +// CHECK-ENCODING: encoding: [0x03,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3203 msr FGDTP0_EL12, x3 + +msr FGDTU0_EL12, x3 +// CHECK-INST: msr FGDTU0_EL12, x3 +// CHECK-ENCODING: encoding: [0x03,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3403 msr FGDTU0_EL12, x3 + +msr FGDTP0_EL3, x3 +// CHECK-INST: msr FGDTP0_EL3, x3 +// CHECK-ENCODING: encoding: [0x03,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3203 msr FGDTP0_EL3, x3 + +msr FGDTP1_EL1, x3 +// CHECK-INST: msr FGDTP1_EL1, x3 +// CHECK-ENCODING: encoding: [0x23,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d5183223 msr FGDTP1_EL1, x3 + +msr FGDTU1_EL1, x3 +// CHECK-INST: msr FGDTU1_EL1, x3 +// CHECK-ENCODING: encoding: [0x23,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d5183423 msr FGDTU1_EL1, x3 + +msr FGDTP1_EL2, x3 +// CHECK-INST: msr FGDTP1_EL2, x3 +// CHECK-ENCODING: encoding: [0x23,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3223 msr FGDTP1_EL2, x3 + +msr FGDTU1_EL2, x3 +// CHECK-INST: msr FGDTU1_EL2, x3 +// CHECK-ENCODING: encoding: [0x23,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3423 msr FGDTU1_EL2, x3 + +msr FGDTP1_EL12, x3 +// CHECK-INST: msr FGDTP1_EL12, x3 +// CHECK-ENCODING: encoding: [0x23,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3223 msr FGDTP1_EL12, x3 + +msr FGDTU1_EL12, x3 +// CHECK-INST: msr FGDTU1_EL12, x3 +// CHECK-ENCODING: encoding: [0x23,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3423 msr FGDTU1_EL12, x3 + +msr FGDTP1_EL3, x3 +// CHECK-INST: msr FGDTP1_EL3, x3 +// CHECK-ENCODING: encoding: [0x23,0x32,0x1e,0xd5] +// 
CHECK-UNKNOWN: d51e3223 msr FGDTP1_EL3, x3 + +msr FGDTP2_EL1, x3 +// CHECK-INST: msr FGDTP2_EL1, x3 +// CHECK-ENCODING: encoding: [0x43,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d5183243 msr FGDTP2_EL1, x3 + +msr FGDTU2_EL1, x3 +// CHECK-INST: msr FGDTU2_EL1, x3 +// CHECK-ENCODING: encoding: [0x43,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d5183443 msr FGDTU2_EL1, x3 + +msr FGDTP2_EL2, x3 +// CHECK-INST: msr FGDTP2_EL2, x3 +// CHECK-ENCODING: encoding: [0x43,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3243 msr FGDTP2_EL2, x3 + +msr FGDTU2_EL2, x3 +// CHECK-INST: msr FGDTU2_EL2, x3 +// CHECK-ENCODING: encoding: [0x43,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3443 msr FGDTU2_EL2, x3 + +msr FGDTP2_EL12, x3 +// CHECK-INST: msr FGDTP2_EL12, x3 +// CHECK-ENCODING: encoding: [0x43,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3243 msr FGDTP2_EL12, x3 + +msr FGDTU2_EL12, x3 +// CHECK-INST: msr FGDTU2_EL12, x3 +// CHECK-ENCODING: encoding: [0x43,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3443 msr FGDTU2_EL12, x3 + +msr FGDTP2_EL3, x3 +// CHECK-INST: msr FGDTP2_EL3, x3 +// CHECK-ENCODING: encoding: [0x43,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3243 msr FGDTP2_EL3, x3 + +msr FGDTP3_EL1, x3 +// CHECK-INST: msr FGDTP3_EL1, x3 +// CHECK-ENCODING: encoding: [0x63,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d5183263 msr FGDTP3_EL1, x3 + +msr FGDTU3_EL1, x3 +// CHECK-INST: msr FGDTU3_EL1, x3 +// CHECK-ENCODING: encoding: [0x63,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d5183463 msr FGDTU3_EL1, x3 + +msr FGDTP3_EL2, x3 +// CHECK-INST: msr FGDTP3_EL2, x3 +// CHECK-ENCODING: encoding: [0x63,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3263 msr FGDTP3_EL2, x3 + +msr FGDTU3_EL2, x3 +// CHECK-INST: msr FGDTU3_EL2, x3 +// CHECK-ENCODING: encoding: [0x63,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3463 msr FGDTU3_EL2, x3 + +msr FGDTP3_EL12, x3 +// CHECK-INST: msr FGDTP3_EL12, x3 +// CHECK-ENCODING: encoding: [0x63,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3263 msr FGDTP3_EL12, x3 + +msr FGDTU3_EL12, x3 +// CHECK-INST: msr FGDTU3_EL12, x3 +// 
CHECK-ENCODING: encoding: [0x63,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3463 msr FGDTU3_EL12, x3 + +msr FGDTP3_EL3, x3 +// CHECK-INST: msr FGDTP3_EL3, x3 +// CHECK-ENCODING: encoding: [0x63,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3263 msr FGDTP3_EL3, x3 + +msr FGDTP4_EL1, x3 +// CHECK-INST: msr FGDTP4_EL1, x3 +// CHECK-ENCODING: encoding: [0x83,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d5183283 msr FGDTP4_EL1, x3 + +msr FGDTU4_EL1, x3 +// CHECK-INST: msr FGDTU4_EL1, x3 +// CHECK-ENCODING: encoding: [0x83,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d5183483 msr FGDTU4_EL1, x3 + +msr FGDTP4_EL2, x3 +// CHECK-INST: msr FGDTP4_EL2, x3 +// CHECK-ENCODING: encoding: [0x83,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3283 msr FGDTP4_EL2, x3 + +msr FGDTU4_EL2, x3 +// CHECK-INST: msr FGDTU4_EL2, x3 +// CHECK-ENCODING: encoding: [0x83,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3483 msr FGDTU4_EL2, x3 + +msr FGDTP4_EL12, x3 +// CHECK-INST: msr FGDTP4_EL12, x3 +// CHECK-ENCODING: encoding: [0x83,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3283 msr FGDTP4_EL12, x3 + +msr FGDTU4_EL12, x3 +// CHECK-INST: msr FGDTU4_EL12, x3 +// CHECK-ENCODING: encoding: [0x83,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3483 msr FGDTU4_EL12, x3 + +msr FGDTP4_EL3, x3 +// CHECK-INST: msr FGDTP4_EL3, x3 +// CHECK-ENCODING: encoding: [0x83,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3283 msr FGDTP4_EL3, x3 + +msr FGDTP5_EL1, x3 +// CHECK-INST: msr FGDTP5_EL1, x3 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d51832a3 msr FGDTP5_EL1, x3 + +msr FGDTU5_EL1, x3 +// CHECK-INST: msr FGDTU5_EL1, x3 +// CHECK-ENCODING: encoding: [0xa3,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d51834a3 msr FGDTU5_EL1, x3 + +msr FGDTP5_EL2, x3 +// CHECK-INST: msr FGDTP5_EL2, x3 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c32a3 msr FGDTP5_EL2, x3 + +msr FGDTU5_EL2, x3 +// CHECK-INST: msr FGDTU5_EL2, x3 +// CHECK-ENCODING: encoding: [0xa3,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c34a3 msr FGDTU5_EL2, x3 + +msr FGDTP5_EL12, x3 
+// CHECK-INST: msr FGDTP5_EL12, x3 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d32a3 msr FGDTP5_EL12, x3 + +msr FGDTU5_EL12, x3 +// CHECK-INST: msr FGDTU5_EL12, x3 +// CHECK-ENCODING: encoding: [0xa3,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d34a3 msr FGDTU5_EL12, x3 + +msr FGDTP5_EL3, x3 +// CHECK-INST: msr FGDTP5_EL3, x3 +// CHECK-ENCODING: encoding: [0xa3,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e32a3 msr FGDTP5_EL3, x3 + +msr FGDTP6_EL1, x3 +// CHECK-INST: msr FGDTP6_EL1, x3 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d51832c3 msr FGDTP6_EL1, x3 + +msr FGDTU6_EL1, x3 +// CHECK-INST: msr FGDTU6_EL1, x3 +// CHECK-ENCODING: encoding: [0xc3,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d51834c3 msr FGDTU6_EL1, x3 + +msr FGDTP6_EL2, x3 +// CHECK-INST: msr FGDTP6_EL2, x3 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c32c3 msr FGDTP6_EL2, x3 + +msr FGDTU6_EL2, x3 +// CHECK-INST: msr FGDTU6_EL2, x3 +// CHECK-ENCODING: encoding: [0xc3,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c34c3 msr FGDTU6_EL2, x3 + +msr FGDTP6_EL12, x3 +// CHECK-INST: msr FGDTP6_EL12, x3 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d32c3 msr FGDTP6_EL12, x3 + +msr FGDTU6_EL12, x3 +// CHECK-INST: msr FGDTU6_EL12, x3 +// CHECK-ENCODING: encoding: [0xc3,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d34c3 msr FGDTU6_EL12, x3 + +msr FGDTP6_EL3, x3 +// CHECK-INST: msr FGDTP6_EL3, x3 +// CHECK-ENCODING: encoding: [0xc3,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e32c3 msr FGDTP6_EL3, x3 + +msr FGDTP7_EL1, x3 +// CHECK-INST: msr FGDTP7_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x18,0xd5] +// CHECK-UNKNOWN: d51832e3 msr FGDTP7_EL1, x3 + +msr FGDTU7_EL1, x3 +// CHECK-INST: msr FGDTU7_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0x34,0x18,0xd5] +// CHECK-UNKNOWN: d51834e3 msr FGDTU7_EL1, x3 + +msr FGDTP7_EL2, x3 +// CHECK-INST: msr FGDTP7_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x1c,0xd5] +// CHECK-UNKNOWN: d51c32e3 
msr FGDTP7_EL2, x3 + +msr FGDTU7_EL2, x3 +// CHECK-INST: msr FGDTU7_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0x34,0x1c,0xd5] +// CHECK-UNKNOWN: d51c34e3 msr FGDTU7_EL2, x3 + +msr FGDTP7_EL12, x3 +// CHECK-INST: msr FGDTP7_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x1d,0xd5] +// CHECK-UNKNOWN: d51d32e3 msr FGDTP7_EL12, x3 + +msr FGDTU7_EL12, x3 +// CHECK-INST: msr FGDTU7_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0x34,0x1d,0xd5] +// CHECK-UNKNOWN: d51d34e3 msr FGDTU7_EL12, x3 + +msr FGDTP7_EL3, x3 +// CHECK-INST: msr FGDTP7_EL3, x3 +// CHECK-ENCODING: encoding: [0xe3,0x32,0x1e,0xd5] +// CHECK-UNKNOWN: d51e32e3 msr FGDTP7_EL3, x3 + +msr FGDTP8_EL1, x3 +// CHECK-INST: msr FGDTP8_EL1, x3 +// CHECK-ENCODING: encoding: [0x03,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d5183303 msr FGDTP8_EL1, x3 + +msr FGDTU8_EL1, x3 +// CHECK-INST: msr FGDTU8_EL1, x3 +// CHECK-ENCODING: encoding: [0x03,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d5183503 msr FGDTU8_EL1, x3 + +msr FGDTP8_EL2, x3 +// CHECK-INST: msr FGDTP8_EL2, x3 +// CHECK-ENCODING: encoding: [0x03,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3303 msr FGDTP8_EL2, x3 + +msr FGDTU8_EL2, x3 +// CHECK-INST: msr FGDTU8_EL2, x3 +// CHECK-ENCODING: encoding: [0x03,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3503 msr FGDTU8_EL2, x3 + +msr FGDTP8_EL12, x3 +// CHECK-INST: msr FGDTP8_EL12, x3 +// CHECK-ENCODING: encoding: [0x03,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3303 msr FGDTP8_EL12, x3 + +msr FGDTU8_EL12, x3 +// CHECK-INST: msr FGDTU8_EL12, x3 +// CHECK-ENCODING: encoding: [0x03,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3503 msr FGDTU8_EL12, x3 + +msr FGDTP8_EL3, x3 +// CHECK-INST: msr FGDTP8_EL3, x3 +// CHECK-ENCODING: encoding: [0x03,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3303 msr FGDTP8_EL3, x3 + +msr FGDTP9_EL1, x3 +// CHECK-INST: msr FGDTP9_EL1, x3 +// CHECK-ENCODING: encoding: [0x23,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d5183323 msr FGDTP9_EL1, x3 + +msr FGDTU9_EL1, x3 +// CHECK-INST: msr FGDTU9_EL1, x3 +// CHECK-ENCODING: encoding: 
[0x23,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d5183523 msr FGDTU9_EL1, x3 + +msr FGDTP9_EL2, x3 +// CHECK-INST: msr FGDTP9_EL2, x3 +// CHECK-ENCODING: encoding: [0x23,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3323 msr FGDTP9_EL2, x3 + +msr FGDTU9_EL2, x3 +// CHECK-INST: msr FGDTU9_EL2, x3 +// CHECK-ENCODING: encoding: [0x23,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3523 msr FGDTU9_EL2, x3 + +msr FGDTP9_EL12, x3 +// CHECK-INST: msr FGDTP9_EL12, x3 +// CHECK-ENCODING: encoding: [0x23,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3323 msr FGDTP9_EL12, x3 + +msr FGDTU9_EL12, x3 +// CHECK-INST: msr FGDTU9_EL12, x3 +// CHECK-ENCODING: encoding: [0x23,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3523 msr FGDTU9_EL12, x3 + +msr FGDTP9_EL3, x3 +// CHECK-INST: msr FGDTP9_EL3, x3 +// CHECK-ENCODING: encoding: [0x23,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3323 msr FGDTP9_EL3, x3 + +msr FGDTP10_EL1, x3 +// CHECK-INST: msr FGDTP10_EL1, x3 +// CHECK-ENCODING: encoding: [0x43,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d5183343 msr FGDTP10_EL1, x3 + +msr FGDTU10_EL1, x3 +// CHECK-INST: msr FGDTU10_EL1, x3 +// CHECK-ENCODING: encoding: [0x43,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d5183543 msr FGDTU10_EL1, x3 + +msr FGDTP10_EL2, x3 +// CHECK-INST: msr FGDTP10_EL2, x3 +// CHECK-ENCODING: encoding: [0x43,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3343 msr FGDTP10_EL2, x3 + +msr FGDTU10_EL2, x3 +// CHECK-INST: msr FGDTU10_EL2, x3 +// CHECK-ENCODING: encoding: [0x43,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3543 msr FGDTU10_EL2, x3 + +msr FGDTP10_EL12, x3 +// CHECK-INST: msr FGDTP10_EL12, x3 +// CHECK-ENCODING: encoding: [0x43,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3343 msr FGDTP10_EL12, x3 + +msr FGDTU10_EL12, x3 +// CHECK-INST: msr FGDTU10_EL12, x3 +// CHECK-ENCODING: encoding: [0x43,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3543 msr FGDTU10_EL12, x3 + +msr FGDTP10_EL3, x3 +// CHECK-INST: msr FGDTP10_EL3, x3 +// CHECK-ENCODING: encoding: [0x43,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3343 msr FGDTP10_EL3, x3 + +msr FGDTP11_EL1, x3 
+// CHECK-INST: msr FGDTP11_EL1, x3 +// CHECK-ENCODING: encoding: [0x63,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d5183363 msr FGDTP11_EL1, x3 + +msr FGDTU11_EL1, x3 +// CHECK-INST: msr FGDTU11_EL1, x3 +// CHECK-ENCODING: encoding: [0x63,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d5183563 msr FGDTU11_EL1, x3 + +msr FGDTP11_EL2, x3 +// CHECK-INST: msr FGDTP11_EL2, x3 +// CHECK-ENCODING: encoding: [0x63,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3363 msr FGDTP11_EL2, x3 + +msr FGDTU11_EL2, x3 +// CHECK-INST: msr FGDTU11_EL2, x3 +// CHECK-ENCODING: encoding: [0x63,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3563 msr FGDTU11_EL2, x3 + +msr FGDTP11_EL12, x3 +// CHECK-INST: msr FGDTP11_EL12, x3 +// CHECK-ENCODING: encoding: [0x63,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3363 msr FGDTP11_EL12, x3 + +msr FGDTU11_EL12, x3 +// CHECK-INST: msr FGDTU11_EL12, x3 +// CHECK-ENCODING: encoding: [0x63,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3563 msr FGDTU11_EL12, x3 + +msr FGDTP11_EL3, x3 +// CHECK-INST: msr FGDTP11_EL3, x3 +// CHECK-ENCODING: encoding: [0x63,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3363 msr FGDTP11_EL3, x3 + +msr FGDTP12_EL1, x3 +// CHECK-INST: msr FGDTP12_EL1, x3 +// CHECK-ENCODING: encoding: [0x83,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d5183383 msr FGDTP12_EL1, x3 + +msr FGDTU12_EL1, x3 +// CHECK-INST: msr FGDTU12_EL1, x3 +// CHECK-ENCODING: encoding: [0x83,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d5183583 msr FGDTU12_EL1, x3 + +msr FGDTP12_EL2, x3 +// CHECK-INST: msr FGDTP12_EL2, x3 +// CHECK-ENCODING: encoding: [0x83,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3383 msr FGDTP12_EL2, x3 + +msr FGDTU12_EL2, x3 +// CHECK-INST: msr FGDTU12_EL2, x3 +// CHECK-ENCODING: encoding: [0x83,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3583 msr FGDTU12_EL2, x3 + +msr FGDTP12_EL12, x3 +// CHECK-INST: msr FGDTP12_EL12, x3 +// CHECK-ENCODING: encoding: [0x83,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3383 msr FGDTP12_EL12, x3 + +msr FGDTU12_EL12, x3 +// CHECK-INST: msr FGDTU12_EL12, x3 +// CHECK-ENCODING: encoding: 
[0x83,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3583 msr FGDTU12_EL12, x3 + +msr FGDTP12_EL3, x3 +// CHECK-INST: msr FGDTP12_EL3, x3 +// CHECK-ENCODING: encoding: [0x83,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3383 msr FGDTP12_EL3, x3 + +msr FGDTP13_EL1, x3 +// CHECK-INST: msr FGDTP13_EL1, x3 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d51833a3 msr FGDTP13_EL1, x3 + +msr FGDTU13_EL1, x3 +// CHECK-INST: msr FGDTU13_EL1, x3 +// CHECK-ENCODING: encoding: [0xa3,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d51835a3 msr FGDTU13_EL1, x3 + +msr FGDTP13_EL2, x3 +// CHECK-INST: msr FGDTP13_EL2, x3 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c33a3 msr FGDTP13_EL2, x3 + +msr FGDTU13_EL2, x3 +// CHECK-INST: msr FGDTU13_EL2, x3 +// CHECK-ENCODING: encoding: [0xa3,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c35a3 msr FGDTU13_EL2, x3 + +msr FGDTP13_EL12, x3 +// CHECK-INST: msr FGDTP13_EL12, x3 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d33a3 msr FGDTP13_EL12, x3 + +msr FGDTU13_EL12, x3 +// CHECK-INST: msr FGDTU13_EL12, x3 +// CHECK-ENCODING: encoding: [0xa3,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d35a3 msr FGDTU13_EL12, x3 + +msr FGDTP13_EL3, x3 +// CHECK-INST: msr FGDTP13_EL3, x3 +// CHECK-ENCODING: encoding: [0xa3,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e33a3 msr FGDTP13_EL3, x3 + +msr FGDTP14_EL1, x3 +// CHECK-INST: msr FGDTP14_EL1, x3 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d51833c3 msr FGDTP14_EL1, x3 + +msr FGDTU14_EL1, x3 +// CHECK-INST: msr FGDTU14_EL1, x3 +// CHECK-ENCODING: encoding: [0xc3,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d51835c3 msr FGDTU14_EL1, x3 + +msr FGDTP14_EL2, x3 +// CHECK-INST: msr FGDTP14_EL2, x3 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c33c3 msr FGDTP14_EL2, x3 + +msr FGDTU14_EL2, x3 +// CHECK-INST: msr FGDTU14_EL2, x3 +// CHECK-ENCODING: encoding: [0xc3,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c35c3 msr FGDTU14_EL2, x3 + +msr 
FGDTP14_EL12, x3 +// CHECK-INST: msr FGDTP14_EL12, x3 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d33c3 msr FGDTP14_EL12, x3 + +msr FGDTU14_EL12, x3 +// CHECK-INST: msr FGDTU14_EL12, x3 +// CHECK-ENCODING: encoding: [0xc3,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d35c3 msr FGDTU14_EL12, x3 + +msr FGDTP14_EL3, x3 +// CHECK-INST: msr FGDTP14_EL3, x3 +// CHECK-ENCODING: encoding: [0xc3,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e33c3 msr FGDTP14_EL3, x3 + +msr FGDTP15_EL1, x3 +// CHECK-INST: msr FGDTP15_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x18,0xd5] +// CHECK-UNKNOWN: d51833e3 msr FGDTP15_EL1, x3 + +msr FGDTU15_EL1, x3 +// CHECK-INST: msr FGDTU15_EL1, x3 +// CHECK-ENCODING: encoding: [0xe3,0x35,0x18,0xd5] +// CHECK-UNKNOWN: d51835e3 msr FGDTU15_EL1, x3 + +msr FGDTP15_EL2, x3 +// CHECK-INST: msr FGDTP15_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x1c,0xd5] +// CHECK-UNKNOWN: d51c33e3 msr FGDTP15_EL2, x3 + +msr FGDTU15_EL2, x3 +// CHECK-INST: msr FGDTU15_EL2, x3 +// CHECK-ENCODING: encoding: [0xe3,0x35,0x1c,0xd5] +// CHECK-UNKNOWN: d51c35e3 msr FGDTU15_EL2, x3 + +msr FGDTP15_EL12, x3 +// CHECK-INST: msr FGDTP15_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x1d,0xd5] +// CHECK-UNKNOWN: d51d33e3 msr FGDTP15_EL12, x3 + +msr FGDTU15_EL12, x3 +// CHECK-INST: msr FGDTU15_EL12, x3 +// CHECK-ENCODING: encoding: [0xe3,0x35,0x1d,0xd5] +// CHECK-UNKNOWN: d51d35e3 msr FGDTU15_EL12, x3 + +msr FGDTP15_EL3, x3 +// CHECK-INST: msr FGDTP15_EL3, x3 +// CHECK-ENCODING: encoding: [0xe3,0x33,0x1e,0xd5] +// CHECK-UNKNOWN: d51e33e3 msr FGDTP15_EL3, x3 + +msr AFGDTP0_EL1, x0 +// CHECK-INST: msr AFGDTP0_EL1, x0 +// CHECK-ENCODING: encoding: [0x00,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d5183600 msr AFGDTP0_EL1, x0 + +msr AFGDTU0_EL1, x0 +// CHECK-INST: msr AFGDTU0_EL1, x0 +// CHECK-ENCODING: encoding: [0x00,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d5183800 msr AFGDTU0_EL1, x0 + +msr AFGDTP0_EL2, x0 +// CHECK-INST: msr AFGDTP0_EL2, x0 +// CHECK-ENCODING: 
encoding: [0x00,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3600 msr AFGDTP0_EL2, x0 + +msr AFGDTU0_EL2, x0 +// CHECK-INST: msr AFGDTU0_EL2, x0 +// CHECK-ENCODING: encoding: [0x00,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3800 msr AFGDTU0_EL2, x0 + +msr AFGDTP0_EL12, x0 +// CHECK-INST: msr AFGDTP0_EL12, x0 +// CHECK-ENCODING: encoding: [0x00,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3600 msr AFGDTP0_EL12, x0 + +msr AFGDTU0_EL12, x0 +// CHECK-INST: msr AFGDTU0_EL12, x0 +// CHECK-ENCODING: encoding: [0x00,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3800 msr AFGDTU0_EL12, x0 + +msr AFGDTP0_EL3, x0 +// CHECK-INST: msr AFGDTP0_EL3, x0 +// CHECK-ENCODING: encoding: [0x00,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3600 msr AFGDTP0_EL3, x0 + +msr AFGDTP1_EL1, x0 +// CHECK-INST: msr AFGDTP1_EL1, x0 +// CHECK-ENCODING: encoding: [0x20,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d5183620 msr AFGDTP1_EL1, x0 + +msr AFGDTU1_EL1, x0 +// CHECK-INST: msr AFGDTU1_EL1, x0 +// CHECK-ENCODING: encoding: [0x20,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d5183820 msr AFGDTU1_EL1, x0 + +msr AFGDTP1_EL2, x0 +// CHECK-INST: msr AFGDTP1_EL2, x0 +// CHECK-ENCODING: encoding: [0x20,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3620 msr AFGDTP1_EL2, x0 + +msr AFGDTU1_EL2, x0 +// CHECK-INST: msr AFGDTU1_EL2, x0 +// CHECK-ENCODING: encoding: [0x20,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3820 msr AFGDTU1_EL2, x0 + +msr AFGDTP1_EL12, x0 +// CHECK-INST: msr AFGDTP1_EL12, x0 +// CHECK-ENCODING: encoding: [0x20,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3620 msr AFGDTP1_EL12, x0 + +msr AFGDTU1_EL12, x0 +// CHECK-INST: msr AFGDTU1_EL12, x0 +// CHECK-ENCODING: encoding: [0x20,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3820 msr AFGDTU1_EL12, x0 + +msr AFGDTP1_EL3, x0 +// CHECK-INST: msr AFGDTP1_EL3, x0 +// CHECK-ENCODING: encoding: [0x20,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3620 msr AFGDTP1_EL3, x0 + +msr AFGDTP2_EL1, x0 +// CHECK-INST: msr AFGDTP2_EL1, x0 +// CHECK-ENCODING: encoding: [0x40,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d5183640 msr AFGDTP2_EL1, x0 
+ +msr AFGDTU2_EL1, x0 +// CHECK-INST: msr AFGDTU2_EL1, x0 +// CHECK-ENCODING: encoding: [0x40,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d5183840 msr AFGDTU2_EL1, x0 + +msr AFGDTP2_EL2, x0 +// CHECK-INST: msr AFGDTP2_EL2, x0 +// CHECK-ENCODING: encoding: [0x40,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3640 msr AFGDTP2_EL2, x0 + +msr AFGDTU2_EL2, x0 +// CHECK-INST: msr AFGDTU2_EL2, x0 +// CHECK-ENCODING: encoding: [0x40,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3840 msr AFGDTU2_EL2, x0 + +msr AFGDTP2_EL12, x0 +// CHECK-INST: msr AFGDTP2_EL12, x0 +// CHECK-ENCODING: encoding: [0x40,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3640 msr AFGDTP2_EL12, x0 + +msr AFGDTU2_EL12, x0 +// CHECK-INST: msr AFGDTU2_EL12, x0 +// CHECK-ENCODING: encoding: [0x40,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3840 msr AFGDTU2_EL12, x0 + +msr AFGDTP2_EL3, x0 +// CHECK-INST: msr AFGDTP2_EL3, x0 +// CHECK-ENCODING: encoding: [0x40,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3640 msr AFGDTP2_EL3, x0 + +msr AFGDTP3_EL1, x0 +// CHECK-INST: msr AFGDTP3_EL1, x0 +// CHECK-ENCODING: encoding: [0x60,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d5183660 msr AFGDTP3_EL1, x0 + +msr AFGDTU3_EL1, x0 +// CHECK-INST: msr AFGDTU3_EL1, x0 +// CHECK-ENCODING: encoding: [0x60,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d5183860 msr AFGDTU3_EL1, x0 + +msr AFGDTP3_EL2, x0 +// CHECK-INST: msr AFGDTP3_EL2, x0 +// CHECK-ENCODING: encoding: [0x60,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3660 msr AFGDTP3_EL2, x0 + +msr AFGDTU3_EL2, x0 +// CHECK-INST: msr AFGDTU3_EL2, x0 +// CHECK-ENCODING: encoding: [0x60,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3860 msr AFGDTU3_EL2, x0 + +msr AFGDTP3_EL12, x0 +// CHECK-INST: msr AFGDTP3_EL12, x0 +// CHECK-ENCODING: encoding: [0x60,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3660 msr AFGDTP3_EL12, x0 + +msr AFGDTU3_EL12, x0 +// CHECK-INST: msr AFGDTU3_EL12, x0 +// CHECK-ENCODING: encoding: [0x60,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3860 msr AFGDTU3_EL12, x0 + +msr AFGDTP3_EL3, x0 +// CHECK-INST: msr AFGDTP3_EL3, x0 +// CHECK-ENCODING: 
encoding: [0x60,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3660 msr AFGDTP3_EL3, x0 + +msr AFGDTP4_EL1, x0 +// CHECK-INST: msr AFGDTP4_EL1, x0 +// CHECK-ENCODING: encoding: [0x80,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d5183680 msr AFGDTP4_EL1, x0 + +msr AFGDTU4_EL1, x0 +// CHECK-INST: msr AFGDTU4_EL1, x0 +// CHECK-ENCODING: encoding: [0x80,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d5183880 msr AFGDTU4_EL1, x0 + +msr AFGDTP4_EL2, x0 +// CHECK-INST: msr AFGDTP4_EL2, x0 +// CHECK-ENCODING: encoding: [0x80,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3680 msr AFGDTP4_EL2, x0 + +msr AFGDTU4_EL2, x0 +// CHECK-INST: msr AFGDTU4_EL2, x0 +// CHECK-ENCODING: encoding: [0x80,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3880 msr AFGDTU4_EL2, x0 + +msr AFGDTP4_EL12, x0 +// CHECK-INST: msr AFGDTP4_EL12, x0 +// CHECK-ENCODING: encoding: [0x80,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3680 msr AFGDTP4_EL12, x0 + +msr AFGDTU4_EL12, x0 +// CHECK-INST: msr AFGDTU4_EL12, x0 +// CHECK-ENCODING: encoding: [0x80,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3880 msr AFGDTU4_EL12, x0 + +msr AFGDTP4_EL3, x0 +// CHECK-INST: msr AFGDTP4_EL3, x0 +// CHECK-ENCODING: encoding: [0x80,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3680 msr AFGDTP4_EL3, x0 + +msr AFGDTP5_EL1, x0 +// CHECK-INST: msr AFGDTP5_EL1, x0 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d51836a0 msr AFGDTP5_EL1, x0 + +msr AFGDTU5_EL1, x0 +// CHECK-INST: msr AFGDTU5_EL1, x0 +// CHECK-ENCODING: encoding: [0xa0,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d51838a0 msr AFGDTU5_EL1, x0 + +msr AFGDTP5_EL2, x0 +// CHECK-INST: msr AFGDTP5_EL2, x0 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c36a0 msr AFGDTP5_EL2, x0 + +msr AFGDTU5_EL2, x0 +// CHECK-INST: msr AFGDTU5_EL2, x0 +// CHECK-ENCODING: encoding: [0xa0,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c38a0 msr AFGDTU5_EL2, x0 + +msr AFGDTP5_EL12, x0 +// CHECK-INST: msr AFGDTP5_EL12, x0 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d36a0 msr AFGDTP5_EL12, x0 + 
+msr AFGDTU5_EL12, x0 +// CHECK-INST: msr AFGDTU5_EL12, x0 +// CHECK-ENCODING: encoding: [0xa0,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d38a0 msr AFGDTU5_EL12, x0 + +msr AFGDTP5_EL3, x0 +// CHECK-INST: msr AFGDTP5_EL3, x0 +// CHECK-ENCODING: encoding: [0xa0,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e36a0 msr AFGDTP5_EL3, x0 + +msr AFGDTP6_EL1, x0 +// CHECK-INST: msr AFGDTP6_EL1, x0 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d51836c0 msr AFGDTP6_EL1, x0 + +msr AFGDTU6_EL1, x0 +// CHECK-INST: msr AFGDTU6_EL1, x0 +// CHECK-ENCODING: encoding: [0xc0,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d51838c0 msr AFGDTU6_EL1, x0 + +msr AFGDTP6_EL2, x0 +// CHECK-INST: msr AFGDTP6_EL2, x0 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c36c0 msr AFGDTP6_EL2, x0 + +msr AFGDTU6_EL2, x0 +// CHECK-INST: msr AFGDTU6_EL2, x0 +// CHECK-ENCODING: encoding: [0xc0,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c38c0 msr AFGDTU6_EL2, x0 + +msr AFGDTP6_EL12, x0 +// CHECK-INST: msr AFGDTP6_EL12, x0 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d36c0 msr AFGDTP6_EL12, x0 + +msr AFGDTU6_EL12, x0 +// CHECK-INST: msr AFGDTU6_EL12, x0 +// CHECK-ENCODING: encoding: [0xc0,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d38c0 msr AFGDTU6_EL12, x0 + +msr AFGDTP6_EL3, x0 +// CHECK-INST: msr AFGDTP6_EL3, x0 +// CHECK-ENCODING: encoding: [0xc0,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e36c0 msr AFGDTP6_EL3, x0 + +msr AFGDTP7_EL1, x0 +// CHECK-INST: msr AFGDTP7_EL1, x0 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x18,0xd5] +// CHECK-UNKNOWN: d51836e0 msr AFGDTP7_EL1, x0 + +msr AFGDTU7_EL1, x0 +// CHECK-INST: msr AFGDTU7_EL1, x0 +// CHECK-ENCODING: encoding: [0xe0,0x38,0x18,0xd5] +// CHECK-UNKNOWN: d51838e0 msr AFGDTU7_EL1, x0 + +msr AFGDTP7_EL2, x0 +// CHECK-INST: msr AFGDTP7_EL2, x0 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x1c,0xd5] +// CHECK-UNKNOWN: d51c36e0 msr AFGDTP7_EL2, x0 + +msr AFGDTU7_EL2, x0 +// CHECK-INST: msr AFGDTU7_EL2, x0 +// CHECK-ENCODING: 
encoding: [0xe0,0x38,0x1c,0xd5] +// CHECK-UNKNOWN: d51c38e0 msr AFGDTU7_EL2, x0 + +msr AFGDTP7_EL12, x0 +// CHECK-INST: msr AFGDTP7_EL12, x0 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x1d,0xd5] +// CHECK-UNKNOWN: d51d36e0 msr AFGDTP7_EL12, x0 + +msr AFGDTU7_EL12, x0 +// CHECK-INST: msr AFGDTU7_EL12, x0 +// CHECK-ENCODING: encoding: [0xe0,0x38,0x1d,0xd5] +// CHECK-UNKNOWN: d51d38e0 msr AFGDTU7_EL12, x0 + +msr AFGDTP7_EL3, x0 +// CHECK-INST: msr AFGDTP7_EL3, x0 +// CHECK-ENCODING: encoding: [0xe0,0x36,0x1e,0xd5] +// CHECK-UNKNOWN: d51e36e0 msr AFGDTP7_EL3, x0 + +msr AFGDTP8_EL1, x0 +// CHECK-INST: msr AFGDTP8_EL1, x0 +// CHECK-ENCODING: encoding: [0x00,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d5183700 msr AFGDTP8_EL1, x0 + +msr AFGDTU8_EL1, x0 +// CHECK-INST: msr AFGDTU8_EL1, x0 +// CHECK-ENCODING: encoding: [0x00,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d5183900 msr AFGDTU8_EL1, x0 + +msr AFGDTP8_EL2, x0 +// CHECK-INST: msr AFGDTP8_EL2, x0 +// CHECK-ENCODING: encoding: [0x00,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3700 msr AFGDTP8_EL2, x0 + +msr AFGDTU8_EL2, x0 +// CHECK-INST: msr AFGDTU8_EL2, x0 +// CHECK-ENCODING: encoding: [0x00,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3900 msr AFGDTU8_EL2, x0 + +msr AFGDTP8_EL12, x0 +// CHECK-INST: msr AFGDTP8_EL12, x0 +// CHECK-ENCODING: encoding: [0x00,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3700 msr AFGDTP8_EL12, x0 + +msr AFGDTU8_EL12, x0 +// CHECK-INST: msr AFGDTU8_EL12, x0 +// CHECK-ENCODING: encoding: [0x00,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3900 msr AFGDTU8_EL12, x0 + +msr AFGDTP8_EL3, x0 +// CHECK-INST: msr AFGDTP8_EL3, x0 +// CHECK-ENCODING: encoding: [0x00,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3700 msr AFGDTP8_EL3, x0 + +msr AFGDTP9_EL1, x0 +// CHECK-INST: msr AFGDTP9_EL1, x0 +// CHECK-ENCODING: encoding: [0x20,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d5183720 msr AFGDTP9_EL1, x0 + +msr AFGDTU9_EL1, x0 +// CHECK-INST: msr AFGDTU9_EL1, x0 +// CHECK-ENCODING: encoding: [0x20,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d5183920 msr AFGDTU9_EL1, x0 
+ +msr AFGDTP9_EL2, x0 +// CHECK-INST: msr AFGDTP9_EL2, x0 +// CHECK-ENCODING: encoding: [0x20,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3720 msr AFGDTP9_EL2, x0 + +msr AFGDTU9_EL2, x0 +// CHECK-INST: msr AFGDTU9_EL2, x0 +// CHECK-ENCODING: encoding: [0x20,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3920 msr AFGDTU9_EL2, x0 + +msr AFGDTP9_EL12, x0 +// CHECK-INST: msr AFGDTP9_EL12, x0 +// CHECK-ENCODING: encoding: [0x20,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3720 msr AFGDTP9_EL12, x0 + +msr AFGDTU9_EL12, x0 +// CHECK-INST: msr AFGDTU9_EL12, x0 +// CHECK-ENCODING: encoding: [0x20,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3920 msr AFGDTU9_EL12, x0 + +msr AFGDTP9_EL3, x0 +// CHECK-INST: msr AFGDTP9_EL3, x0 +// CHECK-ENCODING: encoding: [0x20,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3720 msr AFGDTP9_EL3, x0 + +msr AFGDTP10_EL1, x0 +// CHECK-INST: msr AFGDTP10_EL1, x0 +// CHECK-ENCODING: encoding: [0x40,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d5183740 msr AFGDTP10_EL1, x0 + +msr AFGDTU10_EL1, x0 +// CHECK-INST: msr AFGDTU10_EL1, x0 +// CHECK-ENCODING: encoding: [0x40,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d5183940 msr AFGDTU10_EL1, x0 + +msr AFGDTP10_EL2, x0 +// CHECK-INST: msr AFGDTP10_EL2, x0 +// CHECK-ENCODING: encoding: [0x40,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3740 msr AFGDTP10_EL2, x0 + +msr AFGDTU10_EL2, x0 +// CHECK-INST: msr AFGDTU10_EL2, x0 +// CHECK-ENCODING: encoding: [0x40,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3940 msr AFGDTU10_EL2, x0 + +msr AFGDTP10_EL12, x0 +// CHECK-INST: msr AFGDTP10_EL12, x0 +// CHECK-ENCODING: encoding: [0x40,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3740 msr AFGDTP10_EL12, x0 + +msr AFGDTU10_EL12, x0 +// CHECK-INST: msr AFGDTU10_EL12, x0 +// CHECK-ENCODING: encoding: [0x40,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3940 msr AFGDTU10_EL12, x0 + +msr AFGDTP10_EL3, x0 +// CHECK-INST: msr AFGDTP10_EL3, x0 +// CHECK-ENCODING: encoding: [0x40,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3740 msr AFGDTP10_EL3, x0 + +msr AFGDTP11_EL1, x0 +// CHECK-INST: msr AFGDTP11_EL1, 
x0 +// CHECK-ENCODING: encoding: [0x60,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d5183760 msr AFGDTP11_EL1, x0 + +msr AFGDTU11_EL1, x0 +// CHECK-INST: msr AFGDTU11_EL1, x0 +// CHECK-ENCODING: encoding: [0x60,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d5183960 msr AFGDTU11_EL1, x0 + +msr AFGDTP11_EL2, x0 +// CHECK-INST: msr AFGDTP11_EL2, x0 +// CHECK-ENCODING: encoding: [0x60,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3760 msr AFGDTP11_EL2, x0 + +msr AFGDTU11_EL2, x0 +// CHECK-INST: msr AFGDTU11_EL2, x0 +// CHECK-ENCODING: encoding: [0x60,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3960 msr AFGDTU11_EL2, x0 + +msr AFGDTP11_EL12, x0 +// CHECK-INST: msr AFGDTP11_EL12, x0 +// CHECK-ENCODING: encoding: [0x60,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3760 msr AFGDTP11_EL12, x0 + +msr AFGDTU11_EL12, x0 +// CHECK-INST: msr AFGDTU11_EL12, x0 +// CHECK-ENCODING: encoding: [0x60,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3960 msr AFGDTU11_EL12, x0 + +msr AFGDTP11_EL3, x0 +// CHECK-INST: msr AFGDTP11_EL3, x0 +// CHECK-ENCODING: encoding: [0x60,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3760 msr AFGDTP11_EL3, x0 + +msr AFGDTP12_EL1, x0 +// CHECK-INST: msr AFGDTP12_EL1, x0 +// CHECK-ENCODING: encoding: [0x80,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d5183780 msr AFGDTP12_EL1, x0 + +msr AFGDTU12_EL1, x0 +// CHECK-INST: msr AFGDTU12_EL1, x0 +// CHECK-ENCODING: encoding: [0x80,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d5183980 msr AFGDTU12_EL1, x0 + +msr AFGDTP12_EL2, x0 +// CHECK-INST: msr AFGDTP12_EL2, x0 +// CHECK-ENCODING: encoding: [0x80,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3780 msr AFGDTP12_EL2, x0 + +msr AFGDTU12_EL2, x0 +// CHECK-INST: msr AFGDTU12_EL2, x0 +// CHECK-ENCODING: encoding: [0x80,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c3980 msr AFGDTU12_EL2, x0 + +msr AFGDTP12_EL12, x0 +// CHECK-INST: msr AFGDTP12_EL12, x0 +// CHECK-ENCODING: encoding: [0x80,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3780 msr AFGDTP12_EL12, x0 + +msr AFGDTU12_EL12, x0 +// CHECK-INST: msr AFGDTU12_EL12, x0 +// CHECK-ENCODING: encoding: 
[0x80,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d3980 msr AFGDTU12_EL12, x0 + +msr AFGDTP12_EL3, x0 +// CHECK-INST: msr AFGDTP12_EL3, x0 +// CHECK-ENCODING: encoding: [0x80,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e3780 msr AFGDTP12_EL3, x0 + +msr AFGDTP13_EL1, x0 +// CHECK-INST: msr AFGDTP13_EL1, x0 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d51837a0 msr AFGDTP13_EL1, x0 + +msr AFGDTU13_EL1, x0 +// CHECK-INST: msr AFGDTU13_EL1, x0 +// CHECK-ENCODING: encoding: [0xa0,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d51839a0 msr AFGDTU13_EL1, x0 + +msr AFGDTP13_EL2, x0 +// CHECK-INST: msr AFGDTP13_EL2, x0 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c37a0 msr AFGDTP13_EL2, x0 + +msr AFGDTU13_EL2, x0 +// CHECK-INST: msr AFGDTU13_EL2, x0 +// CHECK-ENCODING: encoding: [0xa0,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c39a0 msr AFGDTU13_EL2, x0 + +msr AFGDTP13_EL12, x0 +// CHECK-INST: msr AFGDTP13_EL12, x0 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d37a0 msr AFGDTP13_EL12, x0 + +msr AFGDTU13_EL12, x0 +// CHECK-INST: msr AFGDTU13_EL12, x0 +// CHECK-ENCODING: encoding: [0xa0,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d39a0 msr AFGDTU13_EL12, x0 + +msr AFGDTP13_EL3, x0 +// CHECK-INST: msr AFGDTP13_EL3, x0 +// CHECK-ENCODING: encoding: [0xa0,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e37a0 msr AFGDTP13_EL3, x0 + +msr AFGDTP14_EL1, x0 +// CHECK-INST: msr AFGDTP14_EL1, x0 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d51837c0 msr AFGDTP14_EL1, x0 + +msr AFGDTU14_EL1, x0 +// CHECK-INST: msr AFGDTU14_EL1, x0 +// CHECK-ENCODING: encoding: [0xc0,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d51839c0 msr AFGDTU14_EL1, x0 + +msr AFGDTP14_EL2, x0 +// CHECK-INST: msr AFGDTP14_EL2, x0 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c37c0 msr AFGDTP14_EL2, x0 + +msr AFGDTU14_EL2, x0 +// CHECK-INST: msr AFGDTU14_EL2, x0 +// CHECK-ENCODING: encoding: [0xc0,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: 
d51c39c0 msr AFGDTU14_EL2, x0 + +msr AFGDTP14_EL12, x0 +// CHECK-INST: msr AFGDTP14_EL12, x0 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d37c0 msr AFGDTP14_EL12, x0 + +msr AFGDTU14_EL12, x0 +// CHECK-INST: msr AFGDTU14_EL12, x0 +// CHECK-ENCODING: encoding: [0xc0,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d39c0 msr AFGDTU14_EL12, x0 + +msr AFGDTP14_EL3, x0 +// CHECK-INST: msr AFGDTP14_EL3, x0 +// CHECK-ENCODING: encoding: [0xc0,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e37c0 msr AFGDTP14_EL3, x0 + +msr AFGDTP15_EL1, x0 +// CHECK-INST: msr AFGDTP15_EL1, x0 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x18,0xd5] +// CHECK-UNKNOWN: d51837e0 msr AFGDTP15_EL1, x0 + +msr AFGDTU15_EL1, x0 +// CHECK-INST: msr AFGDTU15_EL1, x0 +// CHECK-ENCODING: encoding: [0xe0,0x39,0x18,0xd5] +// CHECK-UNKNOWN: d51839e0 msr AFGDTU15_EL1, x0 + +msr AFGDTP15_EL2, x0 +// CHECK-INST: msr AFGDTP15_EL2, x0 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x1c,0xd5] +// CHECK-UNKNOWN: d51c37e0 msr AFGDTP15_EL2, x0 + +msr AFGDTU15_EL2, x0 +// CHECK-INST: msr AFGDTU15_EL2, x0 +// CHECK-ENCODING: encoding: [0xe0,0x39,0x1c,0xd5] +// CHECK-UNKNOWN: d51c39e0 msr AFGDTU15_EL2, x0 + +msr AFGDTP15_EL12, x0 +// CHECK-INST: msr AFGDTP15_EL12, x0 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x1d,0xd5] +// CHECK-UNKNOWN: d51d37e0 msr AFGDTP15_EL12, x0 + +msr AFGDTU15_EL12, x0 +// CHECK-INST: msr AFGDTU15_EL12, x0 +// CHECK-ENCODING: encoding: [0xe0,0x39,0x1d,0xd5] +// CHECK-UNKNOWN: d51d39e0 msr AFGDTU15_EL12, x0 + +msr AFGDTP15_EL3, x0 +// CHECK-INST: msr AFGDTP15_EL3, x0 +// CHECK-ENCODING: encoding: [0xe0,0x37,0x1e,0xd5] +// CHECK-UNKNOWN: d51e37e0 msr AFGDTP15_EL3, x0 + +msr TPMIN0_EL0, x2 +// CHECK-INST: msr TPMIN0_EL0, x2 +// CHECK-ENCODING: encoding: [0x82,0x22,0x1b,0xd5] +// CHECK-UNKNOWN: d51b2282 msr TPMIN0_EL0, x2 + +msr TPMAX0_EL0, x2 +// CHECK-INST: msr TPMAX0_EL0, x2 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x1b,0xd5] +// CHECK-UNKNOWN: d51b22a2 msr TPMAX0_EL0, x2 + +msr TPMIN1_EL0, x2 +// 
CHECK-INST: msr TPMIN1_EL0, x2 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x1b,0xd5] +// CHECK-UNKNOWN: d51b22c2 msr TPMIN1_EL0, x2 + +msr TPMAX1_EL0, x2 +// CHECK-INST: msr TPMAX1_EL0, x2 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x1b,0xd5] +// CHECK-UNKNOWN: d51b22e2 msr TPMAX1_EL0, x2 + +msr TPMIN0_EL1, x2 +// CHECK-INST: msr TPMIN0_EL1, x2 +// CHECK-ENCODING: encoding: [0x82,0x22,0x18,0xd5] +// CHECK-UNKNOWN: d5182282 msr TPMIN0_EL1, x2 + +msr TPMAX0_EL1, x2 +// CHECK-INST: msr TPMAX0_EL1, x2 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x18,0xd5] +// CHECK-UNKNOWN: d51822a2 msr TPMAX0_EL1, x2 + +msr TPMIN1_EL1, x2 +// CHECK-INST: msr TPMIN1_EL1, x2 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x18,0xd5] +// CHECK-UNKNOWN: d51822c2 msr TPMIN1_EL1, x2 + +msr TPMAX1_EL1, x2 +// CHECK-INST: msr TPMAX1_EL1, x2 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x18,0xd5] +// CHECK-UNKNOWN: d51822e2 msr TPMAX1_EL1, x2 + +msr TPMIN0_EL2, x2 +// CHECK-INST: msr TPMIN0_EL2, x2 +// CHECK-ENCODING: encoding: [0x82,0x22,0x1c,0xd5] +// CHECK-UNKNOWN: d51c2282 msr TPMIN0_EL2, x2 + +msr TPMAX0_EL2, x2 +// CHECK-INST: msr TPMAX0_EL2, x2 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x1c,0xd5] +// CHECK-UNKNOWN: d51c22a2 msr TPMAX0_EL2, x2 + +msr TPMIN1_EL2, x2 +// CHECK-INST: msr TPMIN1_EL2, x2 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x1c,0xd5] +// CHECK-UNKNOWN: d51c22c2 msr TPMIN1_EL2, x2 + +msr TPMAX1_EL2, x2 +// CHECK-INST: msr TPMAX1_EL2, x2 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x1c,0xd5] +// CHECK-UNKNOWN: d51c22e2 msr TPMAX1_EL2, x2 + +msr TPMIN0_EL12, x2 +// CHECK-INST: msr TPMIN0_EL12, x2 +// CHECK-ENCODING: encoding: [0x82,0x22,0x1d,0xd5] +// CHECK-UNKNOWN: d51d2282 msr TPMIN0_EL12, x2 + +msr TPMAX0_EL12, x2 +// CHECK-INST: msr TPMAX0_EL12, x2 +// CHECK-ENCODING: encoding: [0xa2,0x22,0x1d,0xd5] +// CHECK-UNKNOWN: d51d22a2 msr TPMAX0_EL12, x2 + +msr TPMIN1_EL12, x2 +// CHECK-INST: msr TPMIN1_EL12, x2 +// CHECK-ENCODING: encoding: [0xc2,0x22,0x1d,0xd5] +// CHECK-UNKNOWN: d51d22c2 msr 
TPMIN1_EL12, x2 + +msr TPMAX1_EL12, x2 +// CHECK-INST: msr TPMAX1_EL12, x2 +// CHECK-ENCODING: encoding: [0xe2,0x22,0x1d,0xd5] +// CHECK-UNKNOWN: d51d22e2 msr TPMAX1_EL12, x2 + + +tchangef x0, x1 +// CHECK-INST: tchangef x0, x1 +// CHECK-ENCODING: [0x20,0x00,0x80,0xd5] +// CHECK-UNKNOWN: d5800020 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangef x0, x1, nB +// CHECK-INST: tchangef x0, x1, nb +// CHECK-ENCODING: [0x20,0x00,0x82,0xd5] +// CHECK-UNKNOWN: d5820020 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangef x1, #3 +// CHECK-INST: tchangef x1, #3 +// CHECK-ENCODING: [0x61,0x00,0x90,0xd5] +// CHECK-UNKNOWN: d5900061 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangef x1, #3, nB +// CHECK-INST: tchangef x1, #3, nb +// CHECK-ENCODING: [0x61,0x00,0x92,0xd5] +// CHECK-UNKNOWN: d5920061 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangeb x0, x1 +// CHECK-INST: tchangeb x0, x1 +// CHECK-ENCODING: [0x20,0x00,0x84,0xd5] +// CHECK-UNKNOWN: d5840020 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangeb x0, x1, nB +// CHECK-INST: tchangeb x0, x1, nb +// CHECK-ENCODING: [0x20,0x00,0x86,0xd5] +// CHECK-UNKNOWN: d5860020 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangeb x1, #9 +// CHECK-INST: tchangeb x1, #9 +// CHECK-ENCODING: [0x21,0x01,0x94,0xd5] +// CHECK-UNKNOWN: d5940121 +// CHECK-ERROR: error: instruction requires: poe2 + +tchangeb x1, #9, nB +// CHECK-INST: tchangeb x1, #9, nb +// CHECK-ENCODING: [0x21,0x01,0x96,0xd5] +// CHECK-UNKNOWN: d5960121 +// CHECK-ERROR: error: instruction requires: poe2 + + +PLBI ALLE3 +// CHECK-INST: plbi alle3 +// CHECK-ENCODING: [0x1f,0xa7,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea71f sys #6, c10, c7, #0 +// CHECK-ERROR: error: PLBI ALLE3 requires: poe2 + +PLBI ALLE3IS +// CHECK-INST: plbi alle3is +// CHECK-ENCODING: [0x1f,0xa3,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea31f sys #6, c10, c3, #0 +// CHECK-ERROR: error: PLBI ALLE3IS requires: poe2 + +PLBI ALLE3OS +// CHECK-INST: plbi alle3os 
+// CHECK-ENCODING: [0x1f,0xa1,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea11f sys #6, c10, c1, #0 +// CHECK-ERROR: error: PLBI ALLE3OS requires: poe2 + +PLBI ALLE2 +// CHECK-INST: plbi alle2 +// CHECK-ENCODING: [0x1f,0xa7,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca71f sys #4, c10, c7, #0 +// CHECK-ERROR: error: PLBI ALLE2 requires: poe2 + +PLBI ALLE2IS +// CHECK-INST: plbi alle2is +// CHECK-ENCODING: [0x1f,0xa3,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca31f sys #4, c10, c3, #0 +// CHECK-ERROR: error: PLBI ALLE2IS requires: poe2 + +PLBI ALLE2OS +// CHECK-INST: plbi alle2os +// CHECK-ENCODING: [0x1f,0xa1,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca11f sys #4, c10, c1, #0 +// CHECK-ERROR: error: PLBI ALLE2OS requires: poe2 + +PLBI ALLE1 +// CHECK-INST: plbi alle1 +// CHECK-ENCODING: [0x9f,0xa7,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca79f sys #4, c10, c7, #4 +// CHECK-ERROR: error: PLBI ALLE1 requires: poe2 + +PLBI ALLE1IS +// CHECK-INST: plbi alle1is +// CHECK-ENCODING: [0x9f,0xa3,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca39f sys #4, c10, c3, #4 +// CHECK-ERROR: error: PLBI ALLE1IS requires: poe2 + +PLBI ALLE1OS +// CHECK-INST: plbi alle1os +// CHECK-ENCODING: [0x9f,0xa1,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca19f sys #4, c10, c1, #4 +// CHECK-ERROR: error: PLBI ALLE1OS requires: poe2 + +PLBI VMALLE1 +// CHECK-INST: plbi vmalle1 +// CHECK-ENCODING: [0x1f,0xa7,0x08,0xd5] +// CHECK-UNKNOWN: d508a71f sys #0, c10, c7, #0 +// CHECK-ERROR: error: PLBI VMALLE1 requires: poe2 + +PLBI VMALLE1IS +// CHECK-INST: plbi vmalle1is +// CHECK-ENCODING: [0x1f,0xa3,0x08,0xd5] +// CHECK-UNKNOWN: d508a31f sys #0, c10, c3, #0 +// CHECK-ERROR: error: PLBI VMALLE1IS requires: poe2 + +PLBI VMALLE1OS +// CHECK-INST: plbi vmalle1os +// CHECK-ENCODING: [0x1f,0xa1,0x08,0xd5] +// CHECK-UNKNOWN: d508a11f sys #0, c10, c1, #0 +// CHECK-ERROR: error: PLBI VMALLE1OS requires: poe2 + +PLBI ASIDE1, X3 +// CHECK-INST: plbi aside1, x3 +// CHECK-ENCODING: [0x43,0xa7,0x08,0xd5] +// CHECK-UNKNOWN: d508a743 sys #0, c10, c7, #2, x3 +// CHECK-ERROR: error: PLBI 
ASIDE1 requires: poe2 + +PLBI ASIDE1IS, X3 +// CHECK-INST: plbi aside1is, x3 +// CHECK-ENCODING: [0x43,0xa3,0x08,0xd5] +// CHECK-UNKNOWN: d508a343 sys #0, c10, c3, #2, x3 +// CHECK-ERROR: error: PLBI ASIDE1IS requires: poe2 + +PLBI ASIDE1OS, X3 +// CHECK-INST: plbi aside1os, x3 +// CHECK-ENCODING: [0x43,0xa1,0x08,0xd5] +// CHECK-UNKNOWN: d508a143 sys #0, c10, c1, #2, x3 +// CHECK-ERROR: error: PLBI ASIDE1OS requires: poe2 + +PLBI PERME3, X3 +// CHECK-INST: plbi perme3, x3 +// CHECK-ENCODING: [0x23,0xa7,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea723 sys #6, c10, c7, #1, x3 +// CHECK-ERROR: error: PLBI PERME3 requires: poe2 + +PLBI PERME3IS, X3 +// CHECK-INST: plbi perme3is, x3 +// CHECK-ENCODING: [0x23,0xa3,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea323 sys #6, c10, c3, #1, x3 +// CHECK-ERROR: error: PLBI PERME3IS requires: poe2 + +PLBI PERME3OS, X3 +// CHECK-INST: plbi perme3os, x3 +// CHECK-ENCODING: [0x23,0xa1,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea123 sys #6, c10, c1, #1, x3 +// CHECK-ERROR: error: PLBI PERME3OS requires: poe2 + +PLBI PERME2, X3 +// CHECK-INST: plbi perme2, x3 +// CHECK-ENCODING: [0x23,0xa7,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca723 sys #4, c10, c7, #1, x3 +// CHECK-ERROR: error: PLBI PERME2 requires: poe2 + +PLBI PERME2IS, X3 +// CHECK-INST: plbi perme2is, x3 +// CHECK-ENCODING: [0x23,0xa3,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca323 sys #4, c10, c3, #1, x3 +// CHECK-ERROR: error: PLBI PERME2IS requires: poe2 + +PLBI PERME2OS, X3 +// CHECK-INST: plbi perme2os, x3 +// CHECK-ENCODING: [0x23,0xa1,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca123 sys #4, c10, c1, #1, x3 +// CHECK-ERROR: error: PLBI PERME2OS requires: poe2 + +PLBI PERME1, X3 +// CHECK-INST: plbi perme1, x3 +// CHECK-ENCODING: [0x23,0xa7,0x08,0xd5] +// CHECK-UNKNOWN: d508a723 sys #0, c10, c7, #1, x3 +// CHECK-ERROR: error: PLBI PERME1 requires: poe2 + +PLBI PERME1IS, X3 +// CHECK-INST: plbi perme1is, x3 +// CHECK-ENCODING: [0x23,0xa3,0x08,0xd5] +// CHECK-UNKNOWN: d508a323 sys #0, c10, c3, #1, x3 +// CHECK-ERROR: error: PLBI 
PERME1IS requires: poe2 + +PLBI PERME1OS, X3 +// CHECK-INST: plbi perme1os, x3 +// CHECK-ENCODING: [0x23,0xa1,0x08,0xd5] +// CHECK-UNKNOWN: d508a123 sys #0, c10, c1, #1, x3 +// CHECK-ERROR: error: PLBI PERME1OS requires: poe2 + +PLBI ALLE3NXS +// CHECK-INST: plbi alle3nxs +// CHECK-ENCODING: [0x1f,0xaf,0x0e,0xd5] +// CHECK-UNKNOWN: d50eaf1f sys #6, c10, c15, #0 +// CHECK-ERROR: error: PLBI ALLE3NXS requires: poe2 + +PLBI ALLE3ISNXS +// CHECK-INST: plbi alle3isnxs +// CHECK-ENCODING: [0x1f,0xab,0x0e,0xd5] +// CHECK-UNKNOWN: d50eab1f sys #6, c10, c11, #0 +// CHECK-ERROR: error: PLBI ALLE3ISNXS requires: poe2 + +PLBI ALLE3OSNXS +// CHECK-INST: plbi alle3osnxs +// CHECK-ENCODING: [0x1f,0xa9,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea91f sys #6, c10, c9, #0 +// CHECK-ERROR: error: PLBI ALLE3OSNXS requires: poe2 + +PLBI ALLE2NXS +// CHECK-INST: plbi alle2nxs +// CHECK-ENCODING: [0x1f,0xaf,0x0c,0xd5] +// CHECK-UNKNOWN: d50caf1f sys #4, c10, c15, #0 +// CHECK-ERROR: error: PLBI ALLE2NXS requires: poe2 + +PLBI ALLE2ISNXS +// CHECK-INST: plbi alle2isnxs +// CHECK-ENCODING: [0x1f,0xab,0x0c,0xd5] +// CHECK-UNKNOWN: d50cab1f sys #4, c10, c11, #0 +// CHECK-ERROR: error: PLBI ALLE2ISNXS requires: poe2 + +PLBI ALLE2OSNXS +// CHECK-INST: plbi alle2osnxs +// CHECK-ENCODING: [0x1f,0xa9,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca91f sys #4, c10, c9, #0 +// CHECK-ERROR: error: PLBI ALLE2OSNXS requires: poe2 + +PLBI ALLE1NXS +// CHECK-INST: plbi alle1nxs +// CHECK-ENCODING: [0x9f,0xaf,0x0c,0xd5] +// CHECK-UNKNOWN: d50caf9f sys #4, c10, c15, #4 +// CHECK-ERROR: error: PLBI ALLE1NXS requires: poe2 + +PLBI ALLE1ISNXS +// CHECK-INST: plbi alle1isnxs +// CHECK-ENCODING: [0x9f,0xab,0x0c,0xd5] +// CHECK-UNKNOWN: d50cab9f sys #4, c10, c11, #4 +// CHECK-ERROR: error: PLBI ALLE1ISNXS requires: poe2 + +PLBI ALLE1OSNXS +// CHECK-INST: plbi alle1osnxs +// CHECK-ENCODING: [0x9f,0xa9,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca99f sys #4, c10, c9, #4 +// CHECK-ERROR: error: PLBI ALLE1OSNXS requires: poe2 + +PLBI VMALLE1NXS 
+// CHECK-INST: plbi vmalle1nxs +// CHECK-ENCODING: [0x1f,0xaf,0x08,0xd5] +// CHECK-UNKNOWN: d508af1f sys #0, c10, c15, #0 +// CHECK-ERROR: error: PLBI VMALLE1NXS requires: poe2 + +PLBI VMALLE1ISNXS +// CHECK-INST: plbi vmalle1isnxs +// CHECK-ENCODING: [0x1f,0xab,0x08,0xd5] +// CHECK-UNKNOWN: d508ab1f sys #0, c10, c11, #0 +// CHECK-ERROR: error: PLBI VMALLE1ISNXS requires: poe2 + +PLBI VMALLE1OSNXS +// CHECK-INST: plbi vmalle1osnxs +// CHECK-ENCODING: [0x1f,0xa9,0x08,0xd5] +// CHECK-UNKNOWN: d508a91f sys #0, c10, c9, #0 +// CHECK-ERROR: error: PLBI VMALLE1OSNXS requires: poe2 + +PLBI ASIDE1NXS, X3 +// CHECK-INST: plbi aside1nxs, x3 +// CHECK-ENCODING: [0x43,0xaf,0x08,0xd5] +// CHECK-UNKNOWN: d508af43 sys #0, c10, c15, #2, x3 +// CHECK-ERROR: error: PLBI ASIDE1NXS requires: poe2 + +PLBI ASIDE1ISNXS, X3 +// CHECK-INST: plbi aside1isnxs, x3 +// CHECK-ENCODING: [0x43,0xab,0x08,0xd5] +// CHECK-UNKNOWN: d508ab43 sys #0, c10, c11, #2, x3 +// CHECK-ERROR: error: PLBI ASIDE1ISNXS requires: poe2 + +PLBI ASIDE1OSNXS, X3 +// CHECK-INST: plbi aside1osnxs, x3 +// CHECK-ENCODING: [0x43,0xa9,0x08,0xd5] +// CHECK-UNKNOWN: d508a943 sys #0, c10, c9, #2, x3 +// CHECK-ERROR: error: PLBI ASIDE1OSNXS requires: poe2 + +PLBI PERME3NXS, X3 +// CHECK-INST: plbi perme3nxs, x3 +// CHECK-ENCODING: [0x23,0xaf,0x0e,0xd5] +// CHECK-UNKNOWN: d50eaf23 sys #6, c10, c15, #1, x3 +// CHECK-ERROR: error: PLBI PERME3NXS requires: poe2 + +PLBI PERME3ISNXS, X3 +// CHECK-INST: plbi perme3isnxs, x3 +// CHECK-ENCODING: [0x23,0xab,0x0e,0xd5] +// CHECK-UNKNOWN: d50eab23 sys #6, c10, c11, #1, x3 +// CHECK-ERROR: error: PLBI PERME3ISNXS requires: poe2 + +PLBI PERME3OSNXS, X3 +// CHECK-INST: plbi perme3osnxs, x3 +// CHECK-ENCODING: [0x23,0xa9,0x0e,0xd5] +// CHECK-UNKNOWN: d50ea923 sys #6, c10, c9, #1, x3 +// CHECK-ERROR: error: PLBI PERME3OSNXS requires: poe2 + +PLBI PERME2NXS, X3 +// CHECK-INST: plbi perme2nxs, x3 +// CHECK-ENCODING: [0x23,0xaf,0x0c,0xd5] +// CHECK-UNKNOWN: d50caf23 sys #4, c10, c15, #1, x3 +// 
CHECK-ERROR: error: PLBI PERME2NXS requires: poe2 + +PLBI PERME2ISNXS, X3 +// CHECK-INST: plbi perme2isnxs, x3 +// CHECK-ENCODING: [0x23,0xab,0x0c,0xd5] +// CHECK-UNKNOWN: d50cab23 sys #4, c10, c11, #1, x3 +// CHECK-ERROR: error: PLBI PERME2ISNXS requires: poe2 + +PLBI PERME2OSNXS, X3 +// CHECK-INST: plbi perme2osnxs, x3 +// CHECK-ENCODING: [0x23,0xa9,0x0c,0xd5] +// CHECK-UNKNOWN: d50ca923 sys #4, c10, c9, #1, x3 +// CHECK-ERROR: error: PLBI PERME2OSNXS requires: poe2 + +PLBI PERME1NXS, X3 +// CHECK-INST: plbi perme1nxs, x3 +// CHECK-ENCODING: [0x23,0xaf,0x08,0xd5] +// CHECK-UNKNOWN: d508af23 sys #0, c10, c15, #1, x3 +// CHECK-ERROR: error: PLBI PERME1NXS requires: poe2 + +PLBI PERME1ISNXS, X3 +// CHECK-INST: plbi perme1isnxs, x3 +// CHECK-ENCODING: [0x23,0xab,0x08,0xd5] +// CHECK-UNKNOWN: d508ab23 sys #0, c10, c11, #1, x3 +// CHECK-ERROR: error: PLBI PERME1ISNXS requires: poe2 + +PLBI PERME1OSNXS, X3 +// CHECK-INST: plbi perme1osnxs, x3 +// CHECK-ENCODING: [0x23,0xa9,0x08,0xd5] +// CHECK-UNKNOWN: d508a923 sys #0, c10, c9, #1, x3 +// CHECK-ERROR: error: PLBI PERME1OSNXS requires: poe2 + +PLBI PERMAE1, X3 +// CHECK-INST: plbi permae1, x3 +// CHECK-ENCODING: [0x63,0xa7,0x08,0xd5] +// CHECK-UNKNOWN: d508a763 sys #0, c10, c7, #3, x3 +// CHECK-ERROR: error: PLBI PERMAE1 requires: poe2 + +PLBI PERMAE1IS, X3 +// CHECK-INST: plbi permae1is, x3 +// CHECK-ENCODING: [0x63,0xa3,0x08,0xd5] +// CHECK-UNKNOWN: d508a363 sys #0, c10, c3, #3, x3 +// CHECK-ERROR: error: PLBI PERMAE1IS requires: poe2 + +PLBI PERMAE1OS, X3 +// CHECK-INST: plbi permae1os, x3 +// CHECK-ENCODING: [0x63,0xa1,0x08,0xd5] +// CHECK-UNKNOWN: d508a163 sys #0, c10, c1, #3, x3 +// CHECK-ERROR: error: PLBI PERMAE1OS requires: poe2 + +PLBI PERMAE1NXS, X3 +// CHECK-INST: plbi permae1nxs, x3 +// CHECK-ENCODING: [0x63,0xaf,0x08,0xd5] +// CHECK-UNKNOWN: d508af63 sys #0, c10, c15, #3, x3 +// CHECK-ERROR: error: PLBI PERMAE1NXS requires: poe2 + +PLBI PERMAE1ISNXS, X3 +// CHECK-INST: plbi permae1isnxs, x3 +// 
CHECK-ENCODING: [0x63,0xab,0x08,0xd5] +// CHECK-UNKNOWN: d508ab63 sys #0, c10, c11, #3, x3 +// CHECK-ERROR: error: PLBI PERMAE1ISNXS requires: poe2 + +PLBI PERMAE1OSNXS, X3 +// CHECK-INST: plbi permae1osnxs, x3 +// CHECK-ENCODING: [0x63,0xa9,0x08,0xd5] +// CHECK-UNKNOWN: d508a963 sys #0, c10, c9, #3, x3 +// CHECK-ERROR: error: PLBI PERMAE1OSNXS requires: poe2 diff --git a/llvm/test/MC/AArch64/arm-tev.s b/llvm/test/MC/AArch64/arm-tev.s new file mode 100644 index 0000000000000..e80210f060088 --- /dev/null +++ b/llvm/test/MC/AArch64/arm-tev.s @@ -0,0 +1,41 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+tev < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+tev < %s \ +// RUN: | llvm-objdump -d --mattr=+tev --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+tev < %s \ +// RUN: | llvm-objdump -d --mattr=-tev --no-print-imm-hex - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+tev < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+tev -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +//------------------------------------------------------------------------------ +// TIndex Exception-like Vector (FEAT_TEV). 
+//------------------------------------------------------------------------------ + +TENTER #32 +// CHECK-INST: tenter #32 +// CHECK-ENCODING: [0x00,0x04,0xe0,0xd4] +// CHECK-UNKNOWN: d4e00400 +// CHECK-ERROR: error: instruction requires: tev + +TENTER #32, NB +// CHECK-INST: tenter #32, nb +// CHECK-ENCODING: [0x00,0x04,0xe2,0xd4] +// CHECK-UNKNOWN: d4e20400 +// CHECK-ERROR: error: instruction requires: tev + +TEXIT +// CHECK-INST: texit +// CHECK-ENCODING: [0xe0,0x03,0xff,0xd6] +// CHECK-UNKNOWN: d6ff03e0 +// CHECK-ERROR: error: instruction requires: tev + +TEXIT NB +// CHECK-INST: texit nb +// CHECK-ENCODING: [0xe0,0x07,0xff,0xd6] +// CHECK-UNKNOWN: d6ff07e0 +// CHECK-ERROR: error: instruction requires: tev diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 328013e961411..17d84bbe36c6f 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1453,6 +1453,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { AArch64::AEK_SVE2P3, AArch64::AEK_SVE_B16MM, AArch64::AEK_F16MM, AArch64::AEK_F16F32DOT, AArch64::AEK_F16F32MM, AArch64::AEK_MOPS_GO, + AArch64::AEK_POE2, AArch64::AEK_TEV, + AArch64::AEK_BTIE, }; std::vector Features; @@ -1577,6 +1579,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) { EXPECT_TRUE(llvm::is_contained(Features, "+f16f32dot")); EXPECT_TRUE(llvm::is_contained(Features, "+f16f32mm")); EXPECT_TRUE(llvm::is_contained(Features, "+mops-go")); + EXPECT_TRUE(llvm::is_contained(Features, "+poe2")); + EXPECT_TRUE(llvm::is_contained(Features, "+tev")); + EXPECT_TRUE(llvm::is_contained(Features, "+btie")); // Assuming we listed every extension above, this should produce the same // result. 
@@ -1756,6 +1761,9 @@ TEST(TargetParserTest, AArch64ArchExtFeature) { {"f16f32dot", "nof16f32dot", "+f16f32dot", "-f16f32dot"}, {"f16f32mm", "nof16f32mm", "+f16f32mm", "-f16f32mm"}, {"mops-go", "nomops-go", "+mops-go", "-mops-go"}, + {"poe2", "nopoe2", "+poe2", "-poe2"}, + {"tev", "notev", "+tev", "-tev"}, + {"btie", "nobtie", "+btie", "-btie"}, }; for (unsigned i = 0; i < std::size(ArchExt); i++) { From c021e168b39725f445cb7f2704e08543139fdc90 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 14 Nov 2025 10:50:26 +0000 Subject: [PATCH 19/22] [AArch64][SME] Handle SME state around TLS-descriptor calls (#155608) This patch ensures we switch out of streaming mode before TLS-descriptor calls. ZA state will also be preserved when using the new SME ABI lowering (`-aarch64-new-sme-abi`). Fixes #152165 --- .../Target/AArch64/AArch64ISelLowering.cpp | 34 +++- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 4 +- llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 14 +- llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll | 159 ++++++++++++++++++ 5 files changed, 205 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8aea0d23ffc0a..f5f732da99349 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9602,8 +9602,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // using a chain can result in incorrect scheduling. The markers refer to // the position just before the CALLSEQ_START (though occur after as // CALLSEQ_START lacks in-glue). 
- Chain = DAG.getNode(*ZAMarkerNode, DL, DAG.getVTList(MVT::Other), - {Chain, Chain.getValue(1)}); + Chain = + DAG.getNode(*ZAMarkerNode, DL, DAG.getVTList(MVT::Other, MVT::Glue), + {Chain, Chain.getValue(1)}); } } @@ -10608,16 +10609,41 @@ SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); + auto &MF = DAG.getMachineFunction(); + auto *FuncInfo = MF.getInfo(); + SDValue Glue; SDValue Chain = DAG.getEntryNode(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SMECallAttrs TLSCallAttrs(FuncInfo->getSMEFnAttrs(), {}, SMEAttrs::Normal); + bool RequiresSMChange = TLSCallAttrs.requiresSMChange(); + + auto ChainAndGlue = [](SDValue Chain) -> std::pair { + return {Chain, Chain.getValue(1)}; + }; + + if (RequiresSMChange) + std::tie(Chain, Glue) = + ChainAndGlue(changeStreamingMode(DAG, DL, /*Enable=*/false, Chain, Glue, + getSMToggleCondition(TLSCallAttrs))); + unsigned Opcode = DAG.getMachineFunction().getInfo()->hasELFSignedGOT() ? AArch64ISD::TLSDESC_AUTH_CALLSEQ : AArch64ISD::TLSDESC_CALLSEQ; - Chain = DAG.getNode(Opcode, DL, NodeTys, {Chain, SymAddr}); - SDValue Glue = Chain.getValue(1); + SDValue Ops[] = {Chain, SymAddr, Glue}; + std::tie(Chain, Glue) = ChainAndGlue(DAG.getNode( + Opcode, DL, NodeTys, Glue ? 
ArrayRef(Ops) : ArrayRef(Ops).drop_back())); + + if (TLSCallAttrs.requiresLazySave()) + std::tie(Chain, Glue) = ChainAndGlue(DAG.getNode( + AArch64ISD::REQUIRES_ZA_SAVE, DL, NodeTys, {Chain, Chain.getValue(1)})); + + if (RequiresSMChange) + std::tie(Chain, Glue) = + ChainAndGlue(changeStreamingMode(DAG, DL, /*Enable=*/true, Chain, Glue, + getSMToggleCondition(TLSCallAttrs))); return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6366624d4499b..011911a4419ad 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1042,11 +1042,11 @@ def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>; // offset of a variable into X0, using the TLSDesc model. def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ", SDT_AArch64TLSDescCallSeq, - [SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; + [SDNPOutGlue, SDNPOptInGlue, SDNPHasChain, SDNPVariadic]>; def AArch64tlsdesc_auth_callseq : SDNode<"AArch64ISD::TLSDESC_AUTH_CALLSEQ", SDT_AArch64TLSDescCallSeq, - [SDNPOutGlue, SDNPHasChain, SDNPVariadic]>; + [SDNPOutGlue, SDNPOptInGlue, SDNPHasChain, SDNPVariadic]>; def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge", SDT_AArch64WrapperLarge>; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 752b185832c30..5bb70ee11b06d 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -113,12 +113,12 @@ def CommitZASavePseudo def AArch64_inout_za_use : SDNode<"AArch64ISD::INOUT_ZA_USE", SDTypeProfile<0, 0,[]>, - [SDNPHasChain, SDNPInGlue]>; + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; def : Pat<(AArch64_inout_za_use), (InOutZAUsePseudo)>; def AArch64_requires_za_save : SDNode<"AArch64ISD::REQUIRES_ZA_SAVE", SDTypeProfile<0, 0,[]>, - [SDNPHasChain, SDNPInGlue]>; + [SDNPHasChain, 
SDNPInGlue, SDNPOutGlue]>; def : Pat<(AArch64_requires_za_save), (RequiresZASavePseudo)>; def AArch64_sme_state_alloc diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index 7cb500394cec2..24d30c731b945 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -381,6 +381,17 @@ static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) { LiveUnits.addReg(AArch64::W0_HI); } +[[maybe_unused]] bool isCallStartOpcode(unsigned Opc) { + switch (Opc) { + case AArch64::TLSDESC_CALLSEQ: + case AArch64::TLSDESC_AUTH_CALLSEQ: + case AArch64::ADJCALLSTACKDOWN: + return true; + default: + return false; + } +} + FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && @@ -424,8 +435,7 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { // Note: We treat Agnostic ZA as inout_za with an alternate save/restore. auto [NeededState, InsertPt] = getZAStateBeforeInst( *TRI, MI, /*ZAOffAtReturn=*/SMEFnAttrs.hasPrivateZAInterface()); - assert((InsertPt == MBBI || - InsertPt->getOpcode() == AArch64::ADJCALLSTACKDOWN) && + assert((InsertPt == MBBI || isCallStartOpcode(InsertPt->getOpcode())) && "Unexpected state change insertion point!"); // TODO: Do something to avoid state changes where NZCV is live. 
if (MBBI == FirstTerminatorInsertPt) diff --git a/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll b/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll new file mode 100644 index 0000000000000..f72ccadea5dba --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -aarch64-new-sme-abi -relocation-model=pic < %s | FileCheck %s + +@x = external thread_local local_unnamed_addr global i32, align 4 + +define i32 @load_tls_streaming_compat() nounwind "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: load_tls_streaming_compat: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: mrs x8, SVCR +; CHECK-NEXT: tbz w8, #0, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB0_2: // %entry +; CHECK-NEXT: adrp x0, :tlsdesc:x +; CHECK-NEXT: ldr x1, [x0, :tlsdesc_lo12:x] +; CHECK-NEXT: add x0, x0, :tlsdesc_lo12:x +; CHECK-NEXT: .tlsdesccall x +; CHECK-NEXT: blr x1 +; CHECK-NEXT: tbz w8, #0, .LBB0_4 +; CHECK-NEXT: // %bb.3: // %entry +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB0_4: // %entry +; CHECK-NEXT: mrs x8, TPIDR_EL0 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr w0, [x8, x0] +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) + %1 = load i32, 
ptr %0, align 4 + ret i32 %1 +} + +define i32 @load_tls_streaming() nounwind "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: load_tls_streaming: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: smstop sm +; CHECK-NEXT: adrp x0, :tlsdesc:x +; CHECK-NEXT: ldr x1, [x0, :tlsdesc_lo12:x] +; CHECK-NEXT: add x0, x0, :tlsdesc_lo12:x +; CHECK-NEXT: .tlsdesccall x +; CHECK-NEXT: blr x1 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: mrs x8, TPIDR_EL0 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr w0, [x8, x0] +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define i32 @load_tls_shared_za() nounwind "aarch64_inout_za" { +; CHECK-LABEL: load_tls_shared_za: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: msub x9, x8, x8, x9 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: sub x10, x29, #16 +; CHECK-NEXT: stp x9, x8, [x29, #-16] +; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: adrp x0, :tlsdesc:x +; CHECK-NEXT: ldr x1, [x0, :tlsdesc_lo12:x] +; CHECK-NEXT: add x0, x0, :tlsdesc_lo12:x +; CHECK-NEXT: .tlsdesccall x +; CHECK-NEXT: blr x1 +; CHECK-NEXT: mrs x8, TPIDR_EL0 +; CHECK-NEXT: ldr w0, [x8, x0] +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x9, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: cbnz x9, .LBB2_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB2_2: // %entry +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} + +define i32 @load_tls_streaming_shared_za() nounwind "aarch64_inout_za" "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: load_tls_streaming_shared_za: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: msub x9, x8, x8, x9 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stp x9, x8, [x29, #-80] +; CHECK-NEXT: smstop sm +; CHECK-NEXT: sub x8, x29, #80 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: adrp x0, :tlsdesc:x +; CHECK-NEXT: ldr x1, [x0, :tlsdesc_lo12:x] +; CHECK-NEXT: add x0, x0, :tlsdesc_lo12:x +; CHECK-NEXT: .tlsdesccall x +; CHECK-NEXT: blr x1 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: mrs x8, TPIDR_EL0 +; CHECK-NEXT: ldr w0, [x8, x0] +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x9, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #80 +; CHECK-NEXT: cbnz x9, .LBB3_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB3_2: // %entry +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sub sp, x29, #64 +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @x) + %1 = load i32, ptr %0, align 4 + ret i32 %1 +} From 3890c97fc028535b67910f35eb8920112697c9f1 Mon Sep 17 00:00:00 2001 From: Kerang Mao Date: Fri, 14 Nov 2025 18:55:05 +0800 Subject: [PATCH 20/22] [InferAddressSpaces] Fix bad `addrspacecast` insertion for phinode 
(#163528) The IR verifier will crash if there are any instructions located before a phi-node. The `infer-address-spaces` pass would like to insert `addrspacecast` before a phi-node in some corner cases. Indeed, since the operand pointer (the phi-node's incoming value) has been determined to be in `NewAS` by the pass, it is safe to `addrspacecast` it immediately after the position where it is defined. Co-authored-by: Kerang Mao --- .../Transforms/Scalar/InferAddressSpaces.cpp | 39 +++++++++++++ .../AMDGPU/phinode-address-infer.ll | 55 ++++++++++++++++++ .../NVPTX/phinode-address-infer.ll | 57 +++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll create mode 100644 llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp index 3ad87545953ff..352a1b331001a 100644 --- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -617,6 +617,41 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const { return Postorder; } +// Inserts an addrspacecast for a phi node operand, handling the proper +// insertion position based on the operand type. +static Value *phiNodeOperandWithNewAddressSpace(AddrSpaceCastInst *NewI, + Value *Operand) { + auto InsertBefore = [NewI](auto It) { + NewI->insertBefore(It); + NewI->setDebugLoc(It->getDebugLoc()); + return NewI; + }; + + if (auto *Arg = dyn_cast(Operand)) { + // For arguments, insert the cast at the beginning of entry block. + // Consider inserting at the dominating block for better placement. + Function *F = Arg->getParent(); + auto InsertI = F->getEntryBlock().getFirstNonPHIIt(); + return InsertBefore(InsertI); + } + + // No check for Constant here, as constants are already handled. 
+ assert(isa(Operand)); + + Instruction *OpInst = cast(Operand); + if (LLVM_UNLIKELY(OpInst->getOpcode() == Instruction::PHI)) { + // If the operand is defined by another PHI node, insert after the first + // non-PHI instruction at the corresponding basic block. + auto InsertI = OpInst->getParent()->getFirstNonPHIIt(); + return InsertBefore(InsertI); + } + + // Otherwise, insert immediately after the operand definition. + NewI->insertAfter(OpInst->getIterator()); + NewI->setDebugLoc(OpInst->getDebugLoc()); + return NewI; +} + // A helper function for cloneInstructionWithNewAddressSpace. Returns the clone // of OperandUse.get() in the new address space. If the clone is not ready yet, // returns poison in the new address space as a placeholder. @@ -642,6 +677,10 @@ static Value *operandWithNewAddressSpaceOrCreatePoison( unsigned NewAS = I->second; Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS); auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy); + + if (LLVM_UNLIKELY(Inst->getOpcode() == Instruction::PHI)) + return phiNodeOperandWithNewAddressSpace(NewI, Operand); + NewI->insertBefore(Inst->getIterator()); NewI->setDebugLoc(Inst->getDebugLoc()); return NewI; diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll new file mode 100644 index 0000000000000..319c26a24b271 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/phinode-address-infer.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -S -passes='require,infer-address-spaces' %s | FileCheck %s + +define void @test(ptr %lhs_ptr, ptr %rhs_ptr) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[LHS_PTR:%.*]], ptr [[RHS_PTR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[PTR_1:%.*]] = load ptr, ptr [[LHS_PTR]], align 8 +; 
CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3) +; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_1]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]]) +; CHECK-NEXT: [[PTR_2:%.*]] = load ptr, ptr [[RHS_PTR]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR_2]] to ptr addrspace(3) +; CHECK-NEXT: [[BOOL_2:%.*]] = tail call i1 @llvm.amdgcn.is.shared(ptr [[PTR_2]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_2]]) +; CHECK-NEXT: br i1 poison, label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: br label %[[IF_SINK_SPLIT:.*]] +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[V2:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: br label %[[IF_SINK_SPLIT]] +; CHECK: [[IF_SINK_SPLIT]]: +; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], %[[IF_THEN]] ], [ [[TMP1]], %[[IF_ELSE]] ] +; CHECK-NEXT: [[V_SINK:%.*]] = phi i32 [ [[V1]], %[[IF_THEN]] ], [ [[V2]], %[[IF_ELSE]] ] +; CHECK-NEXT: store i32 [[V_SINK]], ptr addrspace(3) [[PTR_SINK]], align 4 +; CHECK-NEXT: ret void +; +entry: + %ptr.1 = load ptr, ptr %lhs_ptr, align 8 + %bool.1 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.1) + tail call void @llvm.assume(i1 %bool.1) + + %ptr.2 = load ptr, ptr %rhs_ptr, align 8 + %bool.2 = tail call i1 @llvm.amdgcn.is.shared(ptr %ptr.2) + tail call void @llvm.assume(i1 %bool.2) + br i1 poison, label %if.then, label %if.else + +if.then: ; preds = %entry + %v1 = load i32, ptr null, align 4 + br label %if.sink.split + +if.else: ; preds = %entry + %v2 = load i32, ptr null, align 4 + br label %if.sink.split + +if.sink.split: ; preds = %if.else, %if.then + %ptr.sink = phi ptr [ %ptr.1, %if.then ], [ %ptr.2, %if.else ] + %v.sink = phi i32 [ %v1, %if.then ], [ %v2, %if.else ] + store i32 %v.sink, ptr %ptr.sink, align 4 + ret void +} + +declare void @llvm.assume(i1 noundef) +declare i1 @llvm.amdgcn.is.shared(ptr) diff 
--git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll new file mode 100644 index 0000000000000..5a3a8b9117b35 --- /dev/null +++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/phinode-address-infer.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes='require,infer-address-spaces' %s | FileCheck %s + +;;; Handle write corner case for infer-address-spaces with phi-nodes. The +;;; verifier will crash if we insert `addrspacecast` before phi-node. + +declare void @llvm.assume(i1 noundef) +declare i1 @llvm.nvvm.isspacep.shared(ptr) +declare i1 @llvm.nvvm.isspacep.global(ptr) + +define void @phinode_instr() { +; CHECK-LABEL: @phinode_instr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_1:%.*]] = load ptr, ptr null, align 8 +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[PTR_1]] to ptr addrspace(3) +; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.shared(ptr [[PTR_1]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[BOOL_1]]) +; CHECK-NEXT: br label [[IF_SINK_SPLIT:%.*]] +; CHECK: if.sink.split: +; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(3) [ [[TMP0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr addrspace(3) [[PTR_SINK]], align 4 +; CHECK-NEXT: ret void +; +entry: + %ptr.1 = load ptr, ptr null, align 8 + %bool.1 = tail call i1 @llvm.nvvm.isspacep.shared(ptr %ptr.1) + tail call void @llvm.assume(i1 %bool.1) + br label %if.sink.split + +if.sink.split: ; preds = %entry + %ptr.sink = phi ptr [ %ptr.1, %entry ] + store i32 1, ptr %ptr.sink, align 4 + ret void +} + +define void @phinode_argument(ptr %lhs_ptr) { +; CHECK-LABEL: @phinode_argument( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[LHS_PTR:%.*]] to ptr addrspace(1) +; CHECK-NEXT: [[BOOL_1:%.*]] = tail call i1 @llvm.nvvm.isspacep.global(ptr [[LHS_PTR]]) +; CHECK-NEXT: tail call 
void @llvm.assume(i1 [[BOOL_1]]) +; CHECK-NEXT: br label [[IF_SINK_SPLIT:%.*]] +; CHECK: if.sink.split: +; CHECK-NEXT: [[PTR_SINK:%.*]] = phi ptr addrspace(1) [ [[TMP0]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr addrspace(1) [[PTR_SINK]], align 4 +; CHECK-NEXT: ret void +; +entry: + %bool.1 = tail call i1 @llvm.nvvm.isspacep.global(ptr %lhs_ptr) + tail call void @llvm.assume(i1 %bool.1) + br label %if.sink.split + +if.sink.split: ; preds = %entry + %ptr.sink = phi ptr [ %lhs_ptr, %entry ] + store i32 1, ptr %ptr.sink, align 4 + ret void +} From 72c69aefbae8bfb087622e642acbd0cba7578747 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 14 Nov 2025 11:00:57 +0000 Subject: [PATCH 21/22] [AMDGPU] Make use of getFunction and getMF. NFC. (#167872) --- .../Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 +- .../AMDGPU/AMDGPUInstructionSelector.cpp | 6 ++-- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 2 +- .../AMDGPU/AMDGPULowerKernelArguments.cpp | 2 +- .../AMDGPU/AMDGPULowerKernelAttributes.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 4 +-- .../AMDGPU/AMDGPUPreLegalizerCombiner.cpp | 3 +- .../AMDGPU/AMDGPUPreloadKernelArguments.cpp | 6 ++-- .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 2 +- .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 6 ++-- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 18 ++++++------ .../AMDGPU/AMDGPURewriteOutArguments.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp | 2 +- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 2 +- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 6 ++-- llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 8 ++--- llvm/lib/Target/AMDGPU/R600MCInstLower.cpp | 2 +- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 29 +++++++++---------- 
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 4 +-- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 2 +- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 4 +-- 28 files changed, 67 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index c28c25fe5ac9e..2bdaddaa11761 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -65,7 +65,7 @@ recursivelyVisitUsers(GlobalValue &GV, continue; if (Instruction *I = dyn_cast(U)) { - Function *F = I->getParent()->getParent(); + Function *F = I->getFunction(); if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { // FIXME: This is a horrible hack. We should always respect noinline, // and just let us hit the error when we can't handle this. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp index 5700468e2420e..ddc675bbb8fb7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp @@ -1968,7 +1968,7 @@ class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy { int NumBits = 0; auto TRI = TII->getRegisterInfo(); - auto &MRI = MI->getParent()->getParent()->getRegInfo(); + auto &MRI = MI->getMF()->getRegInfo(); for (auto &Elt : Collection) { auto Op = Elt->getInstr()->getOperand(0); auto Size = diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index f5081a9d2dd56..3fbdab7ec4ed2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1216,7 +1216,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( const SmallVectorImpl &Ins) const { const MachineFunction &MF = State.getMachineFunction(); const Function &Fn = MF.getFunction(); - LLVMContext &Ctx = Fn.getParent()->getContext(); + LLVMContext &Ctx = Fn.getContext(); 
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(); CallingConv::ID CC = Fn.getCallingConv(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 15ed60b46a9c0..650df2a87506a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1998,7 +1998,7 @@ bool AMDGPUInstructionSelector::selectDSAppendConsume(MachineInstr &MI, } bool AMDGPUInstructionSelector::selectInitWholeWave(MachineInstr &MI) const { - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); SIMachineFunctionInfo *MFInfo = MF->getInfo(); MFInfo->setInitWholeWave(); @@ -3690,7 +3690,7 @@ bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic( MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3; MI.setDesc(TII.get(MI.getOperand(OpcodeOpIdx).getImm())); MI.removeOperand(OpcodeOpIdx); - MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); + MI.addImplicitDefUseOperands(*MI.getMF()); return constrainSelectedInstRegOperands(MI, TII, TRI, RBI); } @@ -3793,7 +3793,7 @@ bool AMDGPUInstructionSelector::selectSMFMACIntrin(MachineInstr &MI) const { MI.removeOperand(4); // VDst_In MI.removeOperand(1); // Intrinsic ID MI.addOperand(VDst_In); // Readd VDst_In to the end - MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); + MI.addImplicitDefUseOperands(*MI.getMF()); const MCInstrDesc &MCID = MI.getDesc(); if (MCID.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1) { MI.getOperand(0).setIsEarlyClobber(true); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index aa755344d3325..821d7f38fcb41 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -845,7 +845,7 @@ bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { return false; } 
} - LLVMContext &context = CI->getParent()->getParent()->getContext(); + LLVMContext &context = CI->getContext(); Constant *nval; if (getArgType(FInfo) == AMDGPULibFunc::F32) { SmallVector FVal; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp index dec781d71c54e..755b44c0ca93a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp @@ -64,7 +64,7 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) { return false; const GCNSubtarget &ST = TM.getSubtarget(F); - LLVMContext &Ctx = F.getParent()->getContext(); + LLVMContext &Ctx = F.getContext(); const DataLayout &DL = F.getDataLayout(); BasicBlock &EntryBlock = *F.begin(); IRBuilder<> Builder(&EntryBlock, getInsertPt(EntryBlock)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index fed7a13a69bc4..248d7dcc9ec3e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -98,7 +98,7 @@ static void annotateGridSizeLoadWithRangeMD(LoadInst *Load, } static bool processUse(CallInst *CI, bool IsV5OrAbove) { - Function *F = CI->getParent()->getParent(); + Function *F = CI->getFunction(); auto *MD = F->getMetadata("reqd_work_group_size"); const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 844649ebb9ae6..dee3dff3bf575 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -243,7 +243,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { int MCOpcode = TII->pseudoToMCOpcode(Opcode); if (MCOpcode == -1) { - LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); + LLVMContext &C = 
MI->getMF()->getFunction().getContext(); C.emitError("AMDGPUMCInstLower::lower - Pseudo instruction doesn't have " "a target-specific version: " + Twine(MI->getOpcode())); } @@ -332,7 +332,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) { StringRef Err; if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) { - LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); + LLVMContext &C = MI->getMF()->getFunction().getContext(); C.emitError("Illegal instruction detected: " + Err); MI->print(errs()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp index 6e54737065d20..4a70c5d6e78f6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -177,8 +177,7 @@ void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { Register Src = MatchInfo.Origin; - assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == - LLT::scalar(64)); + assert(MI.getMF()->getRegInfo().getType(Src) == LLT::scalar(64)); const LLT S32 = LLT::scalar(32); auto Unmerge = B.buildUnmerge(S32, Src); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp index ffbbf63969427..7d6e3edc75e1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernelArguments.cpp @@ -127,7 +127,7 @@ class PreloadKernelArgInfo { // will also be preloaded even if that data is unused. 
Function *cloneFunctionWithPreloadImplicitArgs(unsigned LastPreloadIndex) { FunctionType *FT = F.getFunctionType(); - LLVMContext &Ctx = F.getParent()->getContext(); + LLVMContext &Ctx = F.getContext(); SmallVector FTypes(FT->param_begin(), FT->param_end()); for (unsigned I = 0; I <= LastPreloadIndex; ++I) FTypes.push_back(getHiddenArgType(Ctx, HiddenArg(I))); @@ -196,7 +196,7 @@ class PreloadKernelArgInfo { SmallVector, 4> ImplicitArgLoads; for (auto *U : ImplicitArgPtr->users()) { Instruction *CI = dyn_cast(U); - if (!CI || CI->getParent()->getParent() != &F) + if (!CI || CI->getFunction() != &F) continue; for (auto *U : CI->users()) { @@ -213,7 +213,7 @@ class PreloadKernelArgInfo { continue; // FIXME: Expand handle merged loads. - LLVMContext &Ctx = F.getParent()->getContext(); + LLVMContext &Ctx = F.getContext(); Type *LoadTy = Load->getType(); HiddenArg HA = getHiddenArgFromOffset(Offset); if (HA == END_HIDDEN_ARGS || LoadTy != getHiddenArgType(Ctx, HA)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index f5e14c71b02d9..416de901ef19b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -129,7 +129,7 @@ static StringRef getAsConstantStr(Value *V) { static void diagnoseInvalidFormatString(const CallBase *CI) { CI->getContext().diagnose(DiagnosticInfoUnsupported( - *CI->getParent()->getParent(), + *CI->getFunction(), "printf format string must be a trivially resolved constant string " "global variable", CI->getDebugLoc())); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index ddabd25894414..bb95265a794a0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -1378,7 +1378,7 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) { auto visitUsers = [&](const GlobalVariable 
*GV, const Constant *Val) -> bool { for (const User *U : Val->users()) { if (const Instruction *Use = dyn_cast(U)) { - if (Use->getParent()->getParent() == &F) + if (Use->getFunction() == &F) return true; } else { const Constant *C = cast(U); @@ -1489,7 +1489,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I, const DataLayout &DL = Mod->getDataLayout(); IRBuilder<> Builder(&I); - const Function &ContainingFunction = *I.getParent()->getParent(); + const Function &ContainingFunction = *I.getFunction(); CallingConv::ID CC = ContainingFunction.getCallingConv(); // Don't promote the alloca to LDS for shader calling conventions as the work @@ -1544,7 +1544,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I, LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n"); - Function *F = I.getParent()->getParent(); + Function *F = I.getFunction(); Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize); GlobalVariable *GV = new GlobalVariable( diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 90d319f578f44..7ed026ee5f69e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -468,7 +468,7 @@ RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappings( const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -2409,7 +2409,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( if (DstBank == &AMDGPU::VCCRegBank) break; - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); ApplyRegBankMapping ApplyBank(B, *this, MRI, DstBank); LegalizerHelper Helper(*MF, ApplyBank, B); @@ -2489,7 +2489,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( // There is no VALU abs instruction so we need to replace it with a 
sub and // max combination. if (SrcBank && SrcBank == &AMDGPU::VGPRRegBank) { - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); ApplyRegBankMapping Apply(B, *this, MRI, &AMDGPU::VGPRRegBank); LegalizerHelper Helper(*MF, Apply, B); @@ -3604,7 +3604,7 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI, } bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) @@ -3620,7 +3620,7 @@ bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const { const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); @@ -3638,7 +3638,7 @@ AMDGPURegisterBankInfo::getDefaultMappingSOP(const MachineInstr &MI) const { const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); @@ -3662,7 +3662,7 @@ AMDGPURegisterBankInfo::getDefaultMappingVOP(const MachineInstr &MI) const { const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(MI.getNumOperands()); @@ -3741,7 +3741,7 @@ 
AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI, const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector OpdsMapping(2); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); @@ -3831,7 +3831,7 @@ AMDGPURegisterBankInfo::getAGPROpMapping(Register Reg, // const RegisterBankInfo::InstructionMapping & AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); if (MI.isCopy() || MI.getOpcode() == AMDGPU::G_FREEZE) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp index 4b1f80c777827..a2e16c7f873f7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp @@ -299,7 +299,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) { if (Replacements.empty()) return false; - LLVMContext &Ctx = F.getParent()->getContext(); + LLVMContext &Ctx = F.getContext(); StructType *NewRetTy = StructType::create(Ctx, ReturnTypes, F.getName()); FunctionType *NewFuncTy = FunctionType::get(NewRetTy, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 26e0b3dfc2e8a..c7528f993da1e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -282,7 +282,7 @@ bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const { } bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const { - Function *Kernel = I->getParent()->getParent(); + Function *Kernel = I->getFunction(); unsigned MinSize = 0; unsigned 
MaxSize = getFlatWorkGroupSizes(*Kernel).second; bool IdQuery = false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp index 4a9437b37aa39..8695a25b10227 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp @@ -523,7 +523,7 @@ static void replacesUsesOfGlobalInFunction(Function *Func, GlobalVariable *GV, auto ReplaceUsesLambda = [Func](const Use &U) -> bool { auto *V = U.getUser(); if (auto *Inst = dyn_cast(V)) { - auto *Func1 = Inst->getParent()->getParent(); + auto *Func1 = Inst->getFunction(); if (Func == Func1) return true; } diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 0c5e3d0837800..ee5f9b0019db6 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -581,7 +581,7 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) { bool GCNDownwardRPTracker::reset(const MachineInstr &MI, const LiveRegSet *LiveRegsCopy) { - MRI = &MI.getParent()->getParent()->getRegInfo(); + MRI = &MI.getMF()->getRegInfo(); LastTrackedMI = nullptr; MBBEnd = MI.getParent()->end(); NextMI = &MI; diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 4b22c68ef01c5..f54874d2a5b40 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -463,7 +463,7 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) { } llvm::sort(Indexes); - auto &MRI = (*R.begin())->getParent()->getParent()->getRegInfo(); + auto &MRI = (*R.begin())->getMF()->getRegInfo(); DenseMap LiveRegMap; SmallVector LiveIdxs, SRLiveIdxs; for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { @@ -493,13 +493,13 @@ getLiveRegMap(Range &&R, bool After, LiveIntervals &LIS) { inline GCNRPTracker::LiveRegSet getLiveRegsAfter(const MachineInstr &MI, const LiveIntervals &LIS) { return 
getLiveRegs(LIS.getInstructionIndex(MI).getDeadSlot(), LIS, - MI.getParent()->getParent()->getRegInfo()); + MI.getMF()->getRegInfo()); } inline GCNRPTracker::LiveRegSet getLiveRegsBefore(const MachineInstr &MI, const LiveIntervals &LIS) { return getLiveRegs(LIS.getInstructionIndex(MI).getBaseIndex(), LIS, - MI.getParent()->getParent()->getRegInfo()); + MI.getMF()->getRegInfo()); } template diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 01040854e1577..7f805e67c62ec 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -176,7 +176,7 @@ bool R600InstrInfo::usesVertexCache(unsigned Opcode) const { } bool R600InstrInfo::usesVertexCache(const MachineInstr &MI) const { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); return !AMDGPU::isCompute(MF->getFunction().getCallingConv()) && usesVertexCache(MI.getOpcode()); } @@ -186,7 +186,7 @@ bool R600InstrInfo::usesTextureCache(unsigned Opcode) const { } bool R600InstrInfo::usesTextureCache(const MachineInstr &MI) const { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); return (AMDGPU::isCompute(MF->getFunction().getCallingConv()) && usesVertexCache(MI.getOpcode())) || usesTextureCache(MI.getOpcode()); @@ -948,7 +948,7 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI, .setReg(Pred[2].getReg()); MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W)) .setReg(Pred[2].getReg()); - MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); + MachineInstrBuilder MIB(*MI.getMF(), MI); MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit); return true; } @@ -956,7 +956,7 @@ bool R600InstrInfo::PredicateInstruction(MachineInstr &MI, if (PIdx != -1) { MachineOperand &PMO = MI.getOperand(PIdx); PMO.setReg(Pred[2].getReg()); - MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); + MachineInstrBuilder 
MIB(*MI.getMF(), MI); MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit); return true; } diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp index 48b4e7f0d07be..ac6508c2322ce 100644 --- a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp @@ -55,7 +55,7 @@ void R600AsmPrinter::emitInstruction(const MachineInstr *MI) { StringRef Err; if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) { - LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext(); + LLVMContext &C = MI->getMF()->getFunction().getContext(); C.emitError("Illegal instruction detected: " + Err); MI->print(errs()); } diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 7793907c032d2..9a8710becba39 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -238,7 +238,7 @@ static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI, const SIRegisterInfo *TRI, const SIInstrInfo *TII) { - MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); auto &Src = MI.getOperand(1); Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = Src.getReg(); @@ -930,7 +930,7 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI, // s_mov_b32. 
if (isSafeToFoldImmIntoCopy(&MI, MRI->getVRegDef(SrcReg), TII, SMovOp, Imm)) { MI.getOperand(1).ChangeToImmediate(Imm); - MI.addImplicitDefUseOperands(*MI.getParent()->getParent()); + MI.addImplicitDefUseOperands(*MI.getMF()); MI.setDesc(TII->get(SMovOp)); return true; } diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 2c00e23d113cb..f4d38c0c3d8c3 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1327,7 +1327,7 @@ void SIFoldOperandsImpl::foldOperand( if (MovOp == AMDGPU::V_MOV_B16_t16_e64) { const auto &SrcOp = UseMI->getOperand(UseOpIdx); MachineOperand NewSrcOp(SrcOp); - MachineFunction *MF = UseMI->getParent()->getParent(); + MachineFunction *MF = UseMI->getMF(); UseMI->removeOperand(1); UseMI->addOperand(*MF, MachineOperand::CreateImm(0)); // src0_modifiers UseMI->addOperand(NewSrcOp); // src0 @@ -1780,7 +1780,7 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, if (CopiesToReplace.empty() && FoldList.empty()) return Changed; - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); // Make sure we add EXEC uses to any new v_mov instructions created. 
for (MachineInstr *Copy : CopiesToReplace) Copy->addImplicitDefUseOperands(*MF); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 86e31e702f9b0..56f2abba12a01 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4062,7 +4062,7 @@ bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { if (!CI->isTailCall()) return false; - const Function *ParentFn = CI->getParent()->getParent(); + const Function *ParentFn = CI->getFunction(); if (AMDGPU::isEntryFunctionCC(ParentFn->getCallingConv())) return false; return true; @@ -17432,7 +17432,7 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const { const SIInstrInfo *TII = getSubtarget()->getInstrInfo(); - MachineFunction *MF = MI.getParent()->getParent(); + MachineFunction *MF = MI.getMF(); MachineRegisterInfo &MRI = MF->getRegInfo(); if (TII->isVOP3(MI.getOpcode())) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index eafb579b1a2ee..3bf820a0024e7 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -162,7 +162,7 @@ bool SIInstrInfo::resultDependsOnExec(const MachineInstr &MI) const { if (!DstReg.isVirtual()) return true; - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) { switch (Use.getOpcode()) { case AMDGPU::S_AND_SAVEEXEC_B32: @@ -3984,7 +3984,7 @@ static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm, MachineInstr **DefMI = nullptr) { if (!MO->isReg()) return false; - const MachineFunction *MF = MO->getParent()->getParent()->getParent(); + const MachineFunction *MF = MO->getParent()->getMF(); const MachineRegisterInfo &MRI = MF->getRegInfo(); return getFoldableImm(MO->getReg(), MRI, Imm, DefMI); } @@ 
-4999,7 +4999,7 @@ bool SIInstrInfo::verifyCopy(const MachineInstr &MI, bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const MachineRegisterInfo &MRI = MF->getRegInfo(); // FIXME: At this point the COPY verify is done only for non-ssa forms. @@ -5805,7 +5805,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::STRICT_WWM: return AMDGPU::STRICT_WWM; case AMDGPU::STRICT_WQM: return AMDGPU::STRICT_WQM; case AMDGPU::S_MOV_B32: { - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); return MI.getOperand(1).isReg() || RI.isAGPR(MRI, MI.getOperand(0).getReg()) ? AMDGPU::COPY : AMDGPU::V_MOV_B32_e32; @@ -6080,8 +6080,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI, Register Reg = MI.getOperand(OpNo).getReg(); if (Reg.isVirtual()) { - const MachineRegisterInfo &MRI = - MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); return MRI.getRegClass(Reg); } return RI.getPhysRegBaseClass(Reg); @@ -6172,7 +6171,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC = MRI.getRegClass(Reg); if (MO.getSubReg()) { - const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + const MachineFunction *MF = MO.getParent()->getMF(); const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); if (!SuperRC) return false; @@ -6184,7 +6183,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx, const MachineOperand &MO) const { - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = 
MI.getMF()->getRegInfo(); const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx]; unsigned Opc = MI.getOpcode(); @@ -6286,7 +6285,7 @@ bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand( bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx, const MachineOperand *MO) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); const MCInstrDesc &InstDesc = MI.getDesc(); const MCOperandInfo &OpInfo = InstDesc.operands()[OpIdx]; @@ -7182,7 +7181,7 @@ extractRsrcPtr(const SIInstrInfo &TII, MachineInstr &MI, MachineOperand &Rsrc) { MachineBasicBlock * SIInstrInfo::legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT) const { - MachineFunction &MF = *MI.getParent()->getParent(); + MachineFunction &MF = *MI.getMF(); MachineRegisterInfo &MRI = MF.getRegInfo(); MachineBasicBlock *CreatedBB = nullptr; @@ -9314,7 +9313,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(const MachineOperand &Op, int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, &RI, false); if (SCCIdx != -1) { if (MI.isCopy()) { - MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); Register DestReg = MI.getOperand(0).getReg(); MRI.replaceRegWith(DestReg, NewCond); @@ -9426,7 +9425,7 @@ Register SIInstrInfo::findUsedSGPR(const MachineInstr &MI, return SGPRReg; Register UsedSGPRs[3] = {Register()}; - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); for (unsigned i = 0; i < 3; ++i) { int Idx = OpIndices[i]; @@ -9676,7 +9675,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { return getInstBundleSize(MI); case TargetOpcode::INLINEASM: case TargetOpcode::INLINEASM_BR: { - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); const char *AsmStr = 
MI.getOperand(0).getSymbolName(); return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST); } @@ -9811,7 +9810,7 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI, // needed by the prolog. However, the insertions for scalar registers can // always be placed at the BB top as they are independent of the exec mask // value. - const MachineFunction *MF = MI.getParent()->getParent(); + const MachineFunction *MF = MI.getMF(); bool IsNullOrVectorRegister = true; if (Reg) { const MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -10598,7 +10597,7 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const { return InstructionUniformity::Default; } - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo(); // FIXME: It's conceptually broken to report this for an instruction, and not diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index d24dfd657ddcc..b12d9525a7605 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1173,13 +1173,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { bool isVGPRCopy(const MachineInstr &MI) const { assert(isCopyInstr(MI)); Register Dest = MI.getOperand(0).getReg(); - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); return !RI.isSGPRReg(MRI, Dest); } bool hasVGPRUses(const MachineInstr &MI) const { - const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineFunction &MF = *MI.getMF(); const MachineRegisterInfo &MRI = MF.getRegInfo(); return llvm::any_of(MI.explicit_uses(), [&MRI, this](const MachineOperand &MO) { diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 6ab8d5521ebdb..0dac6d2d7ee4b 100644 --- 
a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -777,7 +777,7 @@ getSynchronizeAddrSpaceMD(const MachineInstr &MI) { void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI, const char *Msg) const { - const Function &Func = MI->getParent()->getParent()->getFunction(); + const Function &Func = MI->getMF()->getFunction(); Func.getContext().diagnose( DiagnosticInfoUnsupported(Func, Msg, MI->getDebugLoc())); } diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 86ca22cfeffd8..acc4b3f0a68b4 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -118,7 +118,7 @@ class SDWAOperand { MachineInstr *getParentInst() const { return Target->getParent(); } MachineRegisterInfo *getMRI() const { - return &getParentInst()->getParent()->getParent()->getRegInfo(); + return &getParentInst()->getMF()->getRegInfo(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1284,7 +1284,7 @@ bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, // Clone the instruction to allow revoking changes // made to MI during the processing of the operands // if the conversion fails. 
- SDWAInst = MI.getParent()->getParent()->CloneMachineInstr(&MI); + SDWAInst = MI.getMF()->CloneMachineInstr(&MI); MI.getParent()->insert(MI.getIterator(), SDWAInst); } else { SDWAInst = createSDWAVersion(MI); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index aaf7a921c2981..5484fab3efdcc 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1951,7 +1951,7 @@ void SIRegisterInfo::buildSpillLoadStore( void SIRegisterInfo::addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const { - const MachineFunction *MF = MIB->getParent()->getParent(); + const MachineFunction *MF = MIB->getMF(); const SIMachineFunctionInfo *FuncInfo = MF->getInfo(); uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(BlockReg); Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0); @@ -2321,7 +2321,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex( bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { - MachineFunction *MF = MI->getParent()->getParent(); + MachineFunction *MF = MI->getMF(); MachineBasicBlock *MBB = MI->getParent(); SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo &FrameInfo = MF->getFrameInfo(); From 00000dcf2c7452d13940568f62d8887c95c78054 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Fri, 14 Nov 2025 11:13:19 +0000 Subject: [PATCH 22/22] [LoopInterchange] Fix tests with loops that have BTC=0. NFC. 
(#167748) Precommit test fixups for #167113 --- .../interchanged-loop-nest-4.ll | 3 +- .../LoopInterchange/lcssa-phi-outer-latch.ll | 7 +- .../pr43176-move-to-new-latch.ll | 70 ++++++++++--------- .../Transforms/LoopInterchange/pr43326.ll | 52 +++++++------- .../Transforms/LoopInterchange/pr57148.ll | 6 +- .../reductions-across-inner-and-outer-loop.ll | 6 +- 6 files changed, 76 insertions(+), 68 deletions(-) diff --git a/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-4.ll b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-4.ll index 70fff161154d8..f61054409937c 100644 --- a/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-4.ll +++ b/llvm/test/Transforms/LoopInterchange/interchanged-loop-nest-4.ll @@ -36,7 +36,8 @@ for.body10.i.i.i: ; preds = %for.body10.i.i.i, % %arrayidx.i.i.i = getelementptr [6 x ptr], ptr @g_78, i16 0, i16 %storemerge56.i.i.i store ptr @g_75, ptr %arrayidx.i.i.i, align 1 %sub.i.i.i = add nsw i16 %storemerge56.i.i.i, -1 - br i1 true, label %for.inc14.i.i.i, label %for.body10.i.i.i + %cmp = icmp sgt i16 %sub.i.i.i, 0 + br i1 %cmp, label %for.body10.i.i.i, label %for.inc14.i.i.i for.inc14.i.i.i: ; preds = %for.body10.i.i.i %add15.i.i.i = add nuw nsw i16 %l_105.18.i.i.i, 1 diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll index a5e3accaf8e10..92ce3288b4529 100644 --- a/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll +++ b/llvm/test/Transforms/LoopInterchange/lcssa-phi-outer-latch.ll @@ -31,11 +31,13 @@ define i16 @main(ptr %a) { ; CHECK-NEXT: br label %[[INNER_LATCH:.*]] ; CHECK: [[INNER_LATCH]]: ; CHECK-NEXT: [[J_NEXT:%.*]] = add i16 [[J]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[J]], 42 ; CHECK-NEXT: br label %[[OUTER_BODY:.*]] ; CHECK: [[INNER_LATCH_SPLIT]]: ; CHECK-NEXT: [[NEW_COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[OUTER_LATCH]] ] ; CHECK-NEXT: [[TMP1]] = add i16 [[J]], 1 -; CHECK-NEXT: br i1 true, 
label %[[EXIT:.*]], label %[[INNER_HEADER]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[J]], 42 +; CHECK-NEXT: br i1 [[TMP2]], label %[[INNER_HEADER]], label %[[EXIT:.*]] ; CHECK: [[OUTER_BODY]]: ; CHECK-NEXT: br label %[[OUTER_LATCH]] ; CHECK: [[OUTER_LATCH]]: @@ -62,7 +64,8 @@ inner.header: inner.latch: %j.next = add i16 %j, 1 - br i1 true, label %outer.body, label %inner.header + %cmp = icmp slt i16 %j, 42 + br i1 %cmp, label %inner.header, label %outer.body outer.body: %new.cond.lcssa = phi i16 [ %cond, %inner.latch ] diff --git a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll index 6b25c3bc9a4ba..f02ee1a0ced19 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll @@ -21,33 +21,34 @@ define void @test1() { entry: - br label %for.body + br label %outer.header -for.body: ; preds = %for.inc3, %entry - %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ] - br label %for.body2 +outer.header: + %i = phi i32 [ %i.next, %outer.latch ], [ 0, %entry ] + br label %inner.header -for.body2: ; preds = %for.inc, %for.body - %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ] - br label %for.inc +inner.header: + %j = phi i32 [ %j.next, %inner.latch ], [ 1, %outer.header ] + br label %inner.latch -for.inc: ; preds = %for.body2 - %idxprom = sext i32 %inc41 to i64 +inner.latch: + %idxprom = sext i32 %i to i64 %arrayidx = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 %idxprom %0 = load i32, ptr %arrayidx, align 4 store i32 undef, ptr %arrayidx, align 4 - %cmp = icmp slt i32 %lsr.iv, 4 - %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1 - br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge + %cmp = icmp slt i32 %j, 4 + %j.next = add nuw nsw i32 %j, 1 + br i1 %cmp, label %inner.header, label %outer.body -for.cond1.for.end_crit_edge: ; preds = %for.inc - br label %for.inc3 
+outer.body: + br label %outer.latch -for.inc3: ; preds = %for.cond1.for.end_crit_edge - %inc4 = add nsw i32 %inc41, 1 - br i1 false, label %for.body, label %for.cond.for.end5_crit_edge +outer.latch: + %i.next = add nsw i32 %i, 1 + %cmp2 = icmp slt i32 %i, 4 + br i1 %cmp2, label %outer.header, label %exit -for.cond.for.end5_crit_edge: ; preds = %for.inc3 +exit: ret void } @@ -61,33 +62,34 @@ for.cond.for.end5_crit_edge: ; preds = %for.inc3 define void @test2() { entry: - br label %for.body + br label %outer.header -for.body: ; preds = %for.inc3, %entry - %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ] - br label %for.body2 +outer.header: + %i = phi i32 [ %i.next, %outer.latch ], [ 0, %entry ] + br label %inner.header -for.body2: ; preds = %for.inc, %for.body - %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ] - br label %for.inc +inner.header: + %lsr.iv = phi i32 [ %lsr.iv.next, %inner.latch ], [ 1, %outer.header ] + br label %inner.latch -for.inc: ; preds = %for.body2 - %idxprom = sext i32 %inc41 to i64 +inner.latch: + %idxprom = sext i32 %i to i64 %arrayidx = getelementptr inbounds [5 x i32], ptr @b, i64 0, i64 %idxprom %0 = load i32, ptr %arrayidx, align 4 %cmp = icmp slt i32 %lsr.iv, 4 %cmp.zext = zext i1 %cmp to i32 store i32 %cmp.zext, ptr %arrayidx, align 4 %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1 - br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge + br i1 %cmp, label %inner.header, label %outer.body -for.cond1.for.end_crit_edge: ; preds = %for.inc - br label %for.inc3 +outer.body: + br label %outer.latch -for.inc3: ; preds = %for.cond1.for.end_crit_edge - %inc4 = add nsw i32 %inc41, 1 - br i1 false, label %for.body, label %for.cond.for.end5_crit_edge +outer.latch: + %i.next = add nsw i32 %i, 1 + %cmp2 = icmp slt i32 %i, 4 + br i1 %cmp2, label %outer.header, label %exit -for.cond.for.end5_crit_edge: ; preds = %for.inc3 +exit: ret void } diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll 
b/llvm/test/Transforms/LoopInterchange/pr43326.ll index c25c4fadd3042..cc4f07c722dd9 100644 --- a/llvm/test/Transforms/LoopInterchange/pr43326.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll @@ -25,58 +25,58 @@ define void @pr43326() { entry: %0 = load i32, ptr @a %tobool.not2 = icmp eq i32 %0, 0 - br i1 %tobool.not2, label %for.end14, label %for.body.lr.ph + br i1 %tobool.not2, label %for.end14, label %outer.preheader -for.body.lr.ph: ; preds = %entry +outer.preheader: ; preds = %entry %d.promoted = load i32, ptr @d %a.promoted = load i32, ptr @a - br label %for.body + br label %outer.header -for.body: ; preds = %for.body.lr.ph, %for.inc12 - %inc1312 = phi i32 [ %a.promoted, %for.body.lr.ph ], [ %inc13, %for.inc12 ] - %xor.lcssa.lcssa11 = phi i32 [ %d.promoted, %for.body.lr.ph ], [ %xor.lcssa.lcssa, %for.inc12 ] - br label %for.body3 +outer.header: ; preds = %outer.preheader, %for.inc12 + %inc1312 = phi i32 [ %a.promoted, %outer.preheader ], [ %inc13, %for.inc12 ] + %xor.lcssa.lcssa11 = phi i32 [ %d.promoted, %outer.preheader ], [ %xor.lcssa.lcssa, %for.inc12 ] + br label %inner1.header -for.body3: ; preds = %for.body, %for.inc10 - %xor.lcssa9 = phi i32 [ %xor.lcssa.lcssa11, %for.body ], [ %xor.lcssa, %for.inc10 ] - %dec7 = phi i8 [ 0, %for.body ], [ %dec, %for.inc10 ] - %idxprom8 = sext i8 %dec7 to i64 - br label %for.body7 +inner1.header: ; preds = %outer.header, %for.inc10 + %xor.lcssa9 = phi i32 [ %xor.lcssa.lcssa11, %outer.header ], [ %xor.lcssa, %for.inc10 ] + %j = phi i8 [ 0, %outer.header ], [ %j.next, %for.inc10 ] + %idxprom8 = sext i8 %j to i64 + br label %inner2.header -for.body7: ; preds = %for.body3, %for.inc - %xor5 = phi i32 [ %xor.lcssa9, %for.body3 ], [ %xor, %for.inc ] - %inc4 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] - %idxprom = sext i32 %inc4 to i64 +inner2.header: ; preds = %inner1.header, %for.inc + %xor5 = phi i32 [ %xor.lcssa9, %inner1.header ], [ %xor, %for.inc ] + %k = phi i32 [ 0, %inner1.header ], [ %k.next, %for.inc ] + 
%idxprom = sext i32 %k to i64 %arrayidx9 = getelementptr inbounds [1 x [1 x i32]], ptr @e, i64 0, i64 %idxprom, i64 %idxprom8 %1 = load i32, ptr %arrayidx9 %xor = xor i32 %xor5, %1 br label %for.inc -for.inc: ; preds = %for.body7 - %inc = add nsw i32 %inc4, 1 - %cmp5 = icmp slt i32 %inc, 1 - br i1 %cmp5, label %for.body7, label %for.end +for.inc: ; preds = %inner2.header + %k.next = add nsw i32 %k, 1 + %cmp5 = icmp slt i32 %k.next, 42 + br i1 %cmp5, label %inner2.header, label %for.end for.end: ; preds = %for.inc %xor.lcssa = phi i32 [ %xor, %for.inc ] - %inc.lcssa = phi i32 [ %inc, %for.inc ] + %inc.lcssa = phi i32 [ %k.next, %for.inc ] br label %for.inc10 for.inc10: ; preds = %for.end - %dec = add i8 %dec7, -1 - %cmp = icmp sgt i8 %dec, -1 - br i1 %cmp, label %for.body3, label %for.end11 + %j.next = add i8 %j, -1 + %cmp = icmp sgt i8 %j.next, -1 + br i1 %cmp, label %inner1.header, label %for.end11 for.end11: ; preds = %for.inc10 %xor.lcssa.lcssa = phi i32 [ %xor.lcssa, %for.inc10 ] - %dec.lcssa = phi i8 [ %dec, %for.inc10 ] + %dec.lcssa = phi i8 [ %j.next, %for.inc10 ] %inc.lcssa.lcssa = phi i32 [ %inc.lcssa, %for.inc10 ] br label %for.inc12 for.inc12: ; preds = %for.end11 %inc13 = add nsw i32 %inc1312, 1 %tobool.not = icmp eq i32 %inc13, 0 - br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %for.body + br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %outer.header for.cond.for.end14_crit_edge: ; preds = %for.inc12 %inc13.lcssa = phi i32 [ %inc13, %for.inc12 ] diff --git a/llvm/test/Transforms/LoopInterchange/pr57148.ll b/llvm/test/Transforms/LoopInterchange/pr57148.ll index 0d4194762a692..747dbbcb4a44e 100644 --- a/llvm/test/Transforms/LoopInterchange/pr57148.ll +++ b/llvm/test/Transforms/LoopInterchange/pr57148.ll @@ -126,7 +126,8 @@ define void @test2() { ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK80]], label [[VECTOR_BODY85]] ; CHECK: middle.block80: ; CHECK-NEXT: [[INC66]] = add nuw nsw i16 [[J_165]], 1 -; CHECK-NEXT: br i1 true, 
label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[INC66]], 42 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]] ; CHECK: for.inc68: ; CHECK-NEXT: [[INC69]] = add nuw nsw i16 [[I_166]], 1 ; CHECK-NEXT: [[EXITCOND77_NOT:%.*]] = icmp eq i16 [[INC69]], 2 @@ -156,7 +157,8 @@ vector.body85: ; preds = %vector.body85, %for middle.block80: ; preds = %vector.body85 %inc66 = add nuw nsw i16 %j.165, 1 - br i1 true, label %for.inc68, label %for.cond37.preheader + %cmp = icmp slt i16 %inc66, 42 + br i1 %cmp, label %for.inc68, label %for.cond37.preheader for.inc68: ; preds = %middle.block80 %inc69 = add nuw nsw i16 %i.166, 1 diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll index 27d99e05e84ee..51fda4cf1ebe1 100644 --- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll +++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll @@ -301,13 +301,13 @@ for.body3: ; preds = %for.body3, %outer.h %vB = load float, ptr %arrayidx6 %float.inner.inc.inc = fadd fast float %float.inner.inc, %vB %iv.inner.next = add nuw nsw i64 %iv.inner, 1 - %exitcond = icmp eq i64 %iv.inner.next, 100 - br i1 %exitcond, label %outer.inc, label %for.body3 + %exitcond = icmp slt i64 %iv.inner.next, 100 + br i1 %exitcond, label %for.body3, label %outer.inc outer.inc: ; preds = %for.body3 %float.inner.lcssa = phi float [ %float.inner.inc.inc, %for.body3 ] %iv.outer.next = add nsw i64 %iv.outer, 1 - %cmp = icmp eq i64 %iv.outer.next, 100 + %cmp = icmp slt i64 %iv.outer.next, 100 br i1 %cmp, label %outer.header, label %for.exit for.exit: ; preds = %outer.inc