-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][SME2][SVE2p1] Add PNR_3b regclass #67785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This patch adds the PNR_3b regclass for predicate-as-counter registers 0-7 and allows the Upl ASM constraint to use this register class.
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64 Changes: This patch adds the PNR_3b regclass for predicate-as-counter registers 0-7 and allows the Upl ASM constraint to use this register class. Patch is 23.25 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67785.diff 9 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 662306c0dadf0d4..06fe64d10a1c13e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10063,7 +10063,8 @@ getPredicateRegisterClass(PredicateConstraint Constraint, EVT VT) {
return VT == MVT::aarch64svcount ? &AArch64::PNR_p8to15RegClass
: &AArch64::PPR_p8to15RegClass;
case PredicateConstraint::Upl:
- return VT == MVT::aarch64svcount ? nullptr : &AArch64::PPR_3bRegClass;
+ return VT == MVT::aarch64svcount ? &AArch64::PNR_3bRegClass
+ : &AArch64::PPR_3bRegClass;
case PredicateConstraint::Upa:
return VT == MVT::aarch64svcount ? &AArch64::PNRRegClass
: &AArch64::PPRRegClass;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index e3d77ef37a8ac02..eb26591908fd79c 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -915,6 +915,7 @@ class PNRClass<int firstreg, int lastreg> : RegisterClass<
}
def PNR : PNRClass<0, 15>;
+def PNR_3b : PNRClass<0, 7>;
def PNR_p8to15 : PNRClass<8, 15>;
// SVE predicate-as-counter operand
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
index d531c29da7551a9..ef8e46653640862 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
@@ -26,7 +26,7 @@ define void @asm_simple_register_clobber() {
define i64 @asm_register_early_clobber() {
; CHECK-LABEL: name: asm_register_early_clobber
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 2686987 /* regdef-ec:GPR64common */, def early-clobber %0, 2686987 /* regdef-ec:GPR64common */, def early-clobber %1, !0
+ ; CHECK-NEXT: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 2752523 /* regdef-ec:GPR64common */, def early-clobber %0, 2752523 /* regdef-ec:GPR64common */, def early-clobber %1, !0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]]
@@ -54,7 +54,7 @@ entry:
define i32 @test_single_register_output() nounwind ssp {
; CHECK-LABEL: name: test_single_register_output
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0
+ ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -66,7 +66,7 @@ entry:
define i64 @test_single_register_output_s64() nounwind ssp {
; CHECK-LABEL: name: test_single_register_output_s64
; CHECK: bb.1.entry:
- ; CHECK-NEXT: INLINEASM &"mov $0, 7", 0 /* attdialect */, 2686986 /* regdef:GPR64common */, def %0
+ ; CHECK-NEXT: INLINEASM &"mov $0, 7", 0 /* attdialect */, 2752522 /* regdef:GPR64common */, def %0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY %0
; CHECK-NEXT: $x0 = COPY [[COPY]](s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
@@ -79,7 +79,7 @@ entry:
define float @test_multiple_register_outputs_same() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_same
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0, 1638410 /* regdef:GPR32common */, def %1
+ ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0, 1703946 /* regdef:GPR32common */, def %1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]]
@@ -96,7 +96,7 @@ define float @test_multiple_register_outputs_same() #0 {
define double @test_multiple_register_outputs_mixed() #0 {
; CHECK-LABEL: name: test_multiple_register_outputs_mixed
; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0, 2490378 /* regdef:FPR64 */, def %1
+ ; CHECK-NEXT: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0, 2555914 /* regdef:FPR64 */, def %1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1
; CHECK-NEXT: $d0 = COPY [[COPY1]](s64)
@@ -125,7 +125,7 @@ define zeroext i8 @test_register_output_trunc(ptr %src) nounwind {
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %1
+ ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
@@ -155,7 +155,7 @@ define void @test_input_register_imm() {
; CHECK: bb.1 (%ir-block.0):
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[C]](s64)
- ; CHECK-NEXT: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 2686985 /* reguse:GPR64common */, [[COPY]]
+ ; CHECK-NEXT: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 2752521 /* reguse:GPR64common */, [[COPY]]
; CHECK-NEXT: RET_ReallyLR
call void asm sideeffect "mov x0, $0", "r"(i64 42)
ret void
@@ -190,7 +190,7 @@ define zeroext i8 @test_input_register(ptr %src) nounwind {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0)
- ; CHECK-NEXT: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %1, 2686985 /* reguse:GPR64common */, [[COPY1]]
+ ; CHECK-NEXT: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %1, 2752521 /* reguse:GPR64common */, [[COPY1]]
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8)
@@ -207,7 +207,7 @@ define i32 @test_memory_constraint(ptr %a) nounwind {
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
- ; CHECK-NEXT: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 1638410 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0)
+ ; CHECK-NEXT: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 1703946 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1
; CHECK-NEXT: $w0 = COPY [[COPY1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -221,7 +221,7 @@ define i16 @test_anyext_input() {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1638410 /* regdef:GPR32common */, def %0, 1638409 /* reguse:GPR32common */, [[COPY]]
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1703946 /* regdef:GPR32common */, def %0, 1703945 /* reguse:GPR32common */, [[COPY]]
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
@@ -237,7 +237,7 @@ define i16 @test_anyext_input_with_matching_constraint() {
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1638410 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3)
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1703946 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
index c224b0a259fcd18..59eb80ae6146b3f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll
@@ -71,7 +71,7 @@ define void @test2() #0 personality ptr @__gcc_personality_v0 {
; CHECK-NEXT: G_INVOKE_REGION_START
; CHECK-NEXT: EH_LABEL <mcsymbol >
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[DEF]](p0)
- ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2686985 /* reguse:GPR64common */, [[COPY]]
+ ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2752521 /* reguse:GPR64common */, [[COPY]]
; CHECK-NEXT: EH_LABEL <mcsymbol >
; CHECK-NEXT: G_BR %bb.2
; CHECK-NEXT: {{ $}}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
index f75731e351c7826..e77fac19e0a78a5 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir
@@ -57,7 +57,7 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: inlineasm_virt_reg_output
- ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_p8to15_and_PPR2_with_psub1_in_PPR_3b */, def %0
+ ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_3b_and_PPR2_with_psub1_in_PPR_p8to15 */, def %0
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
@@ -75,7 +75,7 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: inlineasm_virt_mixed_types
- ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_p8to15_and_PPR2_with_psub1_in_PPR_3b */, def %0, 2162698 /* regdef:WSeqPairsClass_with_subo32_in_GPR32common */, def %1
+ ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:PPR2_with_psub_in_PNR_3b_and_PPR2_with_psub1_in_PPR_p8to15 */, def %0, 2162698 /* regdef:WSeqPairsClass */, def %1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1
; CHECK-NEXT: $d0 = COPY [[COPY1]](s64)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
index 98e7ad740681e68..604e00b2cce7fd6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sme2 -stop-after=finalize-isel | FileCheck %s
-define dso_local void @UphPNR(target("aarch64.svcount") %predcnt) {
+define void @UphPNR(target("aarch64.svcount") %predcnt) {
entry:
; CHECK: %0:ppr = COPY $p0
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
; CHECK: %1:pnr_p8to15 = COPY %0
-; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, 393225 /* reguse:PNR_p8to15 */, %1
+; CHECK: INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, 458761 /* reguse:PNR_p8to15 */, %1
; CHECK: RET_ReallyLR
%predcnt.addr = alloca target("aarch64.svcount"), align 2
store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2
@@ -14,7 +14,7 @@ entry:
ret void
}
-define dso_local void @UpaPNR(target("aarch64.svcount") %predcnt) {
+define void @UpaPNR(target("aarch64.svcount") %predcnt) {
entry:
; CHECK: %0:ppr = COPY $p0
; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
@@ -26,4 +26,18 @@ entry:
%0 = load target("aarch64.svcount"), ptr %predcnt.addr, align 2
call void asm sideeffect "ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", "@3Upa"(target("aarch64.svcount") %0)
ret void
-}
\ No newline at end of file
+}
+
+define void @UplPNR(target("aarch64.svcount") %predcnt) {
+entry:
+; CHECK: %0:ppr = COPY $p0
+; CHECK: STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
+; CHECK: %1:pnr_3b = COPY %0
+; CHECK: INLINEASM &"fadd z0.h, p0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, 393225 /* reguse:PNR_3b */, %1
+; CHECK: RET_ReallyLR
+ %predcnt.addr = alloca target("aarch64.svcount"), align 2
+ store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2
+ %0 = load target("aarch64.svcount"), ptr %predcnt.addr, align 2
+ call void asm sideeffect "fadd z0.h, p0/m, z0.h, #0.5", "@3Upl"(target("aarch64.svcount") %0)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll b/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll
index ed02fdfc996a433..89745f4df4cde40 100644
--- a/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll
+++ b/llvm/test/CodeGen/AArch64/callbr-asm-outputs-indirect-isel.ll
@@ -18,7 +18,7 @@ define i32 @test0() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.1
+ ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %5
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
@@ -31,7 +31,7 @@ define i32 @test0() {
; CHECK-NEXT: bb.2.direct:
; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %7, 13 /* imm */, %bb.3
+ ; CHECK-NEXT: INLINEASM_BR &"# $0", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %7, 13 /* imm */, %bb.3
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %7
; CHECK-NEXT: B %bb.4
; CHECK-NEXT: {{ $}}
@@ -107,7 +107,7 @@ define i32 @dont_split1() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %1, 13 /* imm */, %bb.2
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %1, 13 /* imm */, %bb.2
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %1
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
@@ -168,7 +168,7 @@ define i32 @dont_split3() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %0, 13 /* imm */, %bb.2
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0, 13 /* imm */, %bb.2
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.x:
@@ -195,7 +195,7 @@ define i32 @split_me0() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
@@ -245,7 +245,7 @@ define i32 @split_me1(i1 %z) {
; CHECK-NEXT: bb.1.w:
; CHECK-NEXT: successors: %bb.3(0x80000000), %bb.2(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %5, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32all = COPY %5
; CHECK-NEXT: B %bb.3
; CHECK-NEXT: {{ $}}
@@ -298,7 +298,7 @@ define i32 @split_me2(i1 %z) {
; CHECK-NEXT: bb.1.w:
; CHECK-NEXT: successors: %bb.3(0x80000000), %bb.2(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %6, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %6, 13 /* imm */, %bb.2, 13 /* imm */, %bb.2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %6
; CHECK-NEXT: B %bb.3
; CHECK-NEXT: {{ $}}
@@ -341,7 +341,7 @@ define i32 @dont_split4() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.2
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.2
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3
; CHECK-NEXT: B %bb.1
; CHECK-NEXT: {{ $}}
@@ -380,7 +380,7 @@ define i32 @dont_split5() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
@@ -411,7 +411,7 @@ define i32 @split_me3() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
@@ -458,7 +458,7 @@ define i32 @dont_split6(i32 %0) {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr32all = PHI [[COPY]], %bb.0, %2, %bb.2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32common = COPY [[PHI]]
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3), 13 /* imm */, %bb.2
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3), 13 /* imm */, %bb.2
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32all = COPY %4
; CHECK-NEXT: B %bb.3
; CHECK-NEXT: {{ $}}
@@ -493,7 +493,7 @@ define i32 @split_me4() {
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1638410 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
+ ; CHECK-NEXT: INLINEASM_BR &"", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %3, 13 /* imm */, %bb.1
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32all = COPY %3
; CHECK-NEXT: B %bb.2
; CHECK-NEXT: {{ $}}
@@ -524,7 +524,7 @@ define i32 @...
[truncated]
%predcnt.addr = alloca target("aarch64.svcount"), align 2
store target("aarch64.svcount") %predcnt, ptr %predcnt.addr, align 2
%0 = load target("aarch64.svcount"), ptr %predcnt.addr, align 2
call void asm sideeffect "fadd z0.h, p0/m, z0.h, #0.5", "@3Upl"(target("aarch64.svcount") %0)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you need to change 'p0/m' to '$0/m' so that it's actually using the input operand, right? That's what we seem to do in the test above.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Beautiful!
This patch adds the PNR_3b regclass for predicate-as-counter registers 0-7 and allows the Upl ASM constraint to use this register class.