diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a8f61c4511d0f..d24795a78ed45 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3131,6 +3131,37 @@ let Predicates = [HasSVEorSME] in {
   def : Pat<(vector_extract (nxv2f64 ZPR:$Zs), (i64 0)),
             (f64 (EXTRACT_SUBREG ZPR:$Zs, dsub))>;
   }
+
+  // Fold an add of an extended predicate into a predicated ADD (the p/m form
+  // in the expected assembly) whose second source is a splat of `value`.
+  // `extend` is the extension node applied to the predicate; `value` is the
+  // immediate handed to DUP_ZI_* for that extension.
+  multiclass sve_predicated_add<SDNode extend, int value> {
+    def : Pat<(nxv16i8 (add ZPR:$op, (extend (nxv16i1 PPR:$pred)))),
+              (ADD_ZPmZ_B PPR:$pred, ZPR:$op, (DUP_ZI_B value, 0))>;
+    def : Pat<(nxv8i16 (add ZPR:$op, (extend (nxv8i1 PPR:$pred)))),
+              (ADD_ZPmZ_H PPR:$pred, ZPR:$op, (DUP_ZI_H value, 0))>;
+    def : Pat<(nxv4i32 (add ZPR:$op, (extend (nxv4i1 PPR:$pred)))),
+              (ADD_ZPmZ_S PPR:$pred, ZPR:$op, (DUP_ZI_S value, 0))>;
+    def : Pat<(nxv2i64 (add ZPR:$op, (extend (nxv2i1 PPR:$pred)))),
+              (ADD_ZPmZ_D PPR:$pred, ZPR:$op, (DUP_ZI_D value, 0))>;
+  }
+
+  // zext pairs with 1; sext pairs with 255, which the tests print as #-1
+  // (the all-ones lane a sign-extended true predicate bit produces).
+  defm : sve_predicated_add<zext, 1>;
+  defm : sve_predicated_add<sext, 255>;
+
+  // Same fold for sub of a sign-extended predicate: predicated SUB of a splat
+  // of 255 (printed as #-1 in the expected assembly).
+  def : Pat<(nxv16i8 (sub ZPR:$op, (sext (nxv16i1 PPR:$pred)))),
+            (SUB_ZPmZ_B PPR:$pred, ZPR:$op, (DUP_ZI_B 255, 0))>;
+  def : Pat<(nxv8i16 (sub ZPR:$op, (sext (nxv8i1 PPR:$pred)))),
+            (SUB_ZPmZ_H PPR:$pred, ZPR:$op, (DUP_ZI_H 255, 0))>;
+  def : Pat<(nxv4i32 (sub ZPR:$op, (sext (nxv4i1 PPR:$pred)))),
+            (SUB_ZPmZ_S PPR:$pred, ZPR:$op, (DUP_ZI_S 255, 0))>;
+  def : Pat<(nxv2i64 (sub ZPR:$op, (sext (nxv2i1 PPR:$pred)))),
+            (SUB_ZPmZ_D PPR:$pred, ZPR:$op, (DUP_ZI_D 255, 0))>;
 } // End HasSVEorSME
 
 let Predicates = [HasSVE, HasMatMulInt8] in {
diff --git a/llvm/test/CodeGen/AArch64/predicated-add-sub.ll b/llvm/test/CodeGen/AArch64/predicated-add-sub.ll
index 9a778fb9650b2..884ee19fa01b5 100644
--- a/llvm/test/CodeGen/AArch64/predicated-add-sub.ll
+++ b/llvm/test/CodeGen/AArch64/predicated-add-sub.ll
@@ -6,8 +6,8 @@ target triple = "aarch64-unknown-linux"
 define @zext.add.8xi8( %a, %v) #0 {
 ; CHECK-LABEL: zext.add.8xi8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
-; 
CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #1 // =0x1 +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -17,8 +17,8 @@ define @zext.add.8xi8( %a, define @zext.add.4xi16( %a, %v) #0 { ; CHECK-LABEL: zext.add.4xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #1 // =0x1 +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -28,8 +28,8 @@ define @zext.add.4xi16( %a, @zext.add.2xi32( %a, %v) #0 { ; CHECK-LABEL: zext.add.2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #1 // =0x1 +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -39,8 +39,8 @@ define @zext.add.2xi32( %a, @zext.add.16xi8( %a, %v) #0 { ; CHECK-LABEL: zext.add.16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #1 // =0x1 +; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -50,8 +50,8 @@ define @zext.add.16xi8( %a, @zext.add.8xi16( %a, %v) #0 { ; CHECK-LABEL: zext.add.8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #1 // =0x1 +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -61,8 +61,8 @@ define @zext.add.8xi16( %a, @zext.add.4xi32( %a, %v) #0 { ; CHECK-LABEL: zext.add.4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #1 // =0x1 +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -72,8 +72,8 @@ define @zext.add.4xi32( %a, @zext.add.2xi64( %a, %v) #0 { ; 
CHECK-LABEL: zext.add.2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #1 // =0x1 +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -83,12 +83,11 @@ define @zext.add.2xi64( %a, @zext.add.8xi32( %a, %v) #0 { ; CHECK-LABEL: zext.add.8xi32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #1 // =0x1 ; CHECK-NEXT: punpkhi p1.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: mov z2.s, p1/z, #1 // =0x1 -; CHECK-NEXT: mov z3.s, p0/z, #1 // =0x1 -; CHECK-NEXT: add z0.s, z0.s, z3.s -; CHECK-NEXT: add z1.s, z1.s, z2.s +; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s +; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -100,18 +99,15 @@ define @zext.add.16xi32( %a, %v to %result = add %a, %extend @@ -121,8 +117,8 @@ define @zext.add.16xi32( %a, @zext.sub.8xi8( %a, %v) #0 { ; CHECK-LABEL: zext.sub.8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -132,8 +128,8 @@ define @zext.sub.8xi8( %a, define @zext.sub.4xi16( %a, %v) #0 { ; CHECK-LABEL: zext.sub.4xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -143,8 +139,8 @@ define @zext.sub.4xi16( %a, @zext.sub.2xi32( %a, %v) #0 { ; CHECK-LABEL: zext.sub.2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = 
zext %v to %result = sub %a, %extend @@ -154,8 +150,8 @@ define @zext.sub.2xi32( %a, @zext.sub.16xi8( %a, %v) #0 { ; CHECK-LABEL: zext.sub.16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -165,8 +161,8 @@ define @zext.sub.16xi8( %a, @zext.sub.8xi16( %a, %v) #0 { ; CHECK-LABEL: zext.sub.8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -176,8 +172,8 @@ define @zext.sub.8xi16( %a, @zext.sub.4xi32( %a, %v) #0 { ; CHECK-LABEL: zext.sub.4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -187,8 +183,8 @@ define @zext.sub.4xi32( %a, @zext.sub.2xi64( %a, %v) #0 { ; CHECK-LABEL: zext.sub.2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -198,12 +194,11 @@ define @zext.sub.2xi64( %a, @zext.sub.8xi32( %a, %v) #0 { ; CHECK-LABEL: zext.sub.8xi32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpklo p1.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z2.s -; CHECK-NEXT: add z1.s, z1.s, z3.s +; 
CHECK-NEXT: add z0.s, p1/m, z0.s, z2.s +; CHECK-NEXT: add z1.s, p0/m, z1.s, z2.s ; CHECK-NEXT: ret %extend = zext %v to %result = sub %a, %extend @@ -214,19 +209,16 @@ define @zext.sub.16xi32( %a, %v to %result = sub %a, %extend @@ -236,8 +228,8 @@ define @zext.sub.16xi32( %a, @sext.add.8xi8( %a, %v) #0 { ; CHECK-LABEL: sext.add.8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -247,8 +239,8 @@ define @sext.add.8xi8( %a, define @sext.add.4xi16( %a, %v) #0 { ; CHECK-LABEL: sext.add.4xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -258,8 +250,8 @@ define @sext.add.4xi16( %a, @sext.add.2xi32( %a, %v) #0 { ; CHECK-LABEL: sext.add.2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -269,8 +261,8 @@ define @sext.add.2xi32( %a, @sext.add.16xi8( %a, %v) #0 { ; CHECK-LABEL: sext.add.16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -280,8 +272,8 @@ define @sext.add.16xi8( %a, @sext.add.8xi16( %a, %v) #0 { ; CHECK-LABEL: sext.add.8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: mov 
z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -291,8 +283,8 @@ define @sext.add.8xi16( %a, @sext.add.4xi32( %a, %v) #0 { ; CHECK-LABEL: sext.add.4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -302,8 +294,8 @@ define @sext.add.4xi32( %a, @sext.add.2xi64( %a, %v) #0 { ; CHECK-LABEL: sext.add.2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -313,12 +305,11 @@ define @sext.add.2xi64( %a, @sext.add.8xi32( %a, %v) #0 { ; CHECK-LABEL: sext.add.8xi32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpkhi p1.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z0.s, z0.s, z3.s -; CHECK-NEXT: add z1.s, z1.s, z2.s +; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s +; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -330,18 +321,15 @@ define @sext.add.16xi32( %a, %v to %result = add %a, %extend @@ -351,8 +339,8 @@ define @sext.add.16xi32( %a, @sext.sub.8xi8( %a, %v) #0 { ; CHECK-LABEL: sext.sub.8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -362,8 +350,8 @@ define @sext.sub.8xi8( %a, 
define @sext.sub.4xi16( %a, %v) #0 { ; CHECK-LABEL: sext.sub.4xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -373,8 +361,8 @@ define @sext.sub.4xi16( %a, @sext.sub.2xi32( %a, %v) #0 { ; CHECK-LABEL: sext.sub.2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -384,8 +372,8 @@ define @sext.sub.2xi32( %a, @sext.sub.16xi8( %a, %v) #0 { ; CHECK-LABEL: sext.sub.16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: mov z1.b, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -395,8 +383,8 @@ define @sext.sub.16xi8( %a, @sext.sub.8xi16( %a, %v) #0 { ; CHECK-LABEL: sext.sub.8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: mov z1.h, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -406,8 +394,8 @@ define @sext.sub.8xi16( %a, @sext.sub.4xi32( %a, %v) #0 { ; CHECK-LABEL: sext.sub.4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -417,8 +405,8 @@ define @sext.sub.4xi32( %a, @sext.sub.2xi64( %a, %v) #0 { ; CHECK-LABEL: sext.sub.2xi64: ; CHECK: // %bb.0: -; 
CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -428,12 +416,11 @@ define @sext.sub.2xi64( %a, @sext.sub.8xi32( %a, %v) #0 { ; CHECK-LABEL: sext.sub.8xi32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpkhi p1.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: sub z0.s, z0.s, z3.s -; CHECK-NEXT: sub z1.s, z1.s, z2.s +; CHECK-NEXT: sub z1.s, p1/m, z1.s, z2.s +; CHECK-NEXT: sub z0.s, p0/m, z0.s, z2.s ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend @@ -445,18 +432,15 @@ define @sext.sub.16xi32( %a, %v to %result = sub %a, %extend