[LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. #88136

paulwalker-arm · 2024-04-09T15:10:43Z

The immediate forms of SQADD and SQSUB interpret their immediate
operand as unsigned and thus effectively only support positive
immediate operands.

The original code is only wrong for the i8 cases because they
previously accepted all values, however, the new patterns enable more
uses and this I've added tests for the larger element types as well.

…tions. The immediate forms of SQADD and SQSUB interpret their immediate operand as unsigned and thus effectively only support positive immediate operands. The original code is only wrong for the i8 cases because they previously accepted all values, however, I've also added tests for the larger element types just in case.

llvmbot · 2024-04-09T15:11:19Z

@llvm/pr-subscribers-backend-aarch64

Author: Paul Walker (paulwalker-arm)

Changes

The immediate forms of SQADD and SQSUB interpret their immediate
operand as unsigned and thus effectively only support positive
immediate operands.

The original code is only wrong for the i8 cases because they
previously accepted all values, however, I've also added tests for
the larger element types just in case.

Full diff: https://github.com/llvm/llvm-project/pull/88136.diff

4 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp (+50)
(modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+2-2)
(modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+17)
(modified) llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll (+112)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 51bec3604026b0..7ef0bb9230aac5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -258,6 +258,11 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
     return SelectSVEAddSubImm(N, VT, Imm, Shift);
   }
 
+  template <MVT::SimpleValueType VT>
+  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
+    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift);
+  }
+
   template <MVT::SimpleValueType VT>
   bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
     return SelectSVECpyDupImm(N, VT, Imm, Shift);
@@ -484,6 +489,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   bool SelectCMP_SWAP(SDNode *N);
 
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
+  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
   bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
 
@@ -4014,6 +4020,50 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
   return false;
 }
 
+bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
+                                                 SDValue &Imm, SDValue &Shift) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  SDLoc DL(N);
+  int64_t Val = cast<ConstantSDNode>(N)
+                    ->getAPIntValue()
+                    .trunc(VT.getFixedSizeInBits())
+                    .getSExtValue();
+
+  // Signed saturating instructions treat their immediate operand as unsigned.
+  if (Val < 0)
+    return false;
+
+  switch (VT.SimpleTy) {
+  case MVT::i8:
+    // All positive immediates are supported.
+    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+    return true;
+  case MVT::i16:
+  case MVT::i32:
+  case MVT::i64:
+    // Support 8bit positive immediates.
+    if (Val <= 255) {
+      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
+      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
+      return true;
+    }
+    // Support 16bit positive immediates that are a multiple of 256.
+    if (Val <= 65280 && Val % 256 == 0) {
+      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
+      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
+      return true;
+    }
+    break;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
                                              SDValue &Shift) {
   if (!isa<ConstantSDNode>(N))
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a519d81362a73a..b2ced36caa9733 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -554,9 +554,9 @@ let Predicates = [HasSVEorSME] in {
   defm ADD_ZI   : sve_int_arith_imm0<0b000, "add", add>;
   defm SUB_ZI   : sve_int_arith_imm0<0b001, "sub", sub>;
   defm SUBR_ZI  : sve_int_arith_imm0<0b011, "subr", AArch64subr>;
-  defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
+  defm SQADD_ZI : sve_int_arith_imm0_ssat<0b100, "sqadd", saddsat>;
   defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
-  defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
+  defm SQSUB_ZI : sve_int_arith_imm0_ssat<0b110, "sqsub", ssubsat>;
   defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
 
   defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index ee8292fdd8839a..1daea6f7add1b9 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -249,6 +249,11 @@ def SVEAddSubImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i16>", [
 def SVEAddSubImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubImm<MVT::i32>", []>;
 def SVEAddSubImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubImm<MVT::i64>", []>;
 
+def SVEAddSubSSatImm8Pat  : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i8>", []>;
+def SVEAddSubSSatImm16Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i16>", []>;
+def SVEAddSubSSatImm32Pat : ComplexPattern<i32, 2, "SelectSVEAddSubSSatImm<MVT::i32>", []>;
+def SVEAddSubSSatImm64Pat : ComplexPattern<i64, 2, "SelectSVEAddSubSSatImm<MVT::i64>", []>;
+
 def SVECpyDupImm8Pat  : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i8>", []>;
 def SVECpyDupImm16Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i16>", []>;
 def SVECpyDupImm32Pat : ComplexPattern<i32, 2, "SelectSVECpyDupImm<MVT::i32>", []>;
@@ -4775,6 +4780,18 @@ multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
   def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_int_arith_imm0_ssat<bits<3> opc, string asm, SDPatternOperator op> {
+  def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8,  addsub_imm8_opt_lsl_i8>;
+  def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
+  def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
+  def _D : sve_int_arith_imm0<0b11, opc, asm, ZPR64, addsub_imm8_opt_lsl_i64>;
+
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv16i8, op, ZPR8,  i32, SVEAddSubSSatImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubSSatImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubSSatImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubSSatImm64Pat, !cast<Instruction>(NAME # _D)>;
+}
+
 class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
                         ZPRRegOp zprty, Operand immtype>
 : I<(outs zprty:$Zdn), (ins zprty:$_Zdn, immtype:$imm),
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index c70006d988c19b..0813aa1d50df16 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -1059,6 +1059,20 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {
   ret <vscale x 16 x i8> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqadd_b_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
 define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqadd_h_lowimm:
 ; CHECK:       // %bb.0:
@@ -1083,6 +1097,20 @@ define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) {
   ret <vscale x 8 x i16> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqadd_h_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqadd_s_lowimm:
 ; CHECK:       // %bb.0:
@@ -1107,6 +1135,20 @@ define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqadd_s_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
 define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqadd_d_lowimm:
 ; CHECK:       // %bb.0:
@@ -1131,6 +1173,20 @@ define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqadd_d_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; SQSUB
 
 define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
@@ -1145,6 +1201,20 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) {
   ret <vscale x 16 x i8> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqsub_b_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a,
+                                                                   <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
 define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: sqsub_h_lowimm:
 ; CHECK:       // %bb.0:
@@ -1169,6 +1239,20 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) {
   ret <vscale x 8 x i16> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqsub_h_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a,
+                                                                   <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: sqsub_s_lowimm:
 ; CHECK:       // %bb.0:
@@ -1193,6 +1277,20 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) {
   ret <vscale x 4 x i32> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqsub_s_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -1, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a,
+                                                                   <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
 define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: sqsub_d_lowimm:
 ; CHECK:       // %bb.0:
@@ -1217,6 +1315,20 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) {
   ret <vscale x 2 x i64> %out
 }
 
+; Immediate instruction form only supports positive values.
+define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqsub_d_negimm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.d, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -1, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a,
+                                                                   <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
 ; UQADD
 
 define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) {

kmclaughlin-arm · 2024-04-10T16:26:22Z

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

+  if (Val < 0)
+    return false;
+
+  switch (VT.SimpleTy) {


This switch looks the same as the one in SelectSVEAddSubImm above, would it be possible to use the same function for the signed instructions and pass an extra Signed flag for checking Val < 0?

I thought the same but whilst the switch is the same, the type of Val is different. For SelectSVEAddSubImm it is unsigned but here it is signed. The new function is essentially the top half of SelectSVECpyDupImm and the bottom half of SelectSVEAddSubImm with a new bit in the middle.

david-arm · 2024-04-11T06:56:39Z

llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll

+; CHECK-NEXT:    mov z1.b, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0


Perhaps I'm missing something, but the encoding of SQADD permits a 8-bit immediate that is treated as unsigned, i.e. all values in the range [0,255]. The documentation says:

The immediate is an unsigned value in the range 0 to 255

So I don't see why we can't lower a splat of immediates <i8 0xFF, i8 0xFF, ...> (i.e. a splat of the value 255) to the immediate form of sqadd?

Splat is not a signed operation and thus is just copies the least significant 8-bits across all lanes. SQADD is a signed operation and thus it interprets 0xFF as -1. Whereas the immediate form treats its immediate operand as unsigned and thus 0xFF is treated as 255. For a normal add this would not matter because the 8-bit result would be the same. However for saturating adds the first (correct) interpretation will result in $reg - 1 saturating at -128 whereas the latter will always incorrectly result in $reg + 255 ==> 127 because that is the maximum positive value you can represent in 8-bits.

Thinking about it a little more we should be able to invert negative constants along with the operation (i.e. "sqadd A, #-1" -> "sqsub A, #1". The question is whether that is something I should do within this patch? Or whether we're happy to keep the bug fix separate?

davemgreen · 2024-04-11T07:30:39Z

This certainly sounds like one of those problems that is easy to get wrong. It sounds like the generation of the intrinsics in clang might need to change too?

paulwalker-arm · 2024-04-11T09:46:59Z

This certainly sounds like one of those problems that is easy to get wrong. It sounds like the generation of the intrinsics in clang might need to change too?

Please can you elaborate if I've misunderstood? We don't typically expose immediate forms to Clang unless that's the only instruction format available, which is not the case for sqadd/sqsub. We leave it as a code generation optimisation to use them when appropriate.

paulwalker-arm · 2024-04-11T12:11:28Z

Given it involved editing largely the same code I've included the changes to ensure optimal code generation. Please let me know if you prefer the bug fix to land separately.

david-arm · 2024-04-11T13:31:27Z

llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

+                    .trunc(VT.getFixedSizeInBits())
+                    .getSExtValue();
+
+  // Signed saturating instructions treat their immediate operand as unsigned.


Is it worth expanding this comment a little to say something like

// Signed saturating instructions treat their immediate operand as unsigned,
// whereas the intrinsics are defined to work on signed operands.

david-arm

LGTM!

davemgreen · 2024-04-12T07:21:18Z

This certainly sounds like one of those problems that is easy to get wrong. It sounds like the generation of the intrinsics in clang might need to change too?

Please can you elaborate if I've misunderstood? We don't typically expose immediate forms to Clang unless that's the only instruction format available, which is not the case for sqadd/sqsub. We leave it as a code generation optimisation to use them when appropriate.

Hi. It was sqadd_n_s8 that I was thinking of, but looking again it looks like the argument is signed, not unsigned. There are some details in the documentation for the intrinsic about it producing sqadd or sqsub depending on the range of the value, so if that is now happening this sounds good to me!

…tions. (llvm#88136) The immediate forms of SQADD and SQSUB interpret their immediate operand as unsigned and thus effectively only support positive immediate operands. The original code is only wrong for the i8 cases because they previously accepted all values, however, the new patterns enable more uses and this I've added tests for the larger element types as well.

paulwalker-arm added 2 commits April 9, 2024 14:55

Add tests to highlight signed saturating add/sub bug.

284e314

llvmbot added the backend:AArch64 label Apr 9, 2024

paulwalker-arm requested review from kmclaughlin-arm and david-arm April 9, 2024 15:31

kmclaughlin-arm reviewed Apr 10, 2024

View reviewed changes

david-arm reviewed Apr 11, 2024

View reviewed changes

Add patterns to negate unsupported immediate and invert the operation.

c2bae3e

david-arm reviewed Apr 11, 2024

View reviewed changes

Improve comment as to why negative values are rejected.

f756d7a

david-arm approved these changes Apr 11, 2024

View reviewed changes

paulwalker-arm merged commit d48ebac into llvm:main Apr 15, 2024
4 checks passed

paulwalker-arm deleted the sve-signed-sat-bug-fix branch April 15, 2024 11:24

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. #88136

[LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. #88136

paulwalker-arm commented Apr 9, 2024 •

edited

llvmbot commented Apr 9, 2024

kmclaughlin-arm Apr 10, 2024

paulwalker-arm Apr 10, 2024 •

edited

david-arm Apr 11, 2024

paulwalker-arm Apr 11, 2024 •

edited

paulwalker-arm Apr 11, 2024

davemgreen commented Apr 11, 2024

paulwalker-arm commented Apr 11, 2024 •

edited

paulwalker-arm commented Apr 11, 2024

david-arm Apr 11, 2024

paulwalker-arm Apr 11, 2024

david-arm left a comment

davemgreen commented Apr 12, 2024

[LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. #88136

[LLVM][SVE][CodeGen] Fix incorrect isel for signed saturating instructions. #88136

Conversation

paulwalker-arm commented Apr 9, 2024 • edited

llvmbot commented Apr 9, 2024

kmclaughlin-arm Apr 10, 2024

Choose a reason for hiding this comment

paulwalker-arm Apr 10, 2024 • edited

Choose a reason for hiding this comment

david-arm Apr 11, 2024

Choose a reason for hiding this comment

paulwalker-arm Apr 11, 2024 • edited

Choose a reason for hiding this comment

paulwalker-arm Apr 11, 2024

Choose a reason for hiding this comment

davemgreen commented Apr 11, 2024

paulwalker-arm commented Apr 11, 2024 • edited

paulwalker-arm commented Apr 11, 2024

david-arm Apr 11, 2024

Choose a reason for hiding this comment

paulwalker-arm Apr 11, 2024

Choose a reason for hiding this comment

david-arm left a comment

Choose a reason for hiding this comment

davemgreen commented Apr 12, 2024

paulwalker-arm commented Apr 9, 2024 •

edited

paulwalker-arm Apr 10, 2024 •

edited

paulwalker-arm Apr 11, 2024 •

edited

paulwalker-arm commented Apr 11, 2024 •

edited