Skip to content

Commit

Permalink
Revert 374373: [Codegen] Alter the default promotion for saturating a…
Browse files Browse the repository at this point in the history
…dds and subs

This commit is not extending the promoted integers as it should. Reverting
whilst I look into the details.

llvm-svn: 374592
  • Loading branch information
davemgreen committed Oct 11, 2019
1 parent f30ae71 commit 7c30af8
Show file tree
Hide file tree
Showing 17 changed files with 769 additions and 483 deletions.
92 changes: 31 additions & 61 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Expand Up @@ -642,78 +642,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
}

SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
// If the promoted type is legal, we can convert this to:
// 1. ANY_EXTEND iN to iM
// 2. SHL by M-N
// 3. [US][ADD|SUB]SAT
// 4. L/ASHR by M-N
// Else it is more efficient to convert this to a min and a max
// operation in the higher precision arithmetic.
// For promoting iN -> iM, this can be expanded by
// 1. ANY_EXTEND iN to iM
// 2. SHL by M-N
// 3. [US][ADD|SUB]SAT
// 4. L/ASHR by M-N
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
unsigned OldBits = Op1.getScalarValueSizeInBits();

unsigned Opcode = N->getOpcode();
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
case ISD::SSUBSAT:
ShiftOp = ISD::SRA;
break;
case ISD::UADDSAT:
case ISD::USUBSAT:
ShiftOp = ISD::SRL;
break;
default:
llvm_unreachable("Expected opcode to be signed or unsigned saturation "
"addition or subtraction");
}

SDValue Op1Promoted = GetPromotedInteger(Op1);
SDValue Op2Promoted = GetPromotedInteger(Op2);

EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();

if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
case ISD::SSUBSAT:
ShiftOp = ISD::SRA;
break;
case ISD::UADDSAT:
case ISD::USUBSAT:
ShiftOp = ISD::SRL;
break;
default:
llvm_unreachable("Expected opcode to be signed or unsigned saturation "
"addition or subtraction");
}

unsigned SHLAmount = NewBits - OldBits;
EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
Op2Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);

SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
} else {
if (Opcode == ISD::USUBSAT) {
SDValue Max =
DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
}

if (Opcode == ISD::UADDSAT) {
APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
}

unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Result =
DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
return Result;
}
unsigned SHLAmount = NewBits - OldBits;
EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
Op1Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
Op2Promoted =
DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);

SDValue Result =
DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}

SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
Expand Down
45 changes: 24 additions & 21 deletions llvm/test/CodeGen/AArch64/sadd_sat.ll
Expand Up @@ -39,13 +39,14 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
define i16 @func16(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: func16:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, w1
; CHECK-NEXT: mov w9, #32767
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
; CHECK-NEXT: mov w9, #-32768
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: lsl w8, w0, #16
; CHECK-NEXT: adds w10, w8, w1, lsl #16
; CHECK-NEXT: mov w9, #2147483647
; CHECK-NEXT: cmp w10, #0 // =0
; CHECK-NEXT: cinv w9, w9, ge
; CHECK-NEXT: adds w8, w8, w1, lsl #16
; CHECK-NEXT: csel w8, w9, w8, vs
; CHECK-NEXT: asr w0, w8, #16
; CHECK-NEXT: ret
%tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %y);
ret i16 %tmp;
Expand All @@ -54,13 +55,14 @@ define i16 @func16(i16 %x, i16 %y) nounwind {
define i8 @func8(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: func8:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, w1
; CHECK-NEXT: mov w9, #127
; CHECK-NEXT: cmp w8, #127 // =127
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: cmn w8, #128 // =128
; CHECK-NEXT: mov w9, #-128
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: lsl w8, w0, #24
; CHECK-NEXT: adds w10, w8, w1, lsl #24
; CHECK-NEXT: mov w9, #2147483647
; CHECK-NEXT: cmp w10, #0 // =0
; CHECK-NEXT: cinv w9, w9, ge
; CHECK-NEXT: adds w8, w8, w1, lsl #24
; CHECK-NEXT: csel w8, w9, w8, vs
; CHECK-NEXT: asr w0, w8, #24
; CHECK-NEXT: ret
%tmp = call i8 @llvm.sadd.sat.i8(i8 %x, i8 %y);
ret i8 %tmp;
Expand All @@ -69,13 +71,14 @@ define i8 @func8(i8 %x, i8 %y) nounwind {
define i4 @func3(i4 %x, i4 %y) nounwind {
; CHECK-LABEL: func3:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w0, w1
; CHECK-NEXT: mov w9, #7
; CHECK-NEXT: cmp w8, #7 // =7
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: cmn w8, #8 // =8
; CHECK-NEXT: mov w9, #-8
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: lsl w8, w0, #28
; CHECK-NEXT: adds w10, w8, w1, lsl #28
; CHECK-NEXT: mov w9, #2147483647
; CHECK-NEXT: cmp w10, #0 // =0
; CHECK-NEXT: cinv w9, w9, ge
; CHECK-NEXT: adds w8, w8, w1, lsl #28
; CHECK-NEXT: csel w8, w9, w8, vs
; CHECK-NEXT: asr w0, w8, #28
; CHECK-NEXT: ret
%tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y);
ret i4 %tmp;
Expand Down
109 changes: 72 additions & 37 deletions llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
Expand Up @@ -236,23 +236,30 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-NEXT: ldrb w9, [x1]
; CHECK-NEXT: ldrb w10, [x0, #1]
; CHECK-NEXT: ldrb w11, [x1, #1]
; CHECK-NEXT: ldrb w12, [x0, #2]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: ldrb w8, [x1, #2]
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: ldrb w8, [x0, #2]
; CHECK-NEXT: ldrb w9, [x1, #2]
; CHECK-NEXT: mov v0.h[1], w10
; CHECK-NEXT: ldrb w9, [x0, #3]
; CHECK-NEXT: ldrb w10, [x1, #3]
; CHECK-NEXT: mov v1.h[1], w11
; CHECK-NEXT: ldrb w10, [x0, #3]
; CHECK-NEXT: ldrb w11, [x1, #3]
; CHECK-NEXT: mov v0.h[2], w8
; CHECK-NEXT: mov v1.h[2], w9
; CHECK-NEXT: mov v0.h[3], w10
; CHECK-NEXT: mov v1.h[3], w11
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: movi v1.4h, #127
; CHECK-NEXT: smin v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mvni v1.4h, #127
; CHECK-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-NEXT: mov v0.h[2], w12
; CHECK-NEXT: mov v1.h[2], w8
; CHECK-NEXT: mov v0.h[3], w9
; CHECK-NEXT: mov v1.h[3], w10
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: add v3.4h, v0.4h, v1.4h
; CHECK-NEXT: cmlt v4.4h, v3.4h, #0
; CHECK-NEXT: mvni v2.4h, #128, lsl #8
; CHECK-NEXT: cmlt v1.4h, v1.4h, #0
; CHECK-NEXT: cmgt v0.4h, v0.4h, v3.4h
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: bsl v2.8b, v4.8b, v5.8b
; CHECK-NEXT: eor v0.8b, v1.8b, v0.8b
; CHECK-NEXT: bsl v0.8b, v2.8b, v3.8b
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: str s0, [x2]
; CHECK-NEXT: ret
Expand All @@ -271,14 +278,21 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind {
; CHECK-NEXT: ldrb w10, [x0, #1]
; CHECK-NEXT: ldrb w11, [x1, #1]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: shl v2.2s, v2.2s, #24
; CHECK-NEXT: shl v0.2s, v0.2s, #24
; CHECK-NEXT: add v3.2s, v0.2s, v2.2s
; CHECK-NEXT: cmlt v4.2s, v3.2s, #0
; CHECK-NEXT: mvni v1.2s, #128, lsl #24
; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
; CHECK-NEXT: cmgt v0.2s, v0.2s, v3.2s
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: eor v0.8b, v2.8b, v0.8b
; CHECK-NEXT: bsl v1.8b, v4.8b, v5.8b
; CHECK-NEXT: bsl v0.8b, v1.8b, v3.8b
; CHECK-NEXT: ushr v0.2s, v0.2s, #24
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strb w8, [x2, #1]
Expand Down Expand Up @@ -322,14 +336,21 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind {
; CHECK-NEXT: ldrh w10, [x0, #2]
; CHECK-NEXT: ldrh w11, [x1, #2]
; CHECK-NEXT: fmov s0, w8
; CHECK-NEXT: fmov s1, w9
; CHECK-NEXT: fmov s2, w9
; CHECK-NEXT: mov v0.s[1], w10
; CHECK-NEXT: mov v1.s[1], w11
; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
; CHECK-NEXT: movi v1.2s, #127, msl #8
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mvni v1.2s, #127, msl #8
; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: shl v2.2s, v2.2s, #16
; CHECK-NEXT: shl v0.2s, v0.2s, #16
; CHECK-NEXT: add v3.2s, v0.2s, v2.2s
; CHECK-NEXT: cmlt v4.2s, v3.2s, #0
; CHECK-NEXT: mvni v1.2s, #128, lsl #24
; CHECK-NEXT: cmlt v2.2s, v2.2s, #0
; CHECK-NEXT: cmgt v0.2s, v0.2s, v3.2s
; CHECK-NEXT: mvn v5.8b, v4.8b
; CHECK-NEXT: eor v0.8b, v2.8b, v0.8b
; CHECK-NEXT: bsl v1.8b, v4.8b, v5.8b
; CHECK-NEXT: bsl v0.8b, v1.8b, v3.8b
; CHECK-NEXT: ushr v0.2s, v0.2s, #16
; CHECK-NEXT: mov w8, v0.s[1]
; CHECK-NEXT: fmov w9, s0
; CHECK-NEXT: strh w8, [x2, #2]
Expand Down Expand Up @@ -441,11 +462,18 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; CHECK-LABEL: v16i4:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.16b, #7
; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.16b, #248
; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #4
; CHECK-NEXT: shl v0.16b, v0.16b, #4
; CHECK-NEXT: add v3.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v3.16b, #0
; CHECK-NEXT: movi v2.16b, #127
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v3.16b
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v2.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: sshr v0.16b, v0.16b, #4
; CHECK-NEXT: ret
%z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
ret <16 x i4> %z
Expand All @@ -454,11 +482,18 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
; CHECK-NEXT: shl v1.16b, v1.16b, #7
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: add v3.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v4.16b, v3.16b, #0
; CHECK-NEXT: movi v2.16b, #127
; CHECK-NEXT: cmlt v1.16b, v1.16b, #0
; CHECK-NEXT: cmgt v0.16b, v0.16b, v3.16b
; CHECK-NEXT: mvn v5.16b, v4.16b
; CHECK-NEXT: bsl v2.16b, v4.16b, v5.16b
; CHECK-NEXT: eor v0.16b, v1.16b, v0.16b
; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b
; CHECK-NEXT: sshr v0.16b, v0.16b, #7
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.sadd.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z
Expand Down
45 changes: 24 additions & 21 deletions llvm/test/CodeGen/AArch64/ssub_sat.ll
Expand Up @@ -39,13 +39,14 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
define i16 @func16(i16 %x, i16 %y) nounwind {
; CHECK-LABEL: func16:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: mov w9, #32767
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
; CHECK-NEXT: mov w9, #-32768
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: lsl w8, w0, #16
; CHECK-NEXT: subs w10, w8, w1, lsl #16
; CHECK-NEXT: mov w9, #2147483647
; CHECK-NEXT: cmp w10, #0 // =0
; CHECK-NEXT: cinv w9, w9, ge
; CHECK-NEXT: subs w8, w8, w1, lsl #16
; CHECK-NEXT: csel w8, w9, w8, vs
; CHECK-NEXT: asr w0, w8, #16
; CHECK-NEXT: ret
%tmp = call i16 @llvm.ssub.sat.i16(i16 %x, i16 %y);
ret i16 %tmp;
Expand All @@ -54,13 +55,14 @@ define i16 @func16(i16 %x, i16 %y) nounwind {
define i8 @func8(i8 %x, i8 %y) nounwind {
; CHECK-LABEL: func8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: mov w9, #127
; CHECK-NEXT: cmp w8, #127 // =127
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: cmn w8, #128 // =128
; CHECK-NEXT: mov w9, #-128
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: lsl w8, w0, #24
; CHECK-NEXT: subs w10, w8, w1, lsl #24
; CHECK-NEXT: mov w9, #2147483647
; CHECK-NEXT: cmp w10, #0 // =0
; CHECK-NEXT: cinv w9, w9, ge
; CHECK-NEXT: subs w8, w8, w1, lsl #24
; CHECK-NEXT: csel w8, w9, w8, vs
; CHECK-NEXT: asr w0, w8, #24
; CHECK-NEXT: ret
%tmp = call i8 @llvm.ssub.sat.i8(i8 %x, i8 %y);
ret i8 %tmp;
Expand All @@ -69,13 +71,14 @@ define i8 @func8(i8 %x, i8 %y) nounwind {
define i4 @func3(i4 %x, i4 %y) nounwind {
; CHECK-LABEL: func3:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, w1
; CHECK-NEXT: mov w9, #7
; CHECK-NEXT: cmp w8, #7 // =7
; CHECK-NEXT: csel w8, w8, w9, lt
; CHECK-NEXT: cmn w8, #8 // =8
; CHECK-NEXT: mov w9, #-8
; CHECK-NEXT: csel w0, w8, w9, gt
; CHECK-NEXT: lsl w8, w0, #28
; CHECK-NEXT: subs w10, w8, w1, lsl #28
; CHECK-NEXT: mov w9, #2147483647
; CHECK-NEXT: cmp w10, #0 // =0
; CHECK-NEXT: cinv w9, w9, ge
; CHECK-NEXT: subs w8, w8, w1, lsl #28
; CHECK-NEXT: csel w8, w9, w8, vs
; CHECK-NEXT: asr w0, w8, #28
; CHECK-NEXT: ret
%tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y);
ret i4 %tmp;
Expand Down

0 comments on commit 7c30af8

Please sign in to comment.