release/18.x: [X86] Fix miscompile in combineShiftRightArithmetic #86728
Conversation
@llvm/pr-subscribers-backend-x86

Author: AtariDreams (AtariDreams)

Changes:

When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift.

(cherry picked from commit 3e6e54e)

Full diff: https://github.com/llvm/llvm-project/pull/86728.diff

2 Files Affected:
- llvm/lib/Target/X86/X86ISelLowering.cpp
- llvm/test/CodeGen/X86/sar_fold.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9e64726fb6fff7..71fc6b5047eaa9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
return V;
- // fold (ashr (shl, a, [56,48,32,24,16]), SarConst)
- // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or
- // into (lshr, (sext (a), SarConst - [56,48,32,24,16]))
- // depending on sign of (SarConst - [56,48,32,24,16])
+ // fold (SRA (SHL X, ShlConst), SraConst)
+ // into (SHL (sext_in_reg X), ShlConst - SraConst)
+ // or (sext_in_reg X)
+ // or (SRA (sext_in_reg X), SraConst - ShlConst)
+ // depending on relation between SraConst and ShlConst.
+ // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows
+ // us to do the sext_in_reg from corresponding bit.
// sexts in X86 are MOVs. The MOVs have the same code size
// as above SHIFTs (only SHIFT on 1 has lower code size).
@@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
APInt ShlConst = N01->getAsAPIntVal();
- APInt SarConst = N1->getAsAPIntVal();
+ APInt SraConst = N1->getAsAPIntVal();
EVT CVT = N1.getValueType();
- if (SarConst.isNegative())
+ if (CVT != N01.getValueType())
+ return SDValue();
+ if (SraConst.isNegative())
return SDValue();
for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
unsigned ShiftSize = SVT.getSizeInBits();
- // skipping types without corresponding sext/zext and
- // ShlConst that is not one of [56,48,32,24,16]
+ // Only deal with (Size - ShlConst) being equal to 8, 16 or 32.
if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
continue;
SDLoc DL(N);
SDValue NN =
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
- SarConst = SarConst - (Size - ShiftSize);
- if (SarConst == 0)
+ if (SraConst.eq(ShlConst))
return NN;
- if (SarConst.isNegative())
+ if (SraConst.ult(ShlConst))
return DAG.getNode(ISD::SHL, DL, VT, NN,
- DAG.getConstant(-SarConst, DL, CVT));
+ DAG.getConstant(ShlConst - SraConst, DL, CVT));
return DAG.getNode(ISD::SRA, DL, VT, NN,
- DAG.getConstant(SarConst, DL, CVT));
+ DAG.getConstant(SraConst - ShlConst, DL, CVT));
}
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll
index 21655e19440afe..0f1396954b03a1 100644
--- a/llvm/test/CodeGen/X86/sar_fold.ll
+++ b/llvm/test/CodeGen/X86/sar_fold.ll
@@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 {
%2 = ashr exact i32 %1, 25
ret i32 %2
}
+
+define void @shl144sar48(ptr %p) #0 {
+; CHECK-LABEL: shl144sar48:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movswl (%eax), %ecx
+; CHECK-NEXT: movl %ecx, %edx
+; CHECK-NEXT: sarl $31, %edx
+; CHECK-NEXT: shldl $2, %ecx, %edx
+; CHECK-NEXT: shll $2, %ecx
+; CHECK-NEXT: movl %ecx, 12(%eax)
+; CHECK-NEXT: movl %edx, 16(%eax)
+; CHECK-NEXT: movl $0, 8(%eax)
+; CHECK-NEXT: movl $0, 4(%eax)
+; CHECK-NEXT: movl $0, (%eax)
+; CHECK-NEXT: retl
+ %a = load i160, ptr %p
+ %1 = shl i160 %a, 144
+ %2 = ashr exact i160 %1, 46
+ store i160 %2, ptr %p
+ ret void
+}
+
+define void @shl144sar2(ptr %p) #0 {
+; CHECK-LABEL: shl144sar2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movswl (%eax), %ecx
+; CHECK-NEXT: shll $14, %ecx
+; CHECK-NEXT: movl %ecx, 16(%eax)
+; CHECK-NEXT: movl $0, 8(%eax)
+; CHECK-NEXT: movl $0, 12(%eax)
+; CHECK-NEXT: movl $0, 4(%eax)
+; CHECK-NEXT: movl $0, (%eax)
+; CHECK-NEXT: retl
+ %a = load i160, ptr %p
+ %1 = shl i160 %a, 144
+ %2 = ashr exact i160 %1, 2
+ store i160 %2, ptr %p
+ ret void
+}
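
For intuition, the three cases named in the updated comment can be seen on plain i32, in the style of the existing sar_fold.ll tests. This is an illustrative sketch, not part of the patch, and the function names are made up:

; ShlConst == SraConst: the pair collapses to a sign extension of the low 8 bits
define i32 @case_eq(i32 %x) {
  %1 = shl i32 %x, 24
  %2 = ashr i32 %1, 24        ; -> (sext_in_reg %x, i8), i.e. a movsbl
  ret i32 %2
}

; SraConst < ShlConst: residual left shift of ShlConst - SraConst
define i32 @case_ult(i32 %x) {
  %1 = shl i32 %x, 24
  %2 = ashr i32 %1, 23        ; -> (shl (sext_in_reg %x, i8), 1)
  ret i32 %2
}

; SraConst > ShlConst: residual arithmetic right shift, as in shl24sar25 above
define i32 @case_ugt(i32 %x) {
  %1 = shl i32 %x, 24
  %2 = ashr i32 %1, 25        ; -> (sra (sext_in_reg %x, i8), 1)
  ret i32 %2
}

In every case only the low 8 bits of %x survive the shl, so the combine can start from a sign extension and apply just the residual shift.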
The branch was force-pushed from de81ccd to d9f9230.
@phoebewang What do you think about backporting this?
I didn't review it. Maybe @topperc can evaluate it.
I think this is ok to backport.
Hi @AtariDreams (or anyone else). If you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR.
When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift.
Also do an early out during pre-legalization in case c1 and c2 have different types, as that otherwise complicates the comparison of c1 and c2 a bit.
(cherry picked from commit 3e6e54e)
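
As a footnote on why the unsigned treatment matters, here is a minimal standalone sketch (not from the PR) of the wraparound the old subtract-then-check-sign logic was exposed to. The 8-bit APInt width is an assumption chosen purely to make the wrap visible; it is not taken from the patch. It should build against LLVM's Support library (e.g. clang++ sketch.cpp $(llvm-config --cxxflags --ldflags --libs support)):

// Sketch only: shows how SraConst - ShlConst can wrap to a positive value,
// making the old isNegative() check pick SRA instead of SHL.
#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  llvm::APInt ShlConst(8, 144); // c1 from the shl144sar2 test above
  llvm::APInt SraConst(8, 2);   // c2 from the shl144sar2 test above

  // Old logic: subtract, then ask for the sign.
  llvm::APInt Diff = SraConst - ShlConst;
  // 2 - 144 wraps to 114 in 8 bits, so isNegative() is false and the old
  // code would emit SRA by 114 instead of SHL by 142.
  std::printf("old: diff=%llu negative=%d\n",
              (unsigned long long)Diff.getZExtValue(), (int)Diff.isNegative());

  // New logic: decide the shift direction with an unsigned comparison first.
  if (SraConst.eq(ShlConst))
    std::printf("new: sext_in_reg only\n");
  else if (SraConst.ult(ShlConst))
    std::printf("new: SHL by %llu\n",
                (unsigned long long)(ShlConst - SraConst).getZExtValue());
  else
    std::printf("new: SRA by %llu\n",
                (unsigned long long)(SraConst - ShlConst).getZExtValue());
  return 0;
}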