Skip to content

Commit

Permalink
Fix generic shift expansion when shift amount is 0
Browse files Browse the repository at this point in the history
Summary:
This fixes http://llvm.org/bugs/show_bug.cgi?id=16439. 

This is one possible way to approach this. The other would be to split InL>>(nbits-Amt) into (InL>>(nbits-1-Amt))>>1, which is also valid since since we only need to care about Amt up nbits-1. It's hard to tell which one is better since the shift might be expensive if this stage of expansion is not yet a legal machine integer, whereas comparisons with zero are relatively cheap at all sizes, but more expensive than a shift if the shift is on a legal machine type. 

Patch by Keno Fischer!

Test Plan: regression test from http://reviews.llvm.org/D7752

Reviewers: chfast, resistor

Reviewed By: chfast, resistor

Subscribers: sanjoy, resistor, chfast, llvm-commits

Differential Revision: http://reviews.llvm.org/D4978

llvm-svn: 235370
  • Loading branch information
chfast committed Apr 21, 2015
1 parent 2718fb1 commit 57c2f7c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 10 deletions.
16 changes: 9 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Expand Up @@ -1547,6 +1547,9 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, NVBitsNode, ISD::SETULT);
SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy),
Amt, DAG.getConstant(0, ShTy),
ISD::SETEQ);

SDValue LoS, HiS, LoL, HiL;
switch (N->getOpcode()) {
Expand All @@ -1556,16 +1559,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
HiS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
// FIXME: If Amt is zero, the following shift generates an undefined result
// on some architectures.
DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));

// Long: ShAmt >= NVTBits
LoL = DAG.getConstant(0, NVT); // Lo part is zero.
HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.

Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
Hi = DAG.getSelect(dl, NVT, isZero, InH,
DAG.getSelect(dl, NVT, isShort, HiS, HiL));
return true;
case ISD::SRL:
// Short: ShAmt < NVTBits
Expand All @@ -1580,24 +1582,24 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
HiL = DAG.getConstant(0, NVT); // Hi part is zero.
LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.

Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
Lo = DAG.getSelect(dl, NVT, isZero, InL,
DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
case ISD::SRA:
// Short: ShAmt < NVTBits
HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
// FIXME: If Amt is zero, the following shift generates an undefined result
// on some architectures.
DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));

// Long: ShAmt >= NVTBits
HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
DAG.getConstant(NVTBits-1, ShTy));
LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.

Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL);
Lo = DAG.getSelect(dl, NVT, isZero, InL,
DAG.getSelect(dl, NVT, isShort, LoS, LoL));
Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL);
return true;
}
Expand Down
18 changes: 15 additions & 3 deletions llvm/test/CodeGen/X86/shift-i256.ll
@@ -1,9 +1,21 @@
; RUN: llc < %s -march=x86
; RUN: llc < %s -march=x86-64
; RUN: llc < %s -march=x86 | FileCheck %s
; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s -check-prefix=CHECK-X64
; RUN: llc < %s -march=x86-64 -O2 | FileCheck %s -check-prefix=CHECK-X64

define void @t(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
; CHECK-LABEL: shift1
define void @shift1(i256 %x, i256 %a, i256* nocapture %r) nounwind readnone {
entry:
%0 = ashr i256 %x, %a
store i256 %0, i256* %r
ret void
}

; CHECK-LABEL: shift2
define i256 @shift2(i256 %c) nounwind
{
%b = shl i256 1, %c ; %c must not be a constant
; Special case when %c is 0:
; CHECK-X64: testb [[REG:%r[0-9]+b]], [[REG]]
; CHECK-X64: cmoveq
ret i256 %b
}

0 comments on commit 57c2f7c

Please sign in to comment.