Skip to content

Commit

Permalink
Allow PeepholeOptimizer to fold a few more cases
Browse files Browse the repository at this point in the history
The condition for clearing the folding candidate list was lumped together
with the "uninteresting instruction" condition. This is too conservative;
for example, we don't need to clear the list when encountering an IMPLICIT_DEF.

Differential Revision: http://reviews.llvm.org/D11591

llvm-svn: 244577
  • Loading branch information
Michael Kuperstein committed Aug 11, 2015
1 parent c186ac7 commit 82814f6
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 18 deletions.
9 changes: 4 additions & 5 deletions llvm/lib/CodeGen/PeepholeOptimizer.cpp
Expand Up @@ -1236,14 +1236,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {

// If there exists an instruction which belongs to the following
// categories, we will discard the load candidates.
if (MI->mayStore() || MI->isCall() || MI->hasUnmodeledSideEffects())
FoldAsLoadDefCandidates.clear();

if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() ||
MI->hasUnmodeledSideEffects()) {
FoldAsLoadDefCandidates.clear();
MI->hasUnmodeledSideEffects())
continue;
}
if (MI->mayStore() || MI->isCall())
FoldAsLoadDefCandidates.clear();

if ((isUncoalescableCopy(*MI) &&
optimizeUncoalescableCopy(MI, LocalMIs)) ||
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/X86/avx-cvt.ll
Expand Up @@ -113,8 +113,7 @@ define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
define void @fpext() nounwind uwtable {
; CHECK-LABEL: fpext:
; CHECK: # BB#0:
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vcvtss2sd -{{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: retq
%f = alloca float, align 4
Expand Down
20 changes: 9 additions & 11 deletions llvm/test/CodeGen/X86/shift-bmi2.ll
Expand Up @@ -30,11 +30,10 @@ entry:
%x = load i32, i32* %p
%shl = shl i32 %x, %shamt
; BMI2: shl32p
; Source order scheduling prevents folding, rdar:14208996.
; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: shl32p
; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
Expand Down Expand Up @@ -75,7 +74,7 @@ entry:
%x = load i64, i64* %p
%shl = shl i64 %x, %shamt
; BMI264: shl64p
; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
Expand Down Expand Up @@ -107,11 +106,10 @@ entry:
%x = load i32, i32* %p
%shl = lshr i32 %x, %shamt
; BMI2: lshr32p
; Source order scheduling prevents folding, rdar:14208996.
; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: lshr32p
; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
Expand All @@ -130,7 +128,7 @@ entry:
%x = load i64, i64* %p
%shl = lshr i64 %x, %shamt
; BMI264: lshr64p
; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}
Expand All @@ -153,10 +151,10 @@ entry:
%shl = ashr i32 %x, %shamt
; BMI2: ashr32p
; Source order scheduling used to prevent folding here (rdar:14208996); the load is now expected to fold.
; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: ashr32p
; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i32 %shl
}
Expand All @@ -175,7 +173,7 @@ entry:
%x = load i64, i64* %p
%shl = ashr i64 %x, %shamt
; BMI264: ashr64p
; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
ret i64 %shl
}

0 comments on commit 82814f6

Please sign in to comment.