From 22ee191c3ea92dd7054975c9e9074fbf3c3825f5 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Thu, 21 Jun 2018 16:02:05 +0000
Subject: [PATCH] DAG combine "and|or (select c, -1, 0), x" -> "select c, x, 0|-1"

Allow folding of "and"/"or" binops with a non-constant operand when the
select arguments are 0/-1 values. Normally the "and" form of this
pattern never reaches the DAG combiner because InstCombine has already
simplified it. However, AMDGPU produces it during lowering, where
InstCombine has no chance to optimize it out. The "or" form of the
pattern, in turn, can reach the DAG combiner directly.

Differential Revision: https://reviews.llvm.org/D48301

llvm-svn: 335250
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  17 ++-
 llvm/test/CodeGen/AMDGPU/dagcombine-select.ll | 102 ++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/udivrem.ll           |  26 ++---
 llvm/test/CodeGen/X86/dagcombine-select.ll    |  90 ++++++++++++----
 4 files changed, 195 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 22d55a1a0efdf..443c9144d319f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1901,8 +1901,19 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
     return SDValue();
 
   // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and" and "or" with either 0 or -1 in which case we can
+  // propagate non constant operands into select. I.e.:
+  // and (select Cond, 0, -1), X --> select Cond, 0, X
+  // or X, (select Cond, -1, 0) --> select Cond, -1, X
+  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+                         (isNullConstantOrNullSplatConstant(CT) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
+                         (isNullConstantOrNullSplatConstant(CF) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CF));
+
   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
-  if (!isConstantOrConstantVector(CBO, true) &&
+  if (!CanFoldNonConst &&
+      !isConstantOrConstantVector(CBO, true) &&
       !isConstantFPBuildVectorOrConstantFP(CBO))
     return SDValue();
 
@@ -1923,14 +1934,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
   SDLoc DL(Sel);
   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
-  if (!NewCT.isUndef() &&
+  if (!CanFoldNonConst && !NewCT.isUndef() &&
       !isConstantOrConstantVector(NewCT, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCT))
     return SDValue();
 
   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
-  if (!NewCF.isUndef() &&
+  if (!CanFoldNonConst && !NewCF.isUndef() &&
       !isConstantOrConstantVector(NewCF, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
index 645e6e1e2dd34..0e568a6568196 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,5 +1,107 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
+; GCN-LABEL: {{^}}select_and1:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = and i32 %y, %s
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and2:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = and i32 %s, %y
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and3:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 -1, i32 0
+  %a = and i32 %y, %s
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and_v4:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN: store_dword
+define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  %a = and <4 x i32> %s, %y
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or1:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = or i32 %y, %s
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or2:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = or i32 %s, %y
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or3:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN: store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 -1, i32 0
+  %a = or i32 %y, %s
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or_v4:
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN: store_dword
+define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  %a = or <4 x i32> %s, %y
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
+  ret void
+}
+
 ; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
 define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll
index 08b764824cab4..b85c6bffe7e1a 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll
@@ -31,25 +31,25 @@
 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]]
+; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]]
+; SI: v_mul_hi_u32 [[E:v[0-9]+]], [[CND1]], [[RCP]]
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
-; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
+; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]]
+; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]],
+; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]]
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) {
   %result0 = udiv i32 %x, %y
@@ -124,8 +124,6 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -147,8 +145,6 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -157,6 +153,7 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
   %result0 = udiv <2 x i32> %x, %y
@@ -274,8 +271,6 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i3
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -297,8 +292,6 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i3
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -320,8 +313,6 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i3
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -339,6 +330,7 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i3
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
   %result0 = udiv <4 x i32> %x, %y
diff --git a/llvm/test/CodeGen/X86/dagcombine-select.ll b/llvm/test/CodeGen/X86/dagcombine-select.ll
index 6da06657c5e0a..8be1333cec2fd 100644
--- a/llvm/test/CodeGen/X86/dagcombine-select.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-select.ll
@@ -6,9 +6,7 @@ define i32 @select_and1(i32 %x, i32 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -21,9 +19,7 @@ define i32 @select_and2(i32 %x, i32 %y) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -31,14 +27,42 @@ define i32 @select_and2(i32 %x, i32 %y) {
   ret i32 %a
 }
 
+define i32 @select_and3(i32 %x, i32 %y) {
+; CHECK-LABEL: select_and3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    cmovll %esi, %eax
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 -1, i32 0
+  %a = and i32 %y, %s
+  ret i32 %a
+}
+
+define <4 x i32> @select_and_v4(i32 %x, <4 x i32> %y) {
+; CHECK-LABEL: select_and_v4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    jl .LBB3_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    movaps %xmm0, %xmm1
+; CHECK-NEXT:  .LBB3_2:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  %a = and <4 x i32> %s, %y
+  ret <4 x i32> %a
+}
+
 define i32 @select_or1(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_or1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovll %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -49,11 +73,9 @@ define i32 @select_or1(i32 %x, i32 %y) {
 define i32 @select_or2(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_or2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovll %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -61,6 +83,34 @@ define i32 @select_or2(i32 %x, i32 %y) {
   ret i32 %a
 }
 
+define i32 @select_or3(i32 %x, i32 %y) {
+; CHECK-LABEL: select_or3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 -1, i32 0
+  %a = or i32 %y, %s
+  ret i32 %a
+}
+
+define <4 x i32> @select_or_v4(i32 %x, <4 x i32> %y) {
+; CHECK-LABEL: select_or_v4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    jl .LBB7_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  %a = or <4 x i32> %s, %y
+  ret <4 x i32> %a
+}
+
 define i32 @sel_constants_sub_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: sel_constants_sub_constant_sel_constants:
 ; CHECK:       # %bb.0:
@@ -186,11 +236,11 @@ define double @fsub_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: fsub_constant_sel_constants:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB13_1
+; CHECK-NEXT:    jne .LBB17_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB13_1:
+; CHECK-NEXT:  .LBB17_1:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, double -4.0, double 23.3
@@ -202,11 +252,11 @@ define double @fdiv_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: fdiv_constant_sel_constants:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB14_1
+; CHECK-NEXT:    jne .LBB18_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB14_1:
+; CHECK-NEXT:  .LBB18_1:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, double -4.0, double 23.3
@@ -218,11 +268,11 @@ define double @frem_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: frem_constant_sel_constants:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB15_1
+; CHECK-NEXT:    jne .LBB19_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB15_1:
+; CHECK-NEXT:  .LBB19_1:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, double -4.0, double 23.3