diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 920dff935daed..8eb59aa70fb36 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1754,24 +1754,45 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); - // If (1) we only need the sign-bit, (2) the setcc operands are the same - // width as the setcc result, and (3) the result of a setcc conforms to 0 or - // -1, we may be able to bypass the setcc. - if (DemandedBits.isSignMask() && - Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(Op0.getValueType()) == - BooleanContent::ZeroOrNegativeOneBooleanContent) { - // If we're testing X < 0, then this compare isn't needed - just use X! - // FIXME: We're limiting to integer types here, but this should also work - // if we don't care about FP signed-zero. The use of SETLT with FP means - // that we don't care about NaNs. - if (CC == ISD::SETLT && Op1.getValueType().isInteger() && - (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode()))) - return TLO.CombineTo(Op, Op0); - - // TODO: Should we check for other forms of sign-bit comparisons? - // Examples: X <= -1, X >= 0 + // If we're testing X < 0, X >= 0, X <= -1 (X is of integer type) or X > -1 + // (X is of integer type) then we only need the sign mask of the previous + // result + // FIXME: We're limiting to integer types for X < 0 or X >= 0 here, but this + // should also work if we don't care about FP signed-zero. The use of SETLT + // with FP means that we don't care about NaNs. 
+    // Classify the compare by which sign-bit state of X makes it true. Only
+    // integer compares against 0 / -1 (scalar or splat) are recognized.
+    const bool IsIntCompare = Op1.getValueType().isInteger();
+    // X < 0 or X <= -1: true exactly when the sign bit of X is set.
+    const bool IsSignBitSetTest =
+        IsIntCompare && ((CC == ISD::SETLT && isNullOrNullSplat(Op1)) ||
+                         (CC == ISD::SETLE && isAllOnesOrAllOnesSplat(Op1)));
+    // X >= 0 or X > -1: true exactly when the sign bit of X is clear.
+    const bool IsSignBitClearTest =
+        IsIntCompare && ((CC == ISD::SETGE && isNullOrNullSplat(Op1)) ||
+                         (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Op1)));
+    if (IsSignBitSetTest || IsSignBitClearTest) {
+      // Only the sign bit of Op0 is observable through this compare, so try to
+      // simplify Op0 under that demand. If the recursive call reports a
+      // change it has already recorded its replacement in TLO; return right
+      // away, because TLO tracks a single pending replacement and a later
+      // TLO.CombineTo() would overwrite it.
+      KnownBits KnownOp0;
+      if (SimplifyDemandedBits(
+              Op0, APInt::getSignMask(Op0.getScalarValueSizeInBits()),
+              DemandedElts, KnownOp0, TLO, Depth + 1))
+        return true;
+      // If (1) we only need the sign-bit, (2) the setcc operands are the same
+      // width as the setcc result, and (3) the result of a setcc conforms to 0
+      // or -1, we may be able to bypass the setcc.
+      if (DemandedBits.isSignMask() &&
+          Op0.getScalarValueSizeInBits() == BitWidth &&
+          getBooleanContents(Op0.getValueType()) ==
+              BooleanContent::ZeroOrNegativeOneBooleanContent) {
+        // The sign bit of X already is the demanded result bit.
+        if (IsSignBitSetTest)
+          return TLO.CombineTo(Op, Op0);
+        // If we remove a >= 0 or > -1 (for integers), we need to introduce a
+        // NOT operation so the sign bit is inverted.
+        SDLoc DL(Op);
+        EVT VT = Op0.getValueType();
+        SDValue NotOp0 = TLO.DAG.getNode(ISD::XOR, DL, VT, Op0,
+                                         TLO.DAG.getAllOnesConstant(DL, VT));
+        return TLO.CombineTo(Op, NotOp0);
+      }
+      // Nothing was simplified: deliberately fall through instead of
+      // returning, so the boolean-contents handling that follows still runs
+      // for this setcc.
+    }
+    // TODO: Should we check for other forms of sign-bit comparisons?
+ // Example: X <= -1, X > -1 if (getBooleanContents(Op0.getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll index 6946cc23d867d..afac6c1e861c3 100644 --- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll +++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll @@ -838,31 +838,11 @@ if.then28: ; preds = %if.end26 } define i1 @avifSequenceHeaderParse() { -; CHECK-SD-LABEL: avifSequenceHeaderParse: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: cbz w8, .LBB24_2 -; CHECK-SD-NEXT: .LBB24_1: // %bb6 -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB24_2: // %bb1 -; CHECK-SD-NEXT: cbz w8, .LBB24_4 -; CHECK-SD-NEXT: // %bb.3: -; CHECK-SD-NEXT: tbz xzr, #63, .LBB24_1 -; CHECK-SD-NEXT: b .LBB24_5 -; CHECK-SD-NEXT: .LBB24_4: // %bb2 -; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: tbz x8, #63, .LBB24_1 -; CHECK-SD-NEXT: .LBB24_5: // %bb4 -; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: avifSequenceHeaderParse: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov w0, wzr -; CHECK-GI-NEXT: mov w8, #1 // =0x1 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: avifSequenceHeaderParse: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: ret entry: %a = icmp slt i64 0, 0 br i1 %a, label %bb1, label %bb6 diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll index 3303cb86c874e..84e5f74db13df 100644 --- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -14,15 +14,15 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; 
GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 - ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]] ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GCN-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def dead $scc + ; GCN-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_2]], implicit-def dead $scc + ; GCN-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], [[S_MOV_B32_2]], implicit-def dead $scc ; GCN-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]] ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY3]], implicit-def dead $scc ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc ; GCN-NEXT: [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc ; GCN-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_3]], implicit-def $scc + ; GCN-NEXT: S_CMP_LT_I32 killed [[S_LSHL_B32_]], killed [[S_MOV_B32_3]], implicit-def $scc ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY5]], killed [[COPY4]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec @@ -41,11 +41,12 @@ define i1 @divergent_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x, i1 %z) { ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GCN-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed [[S_MOV_B32_]], [[COPY1]], implicit $exec ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 
1, [[COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec - ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY1]], 0, 16, implicit $exec - ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_BFE_I32_e64_]], killed [[S_MOV_B32_]], implicit $exec + ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GCN-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[V_LSHLREV_B32_e64_]], killed [[S_MOV_B32_1]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I32_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] @@ -124,16 +125,17 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY4]], %subreg.sub0, killed [[COPY3]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3 ; GCN-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub3 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2 - ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY5]], %subreg.sub1 - ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] - ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY7]], implicit-def dead $scc + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[DEF]], %subreg.sub0, killed [[COPY5]], %subreg.sub1 + ; 
GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] + ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY6]], implicit-def dead $scc ; GCN-NEXT: S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc - ; GCN-NEXT: [[COPY8:%[0-9]+]]:sreg_64 = COPY $scc + ; GCN-NEXT: [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] - ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY9]], implicit $exec - ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY8]], implicit-def dead $scc + ; GCN-NEXT: [[COPY8:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] + ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY8]], implicit $exec + ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY7]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.2, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 @@ -146,17 +148,18 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x define i1 @divergent_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x, i1 %z) { ; GCN-LABEL: name: divergent_trunc_i64_to_i1 ; GCN: bb.0 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4 + ; GCN-NEXT: liveins: $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GCN-NEXT: 
[[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[DEF]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GCN-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, [[COPY]], implicit $exec ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 killed [[V_AND_B32_e64_]], 1, implicit $exec ; GCN-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] - ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[COPY3]], implicit $exec + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]] + ; GCN-NEXT: [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE]], [[COPY2]], implicit $exec ; GCN-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec ; GCN-NEXT: $vgpr0 = COPY [[V_CNDMASK_B32_e64_]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll index 956145fb24c4a..fc8628e9e7b14 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll @@ -391,12 +391,11 @@ define i1 @posnormal_bf16(bfloat %x) nounwind { ; GFX7CHECK: ; %bb.0: ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00 -; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 
s6, v0 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -467,12 +466,11 @@ define i1 @negnormal_bf16(bfloat %x) nounwind { ; GFX7CHECK: ; %bb.0: ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00 -; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -601,11 +599,10 @@ define i1 @negsubnormal_bf16(bfloat %x) nounwind { ; GFX7CHECK: ; %bb.0: ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f -; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 ; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v0 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -826,10 +823,9 @@ define i1 @negfinite_bf16(bfloat %x) nounwind { ; GFX7CHECK: ; %bb.0: ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 -; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -1634,12 +1630,11 @@ 
define i1 @not_is_plus_normal_bf16(bfloat %x) { ; GFX7CHECK: ; %bb.0: ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7eff -; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1 ; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -1710,12 +1705,11 @@ define i1 @not_is_neg_normal_bf16(bfloat %x) { ; GFX7CHECK: ; %bb.0: ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 +; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 ; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7eff -; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1 ; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 ; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] ; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] @@ -2068,10 +2062,9 @@ define i1 @not_ispositive_bf16(bfloat %x) { ; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v2, 16, v0 +; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f80 -; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s6, v0 ; GFX7CHECK-NEXT: s_mov_b32 s7, 0xff80 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc @@ -2165,10 +2158,9 @@ define i1 @isnegative_bf16(bfloat %x) { ; GFX7CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v2, 16, v0 +; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v0 ; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 ; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 -; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 ; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0 ; GFX7CHECK-NEXT: s_mov_b32 s6, 0xff80 ; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index 18c462ffd0ff5..558950e769579 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -463,7 +463,7 @@ define i1 @posnormal_f16(half %x) nounwind { ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -548,7 +548,7 @@ define i1 @negnormal_f16(half %x) nounwind { ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7800 -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -706,7 +706,7 @@ define i1 @negsubnormal_f16(half %x) nounwind { ; GFX7SELDAG: ; %bb.0: ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; 
GFX7SELDAG-NEXT: v_add_i32_e64 v0, s[4:5], -1, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x3ff @@ -1002,7 +1002,7 @@ define i1 @negfinite_f16(half %x) nounwind { ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0 @@ -2270,7 +2270,7 @@ define i1 @not_is_plus_normal_f16(half %x) { ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x77ff -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -2364,7 +2364,7 @@ define i1 @not_is_neg_normal_f16(half %x) { ; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x77ff -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_add_i32_e32 v0, vcc, 0xfffffc00, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 @@ -2853,7 +2853,7 @@ define i1 @not_ispositive_f16(half %x) { ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX7SELDAG-NEXT: s_movk_i32 s6, 0x7c00 ; GFX7SELDAG-NEXT: s_mov_b32 s7, 0xfc00 -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v2, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s6, v2 @@ -2942,7 +2942,7 @@ define i1 @isnegative_f16(half %x) { ; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; 
GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00 ; GFX7SELDAG-NEXT: s_mov_b32 s6, 0xfc00 -; GFX7SELDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX7SELDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v0 ; GFX7SELDAG-NEXT: v_and_b32_e32 v2, 0x7fff, v0 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 ; GFX7SELDAG-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v2 diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll index 8bdeebef13dd2..2fff15d0b2e4e 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll @@ -702,11 +702,10 @@ entry: define i32 @vmsk2_sge_allzeros_i8(<32 x i8> %a) { ; LA32-LABEL: vmsk2_sge_allzeros_i8: ; LA32: # %bb.0: # %entry -; LA32-NEXT: vrepli.b $vr2, 0 -; LA32-NEXT: vsle.b $vr0, $vr2, $vr0 +; LA32-NEXT: vxori.b $vr0, $vr0, 255 ; LA32-NEXT: vmskltz.b $vr0, $vr0 ; LA32-NEXT: vpickve2gr.hu $a0, $vr0, 0 -; LA32-NEXT: vsle.b $vr0, $vr2, $vr1 +; LA32-NEXT: vxori.b $vr0, $vr1, 255 ; LA32-NEXT: vmskltz.b $vr0, $vr0 ; LA32-NEXT: vpickve2gr.hu $a1, $vr0, 0 ; LA32-NEXT: slli.w $a1, $a1, 16 @@ -715,11 +714,10 @@ define i32 @vmsk2_sge_allzeros_i8(<32 x i8> %a) { ; ; LA64-LABEL: vmsk2_sge_allzeros_i8: ; LA64: # %bb.0: # %entry -; LA64-NEXT: vrepli.b $vr2, 0 -; LA64-NEXT: vsle.b $vr0, $vr2, $vr0 +; LA64-NEXT: vxori.b $vr0, $vr0, 255 ; LA64-NEXT: vmskltz.b $vr0, $vr0 ; LA64-NEXT: vpickve2gr.hu $a0, $vr0, 0 -; LA64-NEXT: vsle.b $vr0, $vr2, $vr1 +; LA64-NEXT: vxori.b $vr0, $vr1, 255 ; LA64-NEXT: vmskltz.b $vr0, $vr0 ; LA64-NEXT: vpickve2gr.hu $a1, $vr0, 0 ; LA64-NEXT: slli.d $a1, $a1, 16 diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll index 35d38524c2e9a..f9a79db874bc6 100644 --- a/llvm/test/CodeGen/RISCV/bittest.ll +++ b/llvm/test/CodeGen/RISCV/bittest.ll @@ -3553,7 +3553,8 @@ define i32 @bittest_31_slt0_i32(i32 %x, i1 %y) { ; ; RV64-LABEL: bittest_31_slt0_i32: ; RV64: # %bb.0: -; RV64-NEXT: srliw a0, a0, 31 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 63 ; 
RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %cmp = icmp slt i32 %x, 0 @@ -3565,14 +3566,14 @@ define i32 @bittest_31_slt0_i32(i32 %x, i1 %y) { define i32 @bittest_63_slt0_i64(i32 %x, i1 %y) { ; RV32-LABEL: bittest_63_slt0_i64: ; RV32: # %bb.0: -; RV32-NEXT: srai a0, a0, 31 ; RV32-NEXT: srli a0, a0, 31 ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bittest_63_slt0_i64: ; RV64: # %bb.0: -; RV64-NEXT: srliw a0, a0, 31 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 63 ; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret %ext = sext i32 %x to i64 diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll index 5f673ac17d569..d00186cfc6b8d 100644 --- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll @@ -1897,7 +1897,8 @@ define i1 @fpclass(float %x) { ; RV64I-NEXT: add a4, a5, a4 ; RV64I-NEXT: addi a5, a5, -1 ; RV64I-NEXT: sltu a2, a5, a2 -; RV64I-NEXT: srliw a0, a0, 31 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 63 ; RV64I-NEXT: seqz a1, a1 ; RV64I-NEXT: seqz a5, a6 ; RV64I-NEXT: srliw a4, a4, 24 @@ -2395,12 +2396,13 @@ define i1 @isnegfinite_fpclass(float %x) { ; ; RV64I-LABEL: isnegfinite_fpclass: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a1, a0, 33 +; RV64I-NEXT: slli a1, a0, 32 +; RV64I-NEXT: slli a0, a0, 33 ; RV64I-NEXT: lui a2, 522240 -; RV64I-NEXT: srli a1, a1, 33 -; RV64I-NEXT: slt a1, a1, a2 -; RV64I-NEXT: srliw a0, a0, 31 -; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: srli a0, a0, 33 +; RV64I-NEXT: slt a0, a0, a2 +; RV64I-NEXT: srli a1, a1, 63 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 56) ; 0x38 = "-finite" ret i1 %1 diff --git a/llvm/test/CodeGen/SystemZ/tdc-05.ll b/llvm/test/CodeGen/SystemZ/tdc-05.ll index becf293c21f1f..3e02229305b9b 100644 --- a/llvm/test/CodeGen/SystemZ/tdc-05.ll +++ b/llvm/test/CodeGen/SystemZ/tdc-05.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; Test the Test Data Class instruction logic operation conversion from ; compares, combined with signbit or other compares to ensure worthiness. ; @@ -10,21 +11,28 @@ declare fp128 @llvm.fabs.f128(fp128) ; Compare with 0, extract sign bit define i32 @f0(half %x) { -; CHECK-LABEL: f0 -; CHECK: lgdr %r0, %f0 -; CHECK-NEXT: srlg %r0, %r0, 48 -; CHECK-NEXT: lhr %r0, %r0 -; CHECK-NEXT: chi %r0, 0 -; CHECK-NEXT: ipm %r0 -; CHECK-NEXT: risbg %r13, %r0, 63, 191, 36 -; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d -; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT -; CHECK-NEXT: ltebr %f0, %f0 -; CHECK-NEXT: ipm %r0 -; CHECK-NEXT: rosbg %r13, %r0, 63, 63, 35 -; CHECK-NEXT: lr %r2, %r13 -; CHECK-NEXT: lmg %r13, %r15, 264(%r15) -; CHECK-NEXT: br %r14 +; CHECK-LABEL: f0: +; CHECK: # %bb.0: +; CHECK-NEXT: stmg %r13, %r15, 104(%r15) +; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: .cfi_offset %r14, -48 +; CHECK-NEXT: .cfi_offset %r15, -40 +; CHECK-NEXT: aghi %r15, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 320 +; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d +; CHECK-NEXT: lgdr %r0, %f0 +; CHECK-NEXT: risbg %r0, %r0, 32, 175, 32 +; CHECK-NEXT: chi %r0, 0 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r13, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d +; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT +; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: rosbg %r13, %r0, 63, 63, 35 +; CHECK-NEXT: lr %r2, %r13 +; CHECK-NEXT: lmg %r13, %r15, 264(%r15) +; CHECK-NEXT: br %r14 %cast = bitcast half %x to i16 %sign = icmp slt i16 %cast, 0 %fcmp = fcmp ugt half %x, 0.0 @@ -35,8 +43,13 @@ define i32 @f0(half %x) { ; Compare with 0, extract sign bit define i32 @f1(float %x) { -; CHECK-LABEL: f1 -; CHECK: tceb %f0, 2047 +; CHECK-LABEL: f1: +; CHECK: # %bb.0: +; CHECK-NEXT: tceb %f0, 2047 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l 
killed $r2d +; CHECK-NEXT: br %r14 %cast = bitcast float %x to i32 %sign = icmp slt i32 %cast, 0 %fcmp = fcmp ugt float %x, 0.0 @@ -47,8 +60,13 @@ define i32 @f1(float %x) { ; Compare with inf, extract negated sign bit define i32 @f2(float %x) { -; CHECK-LABEL: f2 -; CHECK: tceb %f0, 2698 +; CHECK-LABEL: f2: +; CHECK: # %bb.0: +; CHECK-NEXT: tceb %f0, 2698 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 %cast = bitcast float %x to i32 %sign = icmp sgt i32 %cast, -1 %fcmp = fcmp ult float %x, 0x7ff0000000000000 @@ -59,8 +77,13 @@ define i32 @f2(float %x) { ; Compare with minnorm, extract negated sign bit define i32 @f3(float %x) { -; CHECK-LABEL: f3 -; CHECK: tceb %f0, 2176 +; CHECK-LABEL: f3: +; CHECK: # %bb.0: +; CHECK-NEXT: tceb %f0, 2176 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 %cast = bitcast float %x to i32 %sign = icmp sgt i32 %cast, -1 %fcmp = fcmp olt float %x, 0x3810000000000000 @@ -71,8 +94,13 @@ define i32 @f3(float %x) { ; Test float isnormal, from clang. define i32 @f4(float %x) { -; CHECK-LABEL: f4 -; CHECK: tceb %f0, 768 +; CHECK-LABEL: f4: +; CHECK: # %bb.0: +; CHECK-NEXT: tceb %f0, 768 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 %y = call float @llvm.fabs.f32(float %x) %ord = fcmp ord float %x, 0.0 %a = fcmp ult float %y, 0x7ff0000000000000 @@ -85,8 +113,13 @@ define i32 @f4(float %x) { ; Check for negative 0. 
define i32 @f5(float %x) { -; CHECK-LABEL: f5 -; CHECK: tceb %f0, 1024 +; CHECK-LABEL: f5: +; CHECK: # %bb.0: +; CHECK-NEXT: tceb %f0, 1024 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 %cast = bitcast float %x to i32 %sign = icmp slt i32 %cast, 0 %fcmp = fcmp oeq float %x, 0.0 @@ -97,8 +130,13 @@ define i32 @f5(float %x) { ; Test isnormal, from clang. define i32 @f6(double %x) { -; CHECK-LABEL: f6 -; CHECK: tcdb %f0, 768 +; CHECK-LABEL: f6: +; CHECK: # %bb.0: +; CHECK-NEXT: tcdb %f0, 768 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 %y = call double @llvm.fabs.f64(double %x) %ord = fcmp ord double %x, 0.0 %a = fcmp ult double %y, 0x7ff0000000000000 @@ -111,8 +149,13 @@ define i32 @f6(double %x) { ; Test isinf || isnan, from clang. define i32 @f7(double %x) { -; CHECK-LABEL: f7 -; CHECK: tcdb %f0, 63 +; CHECK-LABEL: f7: +; CHECK: # %bb.0: +; CHECK-NEXT: tcdb %f0, 63 +; CHECK-NEXT: ipm %r0 +; CHECK-NEXT: risbg %r2, %r0, 63, 191, 36 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 %y = call double @llvm.fabs.f64(double %x) %a = fcmp oeq double %y, 0x7ff0000000000000 %b = fcmp uno double %x, 0.0 diff --git a/llvm/test/CodeGen/X86/combine-smax.ll b/llvm/test/CodeGen/X86/combine-smax.ll index 0133827b85cae..6c63097b6426b 100644 --- a/llvm/test/CodeGen/X86/combine-smax.ll +++ b/llvm/test/CodeGen/X86/combine-smax.ll @@ -100,16 +100,13 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) { ; SSE2-LABEL: test_v16i8_demandedbits: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm4 -; SSE2-NEXT: pcmpgtb %xmm1, %xmm4 -; SSE2-NEXT: pand %xmm4, %xmm0 -; SSE2-NEXT: pandn %xmm1, %xmm4 -; SSE2-NEXT: por %xmm0, %xmm4 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; 
SSE2-NEXT: pcmpgtb %xmm4, %xmm0 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: pandn %xmm2, %xmm0 -; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16i8_demandedbits: @@ -142,7 +139,7 @@ define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 ; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512BW-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1 ; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1} diff --git a/llvm/test/CodeGen/X86/combine-smin.ll b/llvm/test/CodeGen/X86/combine-smin.ll index b58934256a209..c6a4d84ffa2ba 100644 --- a/llvm/test/CodeGen/X86/combine-smin.ll +++ b/llvm/test/CodeGen/X86/combine-smin.ll @@ -99,16 +99,13 @@ define <16 x i8> @test_v16i8_reassociation(<16 x i8> %a) { define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> %a, <16 x i8> %b) { ; SSE2-LABEL: test_v16i8_demandedbits: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: pcmpgtb %xmm0, %xmm4 -; SSE2-NEXT: pand %xmm4, %xmm0 -; SSE2-NEXT: pandn %xmm1, %xmm4 -; SSE2-NEXT: por %xmm0, %xmm4 -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pcmpgtb %xmm4, %xmm0 -; SSE2-NEXT: pand %xmm0, %xmm3 -; SSE2-NEXT: pandn %xmm2, %xmm0 -; SSE2-NEXT: por %xmm3, %xmm0 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtb %xmm0, %xmm1 +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm1 +; SSE2-NEXT: por %xmm3, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_v16i8_demandedbits: @@ -141,7 +138,7 @@ define <16 x i8> 
@test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 ; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512BW-NEXT: vpminsb %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1 ; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1} diff --git a/llvm/test/CodeGen/X86/combine-umax.ll b/llvm/test/CodeGen/X86/combine-umax.ll index 482b4fcd744ed..b58f6be5b2ca9 100644 --- a/llvm/test/CodeGen/X86/combine-umax.ll +++ b/llvm/test/CodeGen/X86/combine-umax.ll @@ -99,7 +99,7 @@ define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 ; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512BW-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1 ; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1} diff --git a/llvm/test/CodeGen/X86/combine-umin.ll b/llvm/test/CodeGen/X86/combine-umin.ll index e2757d00d1f61..233249be77d55 100644 --- a/llvm/test/CodeGen/X86/combine-umin.ll +++ b/llvm/test/CodeGen/X86/combine-umin.ll @@ -116,7 +116,7 @@ define <16 x i8> @test_v16i8_demandedbits(<16 x i8> %x, <16 x i8> %y, <16 x i8> ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 ; AVX512BW-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512BW-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpcmpnltb %zmm1, %zmm0, %k1 ; AVX512BW-NEXT: vpblendmb %zmm2, %zmm3, %zmm0 {%k1} diff --git a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll index 52d294ca01720..5a07f30c23c39 100644 --- a/llvm/test/CodeGen/X86/is_fpclass-fp80.ll +++ 
b/llvm/test/CodeGen/X86/is_fpclass-fp80.ll @@ -352,21 +352,22 @@ define i1 @is_posnormal_f80(x86_fp80 %x) nounwind { ; X86-LABEL: is_posnormal_f80: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $32767, %edx # imm = 0x7FFF -; X86-NEXT: decl %edx -; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: shll $16, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: decl %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpl $32766, %edx # imm = 0x7FFE +; X86-NEXT: cmpl $32766, %ecx # imm = 0x7FFE ; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: setb %dl -; X86-NEXT: testl $32768, %ecx # imm = 0x8000 -; X86-NEXT: sete %cl +; X86-NEXT: setb %cl +; X86-NEXT: testl %edx, %edx +; X86-NEXT: setns %dl ; X86-NEXT: shrl $31, %eax -; X86-NEXT: andb %cl, %al ; X86-NEXT: andb %dl, %al +; X86-NEXT: andb %cl, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -396,21 +397,22 @@ define i1 @is_negnormal_f80(x86_fp80 %x) nounwind { ; X86-LABEL: is_negnormal_f80: ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: andl $32767, %edx # imm = 0x7FFF -; X86-NEXT: decl %edx -; X86-NEXT: movzwl %dx, %edx +; X86-NEXT: shll $16, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF +; X86-NEXT: decl %ecx +; X86-NEXT: movzwl %cx, %ecx ; X86-NEXT: xorl %esi, %esi -; X86-NEXT: cmpl $32766, %edx # imm = 0x7FFE +; X86-NEXT: cmpl $32766, %ecx # imm = 0x7FFE ; X86-NEXT: sbbl %esi, %esi -; X86-NEXT: setb %dl -; X86-NEXT: testl $32768, %ecx # imm = 0x8000 -; X86-NEXT: setne %cl +; X86-NEXT: setb %cl +; X86-NEXT: testl %edx, %edx +; X86-NEXT: sets %dl ; X86-NEXT: shrl $31, %eax -; X86-NEXT: andb %cl, %al ; X86-NEXT: 
andb %dl, %al +; X86-NEXT: andb %cl, %al ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -516,23 +518,24 @@ define i1 @is_negsubnormal_f80(x86_fp80 %x) nounwind { ; X86: # %bb.0: # %entry ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shll $16, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: addl $-1, %edi -; X86-NEXT: adcl $-1, %edx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: addl $-1, %esi +; X86-NEXT: adcl $-1, %edi ; X86-NEXT: adcl $-1, %ecx -; X86-NEXT: adcl $-1, %esi -; X86-NEXT: cmpl $-1, %edi -; X86-NEXT: sbbl $2147483647, %edx # imm = 0x7FFFFFFF +; X86-NEXT: adcl $-1, %edx +; X86-NEXT: cmpl $-1, %esi +; X86-NEXT: sbbl $2147483647, %edi # imm = 0x7FFFFFFF ; X86-NEXT: sbbl $0, %ecx -; X86-NEXT: sbbl $0, %esi +; X86-NEXT: sbbl $0, %edx ; X86-NEXT: setb %cl -; X86-NEXT: testl $32768, %eax # imm = 0x8000 -; X86-NEXT: setne %al +; X86-NEXT: testl %eax, %eax +; X86-NEXT: sets %al ; X86-NEXT: andb %cl, %al ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -541,7 +544,8 @@ define i1 @is_negsubnormal_f80(x86_fp80 %x) nounwind { ; X64-LABEL: is_negsubnormal_f80: ; X64: # %bb.0: # %entry ; X64-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; X64-NEXT: movswq %ax, %rcx +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: shlq $48, %rcx ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: addq $-1, %rdx diff --git a/llvm/test/CodeGen/X86/test-shrink.ll b/llvm/test/CodeGen/X86/test-shrink.ll index 03bba9c67455c..3ee6f6fb70f6b 100644 --- a/llvm/test/CodeGen/X86/test-shrink.ll +++ b/llvm/test/CodeGen/X86/test-shrink.ll @@ -577,7 +577,7 @@ no: define void @and16_trunc_8_sign(i16 %x) nounwind { ; 
CHECK-LINUX64-LABEL: and16_trunc_8_sign: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testb $-128, %dil +; CHECK-LINUX64-NEXT: testb %dil, %dil ; CHECK-LINUX64-NEXT: js .LBB13_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax @@ -589,7 +589,7 @@ define void @and16_trunc_8_sign(i16 %x) nounwind { ; CHECK-WIN32-64-LABEL: and16_trunc_8_sign: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testb $-128, %cl +; CHECK-WIN32-64-NEXT: testb %cl, %cl ; CHECK-WIN32-64-NEXT: js .LBB13_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar @@ -599,7 +599,7 @@ define void @and16_trunc_8_sign(i16 %x) nounwind { ; ; CHECK-X86-LABEL: and16_trunc_8_sign: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: testb $-128, {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: js .LBB13_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT @@ -620,7 +620,7 @@ no: define void @and32_trunc_8_sign(i32 %x) nounwind { ; CHECK-LINUX64-LABEL: and32_trunc_8_sign: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testb $-128, %dil +; CHECK-LINUX64-NEXT: testb %dil, %dil ; CHECK-LINUX64-NEXT: js .LBB14_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax @@ -632,7 +632,7 @@ define void @and32_trunc_8_sign(i32 %x) nounwind { ; CHECK-WIN32-64-LABEL: and32_trunc_8_sign: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testb $-128, %cl +; CHECK-WIN32-64-NEXT: testb %cl, %cl ; CHECK-WIN32-64-NEXT: js .LBB14_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar @@ -642,7 +642,7 @@ define void @and32_trunc_8_sign(i32 %x) nounwind { ; ; CHECK-X86-LABEL: and32_trunc_8_sign: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: testb $-128, {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: js .LBB14_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT @@ -663,7 +663,7 @@ no: define 
void @and64_trunc_8_sign(i64 %x) nounwind { ; CHECK-LINUX64-LABEL: and64_trunc_8_sign: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testb $-128, %dil +; CHECK-LINUX64-NEXT: testb %dil, %dil ; CHECK-LINUX64-NEXT: js .LBB15_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax @@ -675,7 +675,7 @@ define void @and64_trunc_8_sign(i64 %x) nounwind { ; CHECK-WIN32-64-LABEL: and64_trunc_8_sign: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testb $-128, %cl +; CHECK-WIN32-64-NEXT: testb %cl, %cl ; CHECK-WIN32-64-NEXT: js .LBB15_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar @@ -685,7 +685,7 @@ define void @and64_trunc_8_sign(i64 %x) nounwind { ; ; CHECK-X86-LABEL: and64_trunc_8_sign: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: testb $-128, {{[0-9]+}}(%esp) +; CHECK-X86-NEXT: cmpb $0, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: js .LBB15_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT @@ -706,7 +706,6 @@ no: define void @and32_trunc_16_sign(i32 %x) nounwind { ; CHECK-LINUX64-LABEL: and32_trunc_16_sign: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: andl $32768, %edi # imm = 0x8000 ; CHECK-LINUX64-NEXT: testw %di, %di ; CHECK-LINUX64-NEXT: js .LBB16_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes @@ -719,7 +718,6 @@ define void @and32_trunc_16_sign(i32 %x) nounwind { ; CHECK-WIN32-64-LABEL: and32_trunc_16_sign: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: andl $32768, %ecx # imm = 0x8000 ; CHECK-WIN32-64-NEXT: testw %cx, %cx ; CHECK-WIN32-64-NEXT: js .LBB16_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes @@ -730,9 +728,7 @@ define void @and32_trunc_16_sign(i32 %x) nounwind { ; ; CHECK-X86-LABEL: and32_trunc_16_sign: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: andl $32768, %eax # imm = 0x8000 -; CHECK-X86-NEXT: testw %ax, %ax +; CHECK-X86-NEXT: cmpw $0, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: js .LBB16_2 ; 
CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT @@ -753,7 +749,7 @@ no: define void @and32_trunc_16_sign_minsize(i32 %x) minsize nounwind { ; CHECK-LINUX64-LABEL: and32_trunc_16_sign_minsize: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testw $-32768, %di # imm = 0x8000 +; CHECK-LINUX64-NEXT: testw %di, %di ; CHECK-LINUX64-NEXT: js .LBB17_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax @@ -765,7 +761,7 @@ define void @and32_trunc_16_sign_minsize(i32 %x) minsize nounwind { ; CHECK-WIN32-64-LABEL: and32_trunc_16_sign_minsize: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testw $-32768, %cx # imm = 0x8000 +; CHECK-WIN32-64-NEXT: testw %cx, %cx ; CHECK-WIN32-64-NEXT: js .LBB17_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar @@ -775,8 +771,7 @@ define void @and32_trunc_16_sign_minsize(i32 %x) minsize nounwind { ; ; CHECK-X86-LABEL: and32_trunc_16_sign_minsize: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-X86-NEXT: testw $-32768, %ax # imm = 0x8000 +; CHECK-X86-NEXT: cmpw $0, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: js .LBB17_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT @@ -797,7 +792,6 @@ no: define void @and64_trunc_16_sign(i64 %x) nounwind { ; CHECK-LINUX64-LABEL: and64_trunc_16_sign: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: andl $32768, %edi # imm = 0x8000 ; CHECK-LINUX64-NEXT: testw %di, %di ; CHECK-LINUX64-NEXT: js .LBB18_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes @@ -810,7 +804,6 @@ define void @and64_trunc_16_sign(i64 %x) nounwind { ; CHECK-WIN32-64-LABEL: and64_trunc_16_sign: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: andl $32768, %ecx # imm = 0x8000 ; CHECK-WIN32-64-NEXT: testw %cx, %cx ; CHECK-WIN32-64-NEXT: js .LBB18_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes @@ -821,8 +814,7 @@ define void @and64_trunc_16_sign(i64 %x) nounwind { ; ; 
CHECK-X86-LABEL: and64_trunc_16_sign: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: movl $32768, %eax # imm = 0x8000 -; CHECK-X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-X86-NEXT: testw %ax, %ax ; CHECK-X86-NEXT: js .LBB18_2 ; CHECK-X86-NEXT: # %bb.1: # %yes @@ -844,7 +836,7 @@ no: define void @and64_trunc_16_sign_minsize(i64 %x) minsize nounwind { ; CHECK-LINUX64-LABEL: and64_trunc_16_sign_minsize: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testw $-32768, %di # imm = 0x8000 +; CHECK-LINUX64-NEXT: testw %di, %di ; CHECK-LINUX64-NEXT: js .LBB19_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax @@ -856,7 +848,7 @@ define void @and64_trunc_16_sign_minsize(i64 %x) minsize nounwind { ; CHECK-WIN32-64-LABEL: and64_trunc_16_sign_minsize: ; CHECK-WIN32-64: # %bb.0: ; CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testw $-32768, %cx # imm = 0x8000 +; CHECK-WIN32-64-NEXT: testw %cx, %cx ; CHECK-WIN32-64-NEXT: js .LBB19_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar @@ -866,7 +858,8 @@ define void @and64_trunc_16_sign_minsize(i64 %x) minsize nounwind { ; ; CHECK-X86-LABEL: and64_trunc_16_sign_minsize: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: testw $-32768, {{[0-9]+}}(%esp) # imm = 0x8000 +; CHECK-X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-X86-NEXT: testw %ax, %ax ; CHECK-X86-NEXT: js .LBB19_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT @@ -887,7 +880,7 @@ no: define void @and64_trunc_32_sign(i64 %x) nounwind { ; CHECK-LINUX64-LABEL: and64_trunc_32_sign: ; CHECK-LINUX64: # %bb.0: -; CHECK-LINUX64-NEXT: testl $-2147483648, %edi # imm = 0x80000000 +; CHECK-LINUX64-NEXT: testl %edi, %edi ; CHECK-LINUX64-NEXT: js .LBB20_2 ; CHECK-LINUX64-NEXT: # %bb.1: # %yes ; CHECK-LINUX64-NEXT: pushq %rax @@ -899,7 +892,7 @@ define void @and64_trunc_32_sign(i64 %x) nounwind { ; CHECK-WIN32-64-LABEL: and64_trunc_32_sign: ; CHECK-WIN32-64: # %bb.0: ; 
CHECK-WIN32-64-NEXT: subq $40, %rsp -; CHECK-WIN32-64-NEXT: testl $-2147483648, %ecx # imm = 0x80000000 +; CHECK-WIN32-64-NEXT: testl %ecx, %ecx ; CHECK-WIN32-64-NEXT: js .LBB20_2 ; CHECK-WIN32-64-NEXT: # %bb.1: # %yes ; CHECK-WIN32-64-NEXT: callq bar @@ -909,7 +902,7 @@ define void @and64_trunc_32_sign(i64 %x) nounwind { ; ; CHECK-X86-LABEL: and64_trunc_32_sign: ; CHECK-X86: # %bb.0: -; CHECK-X86-NEXT: testl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 +; CHECK-X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; CHECK-X86-NEXT: js .LBB20_2 ; CHECK-X86-NEXT: # %bb.1: # %yes ; CHECK-X86-NEXT: calll bar@PLT