diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 2c04330746468..a38f9bfa6e55f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -369,17 +369,18 @@ multiclass AVX512_maskable_custom_cmp O, Format F, string AttSrcAsm, string IntelSrcAsm, list Pattern, list MaskingPattern, + InstrItinClass itin = NoItinerary, bit IsCommutable = 0> { let isCommutable = IsCommutable in def NAME: AVX512; + Pattern, itin>; def NAME#k: AVX512, EVEX_K; + MaskingPattern, itin>, EVEX_K; } multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, @@ -388,27 +389,30 @@ multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, + InstrItinClass itin = NoItinerary, bit IsCommutable = 0> : AVX512_maskable_custom_cmp; + [(set _.KRC:$dst, MaskingRHS)], itin, IsCommutable>; multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, bit IsCommutable = 0> : + dag RHS, InstrItinClass itin = NoItinerary, + bit IsCommutable = 0> : AVX512_maskable_common_cmp; + (and _.KRCWM:$mask, RHS), itin, IsCommutable>; multiclass AVX512_maskable_cmp_alt O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, - string AttSrcAsm, string IntelSrcAsm> : + string AttSrcAsm, string IntelSrcAsm, + InstrItinClass itin = NoItinerary> : AVX512_maskable_custom_cmp; + AttSrcAsm, IntelSrcAsm, [],[], itin>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the @@ -2219,15 +2223,15 @@ defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -multiclass avx512_vcmp_common { - +multiclass avx512_vcmp_common { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), "vcmp${cc}"#_.Suffix, "$src2, $src1", "$src1, $src2", (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc), 1>; + imm:$cc), itins.rr, 1>, + Sched<[itins.Sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), @@ -2235,7 +2239,8 @@ multiclass avx512_vcmp_common { "$src2, $src1", "$src1, $src2", (X86cmpm (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), - imm:$cc)>; + imm:$cc), itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2245,28 +2250,32 @@ multiclass avx512_vcmp_common { "$src1, ${src2}"##_.BroadcastStr, (X86cmpm (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), - imm:$cc)>,EVEX_B; + imm:$cc), itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc">; + "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rr>, + Sched<[itins.Sched]>; let mayLoad = 1 in { defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc">; + "$cc, $src2, $src1", "$src1, $src2, $cc", itins.rm>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, ${src2}"##_.BroadcastStr##", $src1", - "$src1, ${src2}"##_.BroadcastStr##", $cc">,EVEX_B; + "$src1, ${src2}"##_.BroadcastStr##", $cc", itins.rm>, + EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -2297,7 +2306,7 @@ multiclass avx512_vcmp_common { imm:$cc)>; } -multiclass avx512_vcmp_sae { +multiclass avx512_vcmp_sae { // comparison code form (VCMP[EQ/LT/LE/...] defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), @@ -2306,7 +2315,8 @@ multiclass avx512_vcmp_sae { (X86cmpmRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc, - (i32 FROUND_NO_EXC))>, EVEX_B; + (i32 FROUND_NO_EXC)), itins.rr>, + EVEX_B, Sched<[itins.Sched]>; let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, @@ -2314,25 +2324,26 @@ multiclass avx512_vcmp_sae { (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", - "$src1, $src2, {sae}, $cc">, EVEX_B; + "$src1, $src2, {sae}, $cc", itins.rr>, + EVEX_B, Sched<[itins.Sched]>; } } -multiclass avx512_vcmp { +multiclass avx512_vcmp { let Predicates = [HasAVX512] in { - defm Z : avx512_vcmp_common<_.info512>, - avx512_vcmp_sae<_.info512>, EVEX_V512; + defm Z : avx512_vcmp_common, + avx512_vcmp_sae, EVEX_V512; } let Predicates = [HasAVX512,HasVLX] in { - defm Z128 : avx512_vcmp_common<_.info128>, EVEX_V128; - defm Z256 : avx512_vcmp_common<_.info256>, EVEX_V256; + defm Z128 : avx512_vcmp_common, EVEX_V128; + defm Z256 : avx512_vcmp_common, EVEX_V256; } } -defm VCMPPD : avx512_vcmp, +defm VCMPPD : avx512_vcmp, AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VCMPPS : avx512_vcmp, +defm VCMPPS : avx512_vcmp, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -4998,34 +5009,35 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs //===----------------------------------------------------------------------===// multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { let isCommutable = 1 in defm rr : AVX512_maskable_cmp, - EVEX_4V; + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>, + EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable_cmp, - EVEX_4V, - EVEX_CD8<_.EltSize, CD8VF>; + (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } } multiclass avx512_vptest_mb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { + OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable_cmp, - EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (_.ScalarLdFrag addr:$src2)))), + itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. @@ -5042,16 +5054,17 @@ multiclass avx512_vptest_lowering opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo _, string Suffix> { + OpndItins itins, AVX512VLVectorVTInfo _, + string Suffix> { let Predicates = [HasAVX512] in - defm Z : avx512_vptest, - avx512_vptest_mb, EVEX_V512; + defm Z : avx512_vptest, + avx512_vptest_mb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_vptest, - avx512_vptest_mb, EVEX_V256; - defm Z128 : avx512_vptest, - avx512_vptest_mb, EVEX_V128; + defm Z256 : avx512_vptest, + avx512_vptest_mb, EVEX_V256; + defm Z128 : avx512_vptest, + avx512_vptest_mb, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>; @@ -5059,30 +5072,31 @@ multiclass avx512_vptest_dq_sizes opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_vptest_dq opc, string OpcodeStr, SDNode OpNode> { - defm D : avx512_vptest_dq_sizes opc, string OpcodeStr, SDNode OpNode, + OpndItins itins> { + defm D : avx512_vptest_dq_sizes; - defm Q : avx512_vptest_dq_sizes, VEX_W; } multiclass avx512_vptest_wb opc, string OpcodeStr, - SDNode OpNode> { + SDNode OpNode, OpndItins itins> { let Predicates = [HasBWI] in { - defm WZ: avx512_vptest, + defm WZ: avx512_vptest, EVEX_V512, VEX_W; - defm BZ: avx512_vptest, + defm BZ: avx512_vptest, EVEX_V512; } let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_vptest, + defm WZ256: avx512_vptest, EVEX_V256, VEX_W; - defm WZ128: avx512_vptest, + defm WZ128: avx512_vptest, EVEX_V128, VEX_W; - defm BZ256: avx512_vptest, + defm BZ256: avx512_vptest, EVEX_V256; - defm BZ128: avx512_vptest, + defm BZ128: avx512_vptest, EVEX_V128; } @@ -5092,16 +5106,17 @@ multiclass avx512_vptest_wb opc, string OpcodeStr, defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">; defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">; } - } multiclass avx512_vptest_all_forms opc_wb, bits<8> opc_dq, string OpcodeStr, - SDNode OpNode> : - avx512_vptest_wb , - avx512_vptest_dq; + SDNode OpNode, OpndItins itins> : + avx512_vptest_wb , + avx512_vptest_dq; -defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm>, T8PD; -defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8XS; +defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm, + SSE_BIT_ITINS_P>, T8PD; +defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm, + SSE_BIT_ITINS_P>, T8XS; //===----------------------------------------------------------------------===// @@ -9702,7 +9717,7 @@ def VPTERNLOG312_imm8 : SDNodeXForm; multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ + OpndItins itins, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT _.RC:$src3), - (i8 imm:$src4)), NoItinerary, 1, 1>, - AVX512AIi8Base, EVEX_4V; + (i8 imm:$src4)), itins.rr, 1, 1>, + AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>; defm rmi : AVX512_maskable_3src, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (i8 imm:$src4)), itins.rm, 1, 0>, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), - (i8 imm:$src4)), NoItinerary, 1, 0>, EVEX_B, - AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + (i8 imm:$src4)), itins.rm, 1, 0>, EVEX_B, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + Sched<[itins.Sched.Folded, ReadAfterLd]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. @@ -9867,17 +9884,20 @@ multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; } -multiclass avx512_common_ternlog{ +multiclass avx512_common_ternlog { let Predicates = [HasAVX512] in - defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512; + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128; - defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256; + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256; } } -defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; -defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P, + avx512vl_i32_info>; +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P, + avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - FixupImm diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 1028f970796f9..72541465e2340 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -1829,7 +1829,7 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { ; GENERIC-LABEL: f64to4f32_mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} ; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1879,7 +1879,7 @@ define <8 x double> @f32to8f64(<8 x float> %b) nounwind { define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) { ; GENERIC-LABEL: f32to4f64_mask: ; GENERIC: # BB#0: -; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 +; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2469,8 +2469,8 @@ define <16 x double> @sbto16f64(<16 x double> %a) { ; GENERIC-LABEL: sbto16f64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0 -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1 +; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k0 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k1, %ymm0 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 ; GENERIC-NEXT: vpmovm2d %k0, %ymm1 @@ -2496,7 +2496,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) { ; GENERIC-LABEL: sbto8f64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 ; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2517,7 +2517,7 @@ define <8 x float> @sbto8f32(<8 x float> %a) { ; GENERIC-LABEL: sbto8f32: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0 +; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %ymm0 ; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2538,7 +2538,7 @@ define <4 x float> @sbto4f32(<4 x float> %a) { ; GENERIC-LABEL: sbto4f32: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 +; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2559,7 +2559,7 @@ define <4 x double> @sbto4f64(<4 x double> %a) { ; GENERIC-LABEL: sbto4f64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 +; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 ; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2580,7 +2580,7 @@ define <2 x float> @sbto2f32(<2 x float> %a) { ; GENERIC-LABEL: sbto2f32: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 +; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 ; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2601,7 +2601,7 @@ define <2 x double> @sbto2f64(<2 x double> %a) { ; GENERIC-LABEL: sbto2f64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 +; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2q %k0, %xmm0 ; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -3263,7 +3263,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC-LABEL: zext_4x8mem_to_4x32: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3283,7 +3283,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC-LABEL: sext_4x8mem_to_4x32: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3449,7 +3449,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re ; GENERIC-LABEL: zext_2x8mem_to_2x64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3468,7 +3468,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin ; GENERIC-LABEL: sext_2x8mem_to_2x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3502,7 +3502,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC-LABEL: zext_4x8mem_to_4x64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3522,7 +3522,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin ; GENERIC-LABEL: sext_4x8mem_to_4x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3612,7 +3612,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x16mem_to_4x32: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3632,7 +3632,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC-LABEL: sext_4x16mem_to_4x32mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3843,7 +3843,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind ; GENERIC-LABEL: zext_2x16mem_to_2x64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3863,7 +3863,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw ; GENERIC-LABEL: sext_2x16mem_to_2x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3898,7 +3898,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x16mem_to_4x64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3918,7 +3918,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC-LABEL: sext_4x16mem_to_4x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4041,7 +4041,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind ; GENERIC-LABEL: zext_2x32mem_to_2x64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4061,7 +4061,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw ; GENERIC-LABEL: sext_2x32mem_to_2x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4096,7 +4096,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x32mem_to_4x64: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4116,7 +4116,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw ; GENERIC-LABEL: sext_4x32mem_to_4x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4165,7 +4165,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x32_to_4x64mask: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 +; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -4354,7 +4354,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; GENERIC-LABEL: trunc_16i32_to_16i1: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 +; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovd %k0, %eax ; GENERIC-NEXT: # kill: %ax %ax %eax ; GENERIC-NEXT: vzeroupper @@ -4377,9 +4377,9 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) { ; GENERIC-LABEL: trunc_4i32_to_4i1: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 {%k1} # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6570,7 +6570,7 @@ define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mas ; GENERIC-LABEL: mov_test40: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6591,7 +6591,7 @@ define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mas ; GENERIC-LABEL: mov_test41: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6612,7 +6612,7 @@ define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) { ; GENERIC-LABEL: mov_test42: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6633,7 +6633,7 @@ define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) { ; GENERIC-LABEL: mov_test43: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6654,7 +6654,7 @@ define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mas ; GENERIC-LABEL: mov_test44: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6675,7 +6675,7 @@ define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mas ; GENERIC-LABEL: mov_test45: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 +; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6696,7 +6696,7 @@ define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) { ; GENERIC-LABEL: mov_test46: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -6717,7 +6717,7 @@ define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) { ; GENERIC-LABEL: mov_test47: ; GENERIC: # BB#0: ; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 +; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [4:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7230,7 +7230,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; GENERIC-NEXT: .LBB389_1: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: .LBB389_3: -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7471,7 +7471,7 @@ define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { ; GENERIC-LABEL: vmov_test22: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovb %k0, (%rdi) ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7489,7 +7489,7 @@ define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { ; GENERIC-LABEL: vmov_test23: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovb %k0, (%rdi) ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7528,7 +7528,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; GENERIC-LABEL: store_v2i1: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 +; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: knotw %k0, %k0 ; GENERIC-NEXT: kmovb %k0, (%rdi) ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7549,7 +7549,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; GENERIC-LABEL: store_v4i1: ; GENERIC: # BB#0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 +; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: knotw %k0, %k0 ; GENERIC-NEXT: kmovb %k0, (%rdi) ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7718,9 +7718,9 @@ define void @ktest_1(<8 x double> %in, double * %base) { ; GENERIC-LABEL: ktest_1: ; GENERIC: # BB#0: ; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [4:0.50] -; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00] ; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [4:0.50] -; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} +; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00] ; GENERIC-NEXT: ktestb %k0, %k0 ; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00] ; GENERIC-NEXT: # BB#1: # %L1 @@ -7781,13 +7781,13 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; GENERIC: # BB#0: ; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [4:0.50] ; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [4:0.50] -; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 -; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 +; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 ; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [4:0.50] ; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [4:0.50] -; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 -; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 +; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00] ; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 ; GENERIC-NEXT: kord %k1, %k0, %k0 ; GENERIC-NEXT: ktestd %k0, %k0 @@ -8590,7 +8590,7 @@ define <16 x i32> @test_vbroadcast() { ; GENERIC-LABEL: test_vbroadcast: ; GENERIC: # BB#0: # %entry ; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 +; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00] ; GENERIC-NEXT: vpmovm2d %k0, %zmm0 ; GENERIC-NEXT: knotw %k0, %k1 ; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}