Skip to content

Commit

Permalink
[AVX512] Remove masked_move/blendm intrinsic from back-end.
Browse files Browse the repository at this point in the history
This is a complementary patch to D21060.

Differential Revision: http://reviews.llvm.org/D21174

llvm-svn: 272257
  • Loading branch information
Igor Breger committed Jun 9, 2016
1 parent cd242c1 commit f635367
Show file tree
Hide file tree
Showing 7 changed files with 1 addition and 758 deletions.
157 changes: 0 additions & 157 deletions llvm/include/llvm/IR/IntrinsicsX86.td
Expand Up @@ -1943,84 +1943,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;

// llvm.x86.avx512.mask.mova.pd.{128,256,512}: bound to the GCC
// __builtin_ia32_movapd*_mask builtins. Each returns a double vector
// (v2f64 / v4f64 / v8f64) and takes two vectors of that same type followed
// by an i8 mask. All three are marked IntrNoMem.
def int_x86_avx512_mask_mova_pd_128 :
GCCBuiltin<"__builtin_ia32_movapd128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_pd_256 :
GCCBuiltin<"__builtin_ia32_movapd256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_pd_512 :
GCCBuiltin<"__builtin_ia32_movapd512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;

// llvm.x86.avx512.mask.mova.ps.{128,256,512}: bound to the GCC
// __builtin_ia32_movaps*_mask builtins. Each returns a float vector
// (v4f32 / v8f32 / v16f32) and takes two vectors of that same type followed
// by a mask: i8 for the 128- and 256-bit forms, i16 for the 512-bit form.
// All three are marked IntrNoMem.
def int_x86_avx512_mask_mova_ps_128 :
GCCBuiltin<"__builtin_ia32_movaps128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_ps_256 :
GCCBuiltin<"__builtin_ia32_movaps256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_ps_512 :
GCCBuiltin<"__builtin_ia32_movaps512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;

// llvm.x86.avx512.mask.mova.q.{128,256,512}: bound to the GCC
// __builtin_ia32_movdqa64_*_mask builtins. Each returns an i64 vector
// (v2i64 / v4i64 / v8i64) and takes two vectors of that same type followed
// by an i8 mask. All three are marked IntrNoMem.
def int_x86_avx512_mask_mova_q_128 :
GCCBuiltin<"__builtin_ia32_movdqa64_128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_q_256 :
GCCBuiltin<"__builtin_ia32_movdqa64_256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_q_512 :
GCCBuiltin<"__builtin_ia32_movdqa64_512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;

// llvm.x86.avx512.mask.mova.d.{128,256,512}: bound to the GCC
// __builtin_ia32_movdqa32_*_mask builtins. Each returns an i32 vector
// (v4i32 / v8i32 / v16i32) and takes two vectors of that same type followed
// by a mask: i8 for the 128- and 256-bit forms, i16 for the 512-bit form.
// All three are marked IntrNoMem.
def int_x86_avx512_mask_mova_d_128 :
GCCBuiltin<"__builtin_ia32_movdqa32_128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_d_256 :
GCCBuiltin<"__builtin_ia32_movdqa32_256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mova_d_512 :
GCCBuiltin<"__builtin_ia32_movdqa32_512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;

// llvm.x86.avx512.mask.movu.w.{128,256,512}: bound to the GCC
// __builtin_ia32_movdquhi*_mask builtins. Each returns an i16 vector
// (v8i16 / v16i16 / v32i16) and takes two vectors of that same type followed
// by a mask with one bit per element: i8, i16 and i32 respectively.
// All three are marked IntrNoMem.
def int_x86_avx512_mask_movu_w_128 :
GCCBuiltin<"__builtin_ia32_movdquhi128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_movu_w_256 :
GCCBuiltin<"__builtin_ia32_movdquhi256_mask">,
Intrinsic<[llvm_v16i16_ty],
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_movu_w_512 :
GCCBuiltin<"__builtin_ia32_movdquhi512_mask">,
Intrinsic<[llvm_v32i16_ty],
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;

// llvm.x86.avx512.mask.movu.b.{128,256,512}: bound to the GCC
// __builtin_ia32_movdquqi*_mask builtins. Each returns an i8 vector
// (v16i8 / v32i8 / v64i8) and takes two vectors of that same type followed
// by a mask with one bit per element: i16, i32 and i64 respectively.
// All three are marked IntrNoMem.
def int_x86_avx512_mask_movu_b_128 :
GCCBuiltin<"__builtin_ia32_movdquqi128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
def int_x86_avx512_mask_movu_b_256 :
GCCBuiltin<"__builtin_ia32_movdquqi256_mask">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_movu_b_512 :
GCCBuiltin<"__builtin_ia32_movdquqi512_mask">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
}

// Conditional store ops
Expand Down Expand Up @@ -7138,85 +7060,6 @@ let TargetPrefix = "x86" in {
[IntrNoMem]>;
}

// Vector blend
// llvm.x86.avx512.mask.blend.* intrinsics, bound to the GCC
// __builtin_ia32_blendm*_mask builtins. Every definition returns a vector and
// takes two vectors of that same type followed by an integer mask, and is
// marked IntrNoMem. Mask types as declared below: i8 for vectors of up to
// 8 elements, i16 for 16 elements, i32 for 32 elements, i64 for 64 elements.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Packed single-precision float (ps): 512-, 256- and 128-bit.
def int_x86_avx512_mask_blend_ps_512 : GCCBuiltin<"__builtin_ia32_blendmps_512_mask">,
Intrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendmps_256_mask">,
Intrinsic<[llvm_v8f32_ty],
[llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_ps_128 : GCCBuiltin<"__builtin_ia32_blendmps_128_mask">,
Intrinsic<[llvm_v4f32_ty],
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
// Packed double-precision float (pd): 512-, 256- and 128-bit.
def int_x86_avx512_mask_blend_pd_512 : GCCBuiltin<"__builtin_ia32_blendmpd_512_mask">,
Intrinsic<[llvm_v8f64_ty],
[llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendmpd_256_mask">,
Intrinsic<[llvm_v4f64_ty],
[llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_pd_128 : GCCBuiltin<"__builtin_ia32_blendmpd_128_mask">,
Intrinsic<[llvm_v2f64_ty],
[llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;

// 32-bit (d) and 64-bit (q) integer elements, interleaved by vector width.
def int_x86_avx512_mask_blend_d_512 : GCCBuiltin<"__builtin_ia32_blendmd_512_mask">,
Intrinsic<[llvm_v16i32_ty],
[llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_q_512 : GCCBuiltin<"__builtin_ia32_blendmq_512_mask">,
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_d_256 : GCCBuiltin<"__builtin_ia32_blendmd_256_mask">,
Intrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_q_256 : GCCBuiltin<"__builtin_ia32_blendmq_256_mask">,
Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_d_128 : GCCBuiltin<"__builtin_ia32_blendmd_128_mask">,
Intrinsic<[llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_q_128 : GCCBuiltin<"__builtin_ia32_blendmq_128_mask">,
Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty],
[IntrNoMem]>;

// 16-bit (w) and 8-bit (b) integer elements.
def int_x86_avx512_mask_blend_w_512 : GCCBuiltin<"__builtin_ia32_blendmw_512_mask">,
Intrinsic<[llvm_v32i16_ty],
[llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_w_256 : GCCBuiltin<"__builtin_ia32_blendmw_256_mask">,
Intrinsic<[llvm_v16i16_ty],
[llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_w_128 : GCCBuiltin<"__builtin_ia32_blendmw_128_mask">,
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_b_512 : GCCBuiltin<"__builtin_ia32_blendmb_512_mask">,
Intrinsic<[llvm_v64i8_ty],
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_b_256 : GCCBuiltin<"__builtin_ia32_blendmb_256_mask">,
Intrinsic<[llvm_v32i8_ty],
[llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_blend_b_128 : GCCBuiltin<"__builtin_ia32_blendmb_128_mask">,
Intrinsic<[llvm_v16i8_ty],
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
[IntrNoMem]>;

}

let TargetPrefix = "x86" in {
def int_x86_avx512_mask_valign_q_512 :
GCCBuiltin<"__builtin_ia32_alignq512_mask">,
Expand Down
8 changes: 0 additions & 8 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -17654,14 +17654,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Mask = DAG.getBitcast(MaskVT, Mask);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask);
}
case BLEND: {
// Lowers intrinsics tagged BLEND: operands 1 and 2 are the two data
// vectors and operand 3 is the mask.
SDValue Mask = Op.getOperand(3);
MVT VT = Op.getSimpleValueType();
// Mask vector type: one i1 lane per element of the result vector.
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
// NOTE(review): getMaskNode is defined elsewhere; presumably it converts
// the incoming mask operand to MaskVT -- confirm against its definition.
SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
// Emit the opcode recorded for this intrinsic (IntrData->Opc0) with the
// mask as the first operand, followed by the two data vectors.
return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1),
Op.getOperand(2));
}
case KUNPCK: {
MVT VT = Op.getSimpleValueType();
MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2);
Expand Down
38 changes: 1 addition & 37 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Expand Up @@ -33,7 +33,7 @@ enum IntrinsicType : uint16_t {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
EXPAND_FROM_MEM, STOREANT, BLEND, INSERT_SUBVEC,
EXPAND_FROM_MEM, STOREANT, INSERT_SUBVEC,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS,
FIXUPIMMS_MASKZ, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
Expand Down Expand Up @@ -416,24 +416,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_andn_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_andn_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_b_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_d_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_d_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_d_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_pd_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_pd_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_pd_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_ps_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_ps_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_ps_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_q_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_q_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_q_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_128, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_256, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_blend_w_512, BLEND, X86ISD::SELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcast_sd_pd_256, INTR_TYPE_1OP_MASK,
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcast_sd_pd_512, INTR_TYPE_1OP_MASK,
Expand Down Expand Up @@ -848,18 +830,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::FMIN, X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
X86ISD::FMIN, X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_mova_d_128, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_d_256, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_d_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_pd_128, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_pd_256, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_pd_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_ps_128, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_ps_256, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_ps_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_q_128, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_q_256, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mova_q_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movddup_128, INTR_TYPE_1OP_MASK,
X86ISD::MOVDDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movddup_256, INTR_TYPE_1OP_MASK,
Expand All @@ -882,12 +852,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::MOVSLDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movsldup_512, INTR_TYPE_1OP_MASK,
X86ISD::MOVSLDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movu_b_128, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movu_b_256, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movu_b_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movu_w_128, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movu_w_256, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_movu_w_512, BLEND, ISD::VSELECT, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
Expand Down
120 changes: 0 additions & 120 deletions llvm/test/CodeGen/X86/avx512-intrinsics.ll
Expand Up @@ -811,62 +811,6 @@ define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
ret <8 x i64> %res
}

; Calls llvm.x86.avx512.mask.blend.ps.512 on two <16 x float> arguments with
; the i16 argument %a0 as mask. The expected assembly moves %edi into %k1
; and issues a single vblendmps on zmm registers under {%k1}.
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; CHECK-LABEL: test_x86_mask_blend_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly

; Calls llvm.x86.avx512.mask.blend.pd.512 on two <8 x double> arguments with
; the i8 argument %a0 as mask. The expected assembly moves %edi into %k1
; and issues a single vblendmpd on zmm registers under {%k1}.
define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; CHECK-LABEL: test_x86_mask_blend_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}

; Same intrinsic as test_x86_mask_blend_pd_512, but the second vector is
; loaded from %ptr. The expected assembly uses the memory operand (%rdi)
; directly in vblendmpd, with the mask taken from %esi.
define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
; CHECK-LABEL: test_x86_mask_blend_pd_512_memop:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
; CHECK-NEXT: vblendmpd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
%b = load <8 x double>, <8 x double>* %ptr
%res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly

; Calls llvm.x86.avx512.mask.blend.d.512 on two <16 x i32> arguments with
; the i16 argument %a0 as mask. The expected assembly moves %edi into %k1
; and issues a single vpblendmd on zmm registers under {%k1}.
define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
; CHECK-LABEL: test_x86_mask_blend_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpblendmd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

; Calls llvm.x86.avx512.mask.blend.q.512 on two <8 x i64> arguments with
; the i8 argument %a0 as mask. The expected assembly moves %edi into %k1
; and issues a single vpblendmq on zmm registers under {%k1}.
define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; CHECK-LABEL: test_x86_mask_blend_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vpblendmq %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK-LABEL: test_cmpps:
; CHECK: ## BB#0:
Expand Down Expand Up @@ -6859,70 +6803,6 @@ define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x
ret <16 x i32> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.mova.pd.512(<8 x double>, <8 x double>, i8)

; Calls llvm.x86.avx512.mask.mova.pd.512 twice with the same first operand
; and mask: once with %x1 as the second operand and once with
; zeroinitializer, then adds the two results. The expected assembly is one
; vmovapd under {%k1} into zmm1, one vmovapd under {%k1} {z}, and a vaddpd.
define <8 x double>@test_int_x86_avx512_mask_mova_pd_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_mova_pd_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.mova.pd.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
%res1 = call <8 x double> @llvm.x86.avx512.mask.mova.pd.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
%res2 = fadd <8 x double> %res, %res1
ret <8 x double> %res2
}

declare <16 x float> @llvm.x86.avx512.mask.mova.ps.512(<16 x float>, <16 x float>, i16)

; Calls llvm.x86.avx512.mask.mova.ps.512 twice with the same first operand
; and mask: once with %x1 as the second operand and once with
; zeroinitializer, then adds the two results. The expected assembly is one
; vmovaps under {%k1} into zmm1, one vmovaps under {%k1} {z}, and a vaddps.
define <16 x float>@test_int_x86_avx512_mask_mova_ps_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_mova_ps_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.mova.ps.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
%res1 = call <16 x float> @llvm.x86.avx512.mask.mova.ps.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
%res2 = fadd <16 x float> %res, %res1
ret <16 x float> %res2
}

declare <8 x i64> @llvm.x86.avx512.mask.mova.q.512(<8 x i64>, <8 x i64>, i8)

; Calls llvm.x86.avx512.mask.mova.q.512 twice with the same first operand
; and mask: once with %x1 as the second operand and once with
; zeroinitializer, then adds the two results. The expected assembly is one
; vmovdqa64 under {%k1} into zmm1, one vmovdqa64 under {%k1} {z}, and a
; vpaddq.
define <8 x i64>@test_int_x86_avx512_mask_mova_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_mova_q_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.mova.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.mova.q.512(<8 x i64> %x0, <8 x i64> zeroinitializer, i8 %x2)
%res2 = add <8 x i64> %res, %res1
ret <8 x i64> %res2
}

declare <16 x i32> @llvm.x86.avx512.mask.mova.d.512(<16 x i32>, <16 x i32>, i16)

; Calls llvm.x86.avx512.mask.mova.d.512 twice with the same first operand
; and mask: once with %x1 as the second operand and once with
; zeroinitializer, then adds the two results. The expected assembly is one
; vmovdqa32 under {%k1} into zmm1, one vmovdqa32 under {%k1} {z}, and a
; vpaddd.
define <16 x i32>@test_int_x86_avx512_mask_mova_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_mova_d_512:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.mova.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.mova.d.512(<16 x i32> %x0, <16 x i32> zeroinitializer, i16 %x2)
%res2 = add <16 x i32> %res, %res1
ret <16 x i32> %res2
}

declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)

define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) {
Expand Down

0 comments on commit f635367

Please sign in to comment.