diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 36fb7d11b488a..bb2eb99c00317 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4592,6 +4592,90 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                       ConstantInt::get(IRB.getInt32Ty(), 0));
   }
 
+  // Handle llvm.x86.avx512.mask.pmov{,s,us}.*.512
+  //
+  // e.g., call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512
+  //           (<8 x i64>, <16 x i8>, i8)
+  //            A          WriteThru  Mask
+  //
+  //       call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512
+  //           (<16 x i32>, <16 x i8>, i16)
+  //
+  // Dst[i]        = Mask[i] ? truncate_or_saturate(A[i]) : WriteThru[i]
+  // Dst_shadow[i] = Mask[i] ? truncate(A_shadow[i])      : WriteThru_shadow[i]
+  //
+  // If Dst has more elements than A, the excess elements are zeroed (and the
+  // corresponding shadow is initialized).
+  //
+  // Note: for PMOV (truncation), handleIntrinsicByApplyingToShadow is precise
+  // and is much faster than this handler.
+  void handleAVX512VectorDownConvert(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    assert(I.arg_size() == 3);
+    Value *A = I.getOperand(0);
+    Value *WriteThrough = I.getOperand(1);
+    Value *Mask = I.getOperand(2);
+
+    assert(isa<FixedVectorType>(A->getType()));
+    assert(A->getType()->isIntOrIntVectorTy());
+
+    assert(isa<FixedVectorType>(WriteThrough->getType()));
+    assert(WriteThrough->getType()->isIntOrIntVectorTy());
+
+    unsigned ANumElements =
+        cast<FixedVectorType>(A->getType())->getNumElements();
+    unsigned OutputNumElements =
+        cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+    assert(ANumElements == OutputNumElements ||
+           ANumElements * 2 == OutputNumElements);
+
+    assert(Mask->getType()->isIntegerTy());
+    assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+    insertCheckShadowOf(Mask, &I);
+
+    assert(I.getType() == WriteThrough->getType());
+
+    // Widen the mask, if necessary, to have one bit per element of the output
+    // vector.
+    // We want the extra bits to have '1's, so that the CreateSelect will
+    // select the values from AShadow instead of WriteThroughShadow ("maskless"
+    // versions of the intrinsics are sometimes implemented using an all-1's
+    // mask and an undefined value for WriteThroughShadow). We accomplish this
+    // by using bitwise NOT before and after the ZExt.
+    if (ANumElements != OutputNumElements) {
+      Mask = IRB.CreateNot(Mask);
+      Mask = IRB.CreateZExt(Mask, Type::getIntNTy(*MS.C, OutputNumElements),
+                            "_ms_widen_mask");
+      Mask = IRB.CreateNot(Mask);
+    }
+    Mask = IRB.CreateBitCast(
+        Mask, FixedVectorType::get(IRB.getInt1Ty(), OutputNumElements));
+
+    Value *AShadow = getShadow(A);
+
+    // The return type might have more elements than the input.
+    // Temporarily shrink the return type's number of elements.
+    VectorType *ShadowType = maybeShrinkVectorShadowType(A, I);
+
+    // PMOV truncates; PMOVS/PMOVUS uses signed/unsigned saturation.
+    // This handler treats them all as truncation, which leads to some rare
+    // false positives in the cases where the truncated bytes could
+    // unambiguously saturate the value e.g., if A = ??????10 ????????
+    // (big-endian), the unsigned saturated byte conversion is 11111111 i.e.,
+    // fully defined, but the truncated byte is ????????.
+    //
+    // TODO: use GetMinMaxUnsigned() to handle saturation precisely.
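+    //
+    // Illustrative example of the truncation-based shadow: for pmov.qb.512,
+    // the trunc below keeps only the shadow of the low byte of each i64
+    // element, so an element shadow of 0xFFFFFFFFFFFFFF00 (only the low byte
+    // initialized) becomes the fully-initialized shadow 0x00, while
+    // 0x00000000000000FF becomes the fully-uninitialized shadow 0xFF.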
+    AShadow = IRB.CreateTrunc(AShadow, ShadowType, "_ms_trunc_shadow");
+    AShadow = maybeExtendVectorShadowWithZeros(AShadow, I);
+
+    Value *WriteThroughShadow = getShadow(WriteThrough);
+
+    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
   // For sh.* compiler intrinsics:
   // llvm.x86.avx512fp16.mask.{add/sub/mul/div/max/min}.sh.round
   //     (<8 x half>, <8 x half>, <8 x half>, i8, i32)
@@ -5412,6 +5496,66 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    // AVX512 PMOV: Packed MOV, with truncation
+    // Precisely handled by applying the same intrinsic to the shadow
+    case Intrinsic::x86_avx512_mask_pmov_dw_512:
+    case Intrinsic::x86_avx512_mask_pmov_db_512:
+    case Intrinsic::x86_avx512_mask_pmov_qb_512:
+    case Intrinsic::x86_avx512_mask_pmov_qw_512: {
+      // Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 were removed in
+      // f608dc1f5775ee880e8ea30e2d06ab5a4a935c22
+      handleIntrinsicByApplyingToShadow(I, I.getIntrinsicID(),
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    // AVX512 PMOV{S,US}: Packed MOV, with signed/unsigned saturation
+    // Approximately handled using the corresponding truncation intrinsic
+    // TODO: improve handleAVX512VectorDownConvert to precisely model
+    //       saturation
+    case Intrinsic::x86_avx512_mask_pmovs_dw_512:
+    case Intrinsic::x86_avx512_mask_pmovus_dw_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_dw_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_db_512:
+    case Intrinsic::x86_avx512_mask_pmovus_db_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_db_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_qb_512:
+    case Intrinsic::x86_avx512_mask_pmovus_qb_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_qb_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_qw_512:
+    case Intrinsic::x86_avx512_mask_pmovus_qw_512: {
+      handleIntrinsicByApplyingToShadow(I,
+                                        Intrinsic::x86_avx512_mask_pmov_qw_512,
+                                        /*trailingVerbatimArgs=*/1);
+      break;
+    }
+
+    case Intrinsic::x86_avx512_mask_pmovs_qd_512:
+    case Intrinsic::x86_avx512_mask_pmovus_qd_512:
+    case Intrinsic::x86_avx512_mask_pmovs_wb_512:
+    case Intrinsic::x86_avx512_mask_pmovus_wb_512: {
+      // Since Intrinsic::x86_avx512_mask_pmov_{qd,wb}_512 do not exist, we
+      // cannot use handleIntrinsicByApplyingToShadow. Instead, we call the
+      // slow-path handler.
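+      //
+      // For these variants, the input and output vectors have the same
+      // element count (e.g., <8 x i64> -> <8 x i32> for qd), so the
+      // mask-widening and shadow-extension steps of the slow-path handler
+      // are expected to be no-ops here.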
+      handleAVX512VectorDownConvert(I);
+      break;
+    }
+
+    // AVX512 FP16 Arithmetic
     case Intrinsic::x86_avx512fp16_mask_add_sh_round:
     case Intrinsic::x86_avx512fp16_mask_sub_sh_round:
     case Intrinsic::x86_avx512fp16_mask_mul_sh_round:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index d9ac1b43924bf..1b42396ff31d5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -2,6 +2,47 @@
 ; RUN: opt %s -S -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
 ;
 ; Forked from llvm/test/CodeGen/X86/avx512-intrinsics.ll
+;
+; Strictly handled:
+; - llvm.x86.avx512.add.ps.512
+; - llvm.x86.avx512.cvtsi2ss32, llvm.x86.avx512.cvttsd2si, llvm.x86.avx512.cvttss2si
+; - llvm.x86.avx512.div.ps.512
+; - llvm.x86.avx512.mask.add.sd.round, llvm.x86.avx512.mask.add.ss.round
+; - llvm.x86.avx512.mask.cmp.pd.512, llvm.x86.avx512.mask.cmp.ps.512, llvm.x86.avx512.mask.cmp.sd, llvm.x86.avx512.mask.cmp.ss
+; - llvm.x86.avx512.mask.compress.v16f32, llvm.x86.avx512.mask.compress.v16i32, llvm.x86.avx512.mask.compress.v8f64, llvm.x86.avx512.mask.compress.v8i64
+; - llvm.x86.avx512.mask.cvtpd2dq.512, llvm.x86.avx512.mask.cvtpd2ps.512, llvm.x86.avx512.mask.cvtpd2udq.512, llvm.x86.avx512.mask.cvtps2pd.512, llvm.x86.avx512.mask.cvtps2udq.512
+; - llvm.x86.avx512.mask.cvtsd2ss.round, llvm.x86.avx512.mask.cvtss2sd.round
+; - llvm.x86.avx512.mask.cvttpd2dq.512, llvm.x86.avx512.mask.cvttpd2udq.512, llvm.x86.avx512.mask.cvttps2dq.512, llvm.x86.avx512.mask.cvttps2udq.512
+; - llvm.x86.avx512.mask.expand.v16f32, llvm.x86.avx512.mask.expand.v16i32, llvm.x86.avx512.mask.expand.v8f64, llvm.x86.avx512.mask.expand.v8i64
+; - llvm.x86.avx512.mask.fixupimm.pd.512, llvm.x86.avx512.mask.fixupimm.ps.512, llvm.x86.avx512.mask.fixupimm.sd, llvm.x86.avx512.mask.fixupimm.ss
+; - llvm.x86.avx512.mask.getexp.pd.512, llvm.x86.avx512.mask.getexp.ps.512, llvm.x86.avx512.mask.getexp.sd, llvm.x86.avx512.mask.getexp.ss
+; - llvm.x86.avx512.mask.getmant.pd.512, llvm.x86.avx512.mask.getmant.ps.512, llvm.x86.avx512.mask.getmant.sd, llvm.x86.avx512.mask.getmant.ss
+; - llvm.x86.avx512.mask.max.sd.round, llvm.x86.avx512.mask.max.ss.round
+; - llvm.x86.avx512.mask.pmov.db.mem.512, llvm.x86.avx512.mask.pmov.dw.mem.512, llvm.x86.avx512.mask.pmov.qb.mem.512, llvm.x86.avx512.mask.pmov.qd.mem.512, llvm.x86.avx512.mask.pmov.qw.mem.512
+; - llvm.x86.avx512.mask.pmovs.db.mem.512, llvm.x86.avx512.mask.pmovs.dw.mem.512, llvm.x86.avx512.mask.pmovs.qb.mem.512, llvm.x86.avx512.mask.pmovs.qd.mem.512, llvm.x86.avx512.mask.pmovs.qw.mem.512
+; - llvm.x86.avx512.mask.pmovus.db.mem.512, llvm.x86.avx512.mask.pmovus.dw.mem.512, llvm.x86.avx512.mask.pmovus.qb.mem.512, llvm.x86.avx512.mask.pmovus.qd.mem.512, llvm.x86.avx512.mask.pmovus.qw.mem.512
+; - llvm.x86.avx512.mask.rndscale.pd.512, llvm.x86.avx512.mask.rndscale.ps.512, llvm.x86.avx512.mask.rndscale.sd, llvm.x86.avx512.mask.rndscale.ss
+; - llvm.x86.avx512.mask.scalef.pd.512, llvm.x86.avx512.mask.scalef.ps.512
+; - llvm.x86.avx512.mask.sqrt.sd, llvm.x86.avx512.mask.sqrt.ss
+; - llvm.x86.avx512.mask.vcvtps2ph.512
+; - llvm.x86.avx512.maskz.fixupimm.pd.512, llvm.x86.avx512.maskz.fixupimm.ps.512, llvm.x86.avx512.maskz.fixupimm.sd, llvm.x86.avx512.maskz.fixupimm.ss
+; - llvm.x86.avx512.mul.pd.512, llvm.x86.avx512.mul.ps.512
+; - llvm.x86.avx512.permvar.df.512, llvm.x86.avx512.permvar.sf.512
+; - llvm.x86.avx512.pternlog.d.512, 
llvm.x86.avx512.pternlog.q.512 +; - llvm.x86.avx512.rcp14.pd.512, llvm.x86.avx512.rcp14.ps.512 +; - llvm.x86.avx512.rsqrt14.ps.512 +; - llvm.x86.avx512.sitofp.round.v16f32.v16i32 +; - llvm.x86.avx512.sqrt.pd.512, llvm.x86.avx512.sqrt.ps.512 +; - llvm.x86.avx512.sub.ps.512 +; - llvm.x86.avx512.uitofp.round.v16f32.v16i32 +; - llvm.x86.avx512.vcomi.sd, llvm.x86.avx512.vcomi.ss +; - llvm.x86.avx512.vcvtsd2si32, llvm.x86.avx512.vcvtss2si32 +; - llvm.x86.avx512.vfmadd.f32, llvm.x86.avx512.vfmadd.f64 +; +; Heuristically handled: +; - llvm.fma.f32, llvm.fma.f64 +; - llvm.sqrt.v16f32, llvm.sqrt.v8f64 +; - llvm.x86.avx512.permvar.di.512, llvm.x86.avx512.permvar.si.512 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -6565,43 +6606,24 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> % ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> [[TMP2]], i8 -1) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> [[TMP2]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <16 x i8> +; CHECK-NEXT: 
[[_MSPROP4:%.*]] = or <16 x i8> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2]]) +; CHECK-NEXT: [[TMP12:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i128 [[TMP9]] to <16 x i8> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP12]] ; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[_MSPROP4]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP3]], [[_MSPROP]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[RES4]] ; %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -6657,43 +6679,24 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> [[TMP2]], i8 -1) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; 
CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> [[TMP2]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i8> [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i8> [[TMP10]], [[TMP8]] ; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP12]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP19]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[RES4]] ; %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -6749,43 +6752,24 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> [[TMP2]], i8 -1) +; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> zeroinitializer, [[TMP4]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> 
[[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> [[TMP2]], i8 [[X2:%.*]]) +; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP6]] to <16 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i8> [[TMP7]], [[TMP11]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[TMP1]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP9]] to <16 x i8> +; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i8> [[TMP10]], [[TMP8]] ; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP5]], [[TMP12]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP19]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[RES4]] ; %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1) @@ -6841,43 +6825,24 @@ define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> % ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> [[TMP2]], i8 -1) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp 
ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> [[TMP2]], i8 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP10]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <8 x i16> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[TMP6]]) +; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> zeroinitializer, i8 [[TMP6]]) +; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[TMP11]] to <8 x i16> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP4]], [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <8 x i16> [[_MSPROP2]], [[_MSPROP4]] ; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP3]], [[_MSPROP]] ; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]] -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[RES4]] ; %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -6933,43 +6898,24 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> [[TMP2]], i8 -1) +; CHECK-NEXT: 
[[TMP4:%.*]] = or <8 x i16> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> [[TMP2]], i8 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP12]] to <8 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> zeroinitializer, i8 [[TMP6]]) +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP13]] to <8 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[TMP14]] ; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]] -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[RES4]] ; %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -7025,43 +6971,24 @@ define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; 
CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> [[TMP2]], i8 -1) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i16> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> [[TMP2]], i8 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP12]] to <8 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[TMP1]], <8 x i16> zeroinitializer, i8 [[TMP6]]) +; CHECK-NEXT: [[TMP13:%.*]] = zext i8 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP13]] to <8 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[TMP14]] ; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]] -; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i16> [[RES4]] ; %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1) @@ -7215,18 
+7142,10 @@ define <8 x i32>@test_int_x86_avx512_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1) ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> [[TMP2]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) @@ -7235,24 +7154,21 @@ define <8 x i32>@test_int_x86_avx512_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1) define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_512( +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP2]] +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable ; CHECK: 7: -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> 
[[X1:%.*]], i8 [[TMP4]]) +; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) @@ -7261,20 +7177,20 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> define <8 x i32>@test_int_x86_avx512_maskz_pmovs_qd_512(<8 x i64> %x0, i8 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_qd_512( -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 5: -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[TMP3]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) @@ -7325,18 +7241,10 @@ define <8 x i32>@test_int_x86_avx512_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1) ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = select <8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> [[TMP2]] ; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 
8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1) @@ -7345,24 +7253,21 @@ define <8 x i32>@test_int_x86_avx512_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1) define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_512( +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP4:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> [[TMP2]] +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP3]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable ; CHECK: 7: -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[TMP4]]) +; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) @@ -7371,20 +7276,20 @@ define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> define <8 x i32>@test_int_x86_avx512_maskz_pmovus_qd_512(<8 x i64> %x0, i8 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_qd_512( -; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 +; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP3:%.*]] to <8 x i1> +; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = 
select <8 x i1> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> zeroinitializer +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] +; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] ; CHECK-NEXT: unreachable -; CHECK: 5: -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]]) -; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[TMP3]]) +; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2) @@ -7436,43 +7341,24 @@ define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> [[TMP2]], i16 -1) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]]) 
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> [[TMP2]], i16 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP10]] to <16 x i8> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i8> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[TMP6]]) +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i128 [[TMP11]] to <16 x i8> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i8> [[_MSPROP2]], [[_MSPROP4]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP3]], [[_MSPROP]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[RES4]] ; %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) @@ -7528,43 +7414,24 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> [[TMP2]], i16 -1) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i8> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = 
bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> [[TMP2]], i16 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i16 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP12]] to <16 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[TMP13:%.*]] = zext i16 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP13]] to <16 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP14]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[RES4]] ; %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) @@ -7620,43 +7487,24 @@ define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> [[TMP2]], i16 -1) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i8> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0 -; 
CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> [[TMP2]], i16 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i16 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i128 [[TMP12]] to <16 x i8> +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[TMP1]], <16 x i8> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[TMP13:%.*]] = zext i16 [[TMP3]] to i128 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i128 [[TMP13]] to <16 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i8> [[_MSPROP]], [[TMP14]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i8> [[RES4]] ; %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1) @@ -7712,43 +7560,24 @@ define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] 
= call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> [[TMP2]], i16 -1) +; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i16> zeroinitializer, [[TMP8]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> [[TMP2]], i16 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP3]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[_MSPROP4:%.*]] = or <16 x i16> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[TMP6]]) +; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP3]] to i256 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i256 [[TMP11]] to <16 x i16> +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP4]], [[TMP9]] +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP3:%.*]] = or <16 x i16> [[_MSPROP2]], [[_MSPROP4]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[_MSPROP3]], [[_MSPROP]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES4]] ; %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) @@ -7804,43 +7633,24 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i1 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), 
align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> [[TMP2]], i16 -1) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i16> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> [[TMP2]], i16 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i16 [[TMP3]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP12]] to <16 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[TMP13:%.*]] = zext i16 [[TMP3]] to i256 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i256 [[TMP13]] to <16 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i16> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[_MSPROP]], [[TMP14]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; 
CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES4]] ; %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) @@ -7896,43 +7706,24 @@ define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> [[TMP2]], i16 -1) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i16> zeroinitializer, [[TMP11]] ; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1) -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]] -; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 11: -; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]]) -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0 -; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]] -; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]] -; CHECK: 13: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]] -; CHECK-NEXT: unreachable -; CHECK: 14: -; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]]) +; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> [[TMP2]], i16 [[TMP6:%.*]]) +; CHECK-NEXT: [[TMP12:%.*]] = zext i16 [[TMP3]] to i256 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i256 [[TMP12]] to <16 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP7]], [[TMP5]] +; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[TMP6]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[TMP1]], <16 x i16> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[TMP13:%.*]] = zext i16 [[TMP3]] to i256 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast 
i256 [[TMP13]] to <16 x i16> +; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i16> [[TMP10]], [[TMP8]] +; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[TMP6]]) +; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP4]], [[TMP9]] ; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]] +; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i16> [[_MSPROP]], [[TMP14]] ; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]] -; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES4]] ; %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1) diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll index 3f09e54a0224d..636409fe20f83 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -6,8 +6,6 @@ ; Strictly handled: ; - llvm.x86.avx512.dbpsadbw.512 ; - llvm.x86.avx512.ktestc.d, llvm.x86.avx512.ktestc.q, llvm.x86.avx512.ktestz.d, llvm.x86.avx512.ktestz.q -; - llvm.x86.avx512.mask.pmovs.wb.512, llvm.x86.avx512.mask.pmovs.wb.mem.512 -; - llvm.x86.avx512.mask.pmovus.wb.512, llvm.x86.avx512.mask.pmovus.wb.mem.512 ; - llvm.x86.avx512.mask.pmov.wb.mem.512 ; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 ; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 @@ -2478,18 +2476,10 @@ define <32 x i8>@test_int_x86_avx512_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1) ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = select <32 x i1> splat (i1 true), <32 x i8> [[TMP3]], <32 x i8> [[TMP2]] ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 -1) -; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) @@ -2498,24 +2488,21 @@ define <32 x i8>@test_int_x86_avx512_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1) define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_wb_512( +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr 
@__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP4]], <32 x i8> [[TMP2]] +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable ; CHECK: 7: -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 [[X2:%.*]]) -; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 [[X2]]) +; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) @@ -2524,20 +2511,20 @@ define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> define <32 x i8>@test_int_x86_avx512_maskz_pmovs_wb_512(<32 x i16> %x0, i32 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_wb_512( +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP3]], <32 x i8> zeroinitializer +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 5: -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> zeroinitializer, i32 [[X2:%.*]]) -; CHECK-NEXT: store <32 x i8> 
zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> zeroinitializer, i32 [[X2]]) +; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2) @@ -2588,18 +2575,10 @@ define <32 x i8>@test_int_x86_avx512_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = select <32 x i1> splat (i1 true), <32 x i8> [[TMP3]], <32 x i8> [[TMP2]] ; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 -1) -; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1) @@ -2608,24 +2587,21 @@ define <32 x i8>@test_int_x86_avx512_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1 define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_wb_512( +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i8> [[TMP2]] to i256 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP3]], 0 -; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP4:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = select <32 x i1> [[TMP3]], <32 x i8> [[TMP4]], <32 x i8> [[TMP2]] +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP8]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof 
[[PROF1]] ; CHECK: 6: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable ; CHECK: 7: -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 [[X2:%.*]]) -; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> [[X1:%.*]], i32 [[X2]]) +; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) @@ -2634,20 +2610,20 @@ define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8 define <32 x i8>@test_int_x86_avx512_maskz_pmovus_wb_512(<32 x i16> %x0, i32 %x2) #0 { ; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_wb_512( +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]] -; CHECK: 4: +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[X2:%.*]] to <32 x i1> +; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[TMP1]] to <32 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP3]], <32 x i8> zeroinitializer +; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] +; CHECK: 5: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] ; CHECK-NEXT: unreachable -; CHECK: 5: -; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> zeroinitializer, i32 [[X2:%.*]]) -; CHECK-NEXT: store <32 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK: 6: +; CHECK-NEXT: [[RES:%.*]] = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> [[X0:%.*]], <32 x i8> zeroinitializer, i32 [[X2]]) +; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i8> [[RES]] ; %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
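Note on reproducing the wb checks above: unlike the dw/db tests, whose shadows are computed by re-invoking a pmov shadow intrinsic, the saturating wb variants are instrumented with a plain trunc-plus-select over the shadow, as the CHECK lines show. Below is a minimal standalone sketch; the file and function names (repro.ll, @repro) are illustrative and not part of this patch, and the expected instrumentation is inferred from the CHECK lines above rather than regenerated.

; repro.ll -- sketch only; run through: opt -passes=msan -S repro.ll
declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)

define <32 x i8> @repro(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) sanitize_memory {
  ; Expected shadow, per the CHECK lines above: truncate the <32 x i16>
  ; shadow of %x0 to <32 x i8>, then select between it and the shadow of
  ; %x1 using the mask %x2 bitcast to <32 x i1>; only the mask operand
  ; itself gets a strict __msan_warning_noreturn check.
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
  ret <32 x i8> %res
}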