From 036e413052e19a254b58d878fae5298e3c776601 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 10 Sep 2025 18:25:42 +0000 Subject: [PATCH] [msan] Handle AVX512 pack with saturation intrinsics Approximately handle packssdw/packsswb/packusdw_512/packuswb with handleVectorPackIntrinsic(), instead of relying on the default (strict) handler. --- .../Instrumentation/MemorySanitizer.cpp | 21 + .../X86/avx512bw-intrinsics-upgrade.ll | 584 +++++++----------- .../X86/avx512bw-intrinsics.ll | 584 +++++++----------- 3 files changed, 453 insertions(+), 736 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 9899a2aae2b15..3ea790ad1839a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3684,6 +3684,15 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::x86_mmx_packssdw: return Intrinsic::x86_mmx_packssdw; + + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packusdw_512: + return Intrinsic::x86_avx512_packssdw_512; + + case Intrinsic::x86_avx512_packsswb_512: + case Intrinsic::x86_avx512_packuswb_512: + return Intrinsic::x86_avx512_packsswb_512; + default: llvm_unreachable("unexpected intrinsic id"); } @@ -3696,6 +3705,8 @@ struct MemorySanitizerVisitor : public InstVisitor { // Shadow is propagated with the signed variant of the same intrinsic applied // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). // MMXEltSizeInBits is used only for x86mmx arguments. + // + // TODO: consider using GetMinMaxUnsigned() to handle saturation precisely void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned MMXEltSizeInBits = 0) { assert(I.arg_size() == 2); @@ -5554,6 +5565,7 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVectorShiftIntrinsic(I, /* Variable */ true); break; + // Pack with Signed/Unsigned Saturation case Intrinsic::x86_sse2_packsswb_128: case Intrinsic::x86_sse2_packssdw_128: case Intrinsic::x86_sse2_packuswb_128: @@ -5562,6 +5574,15 @@ struct MemorySanitizerVisitor : public InstVisitor { case Intrinsic::x86_avx2_packssdw: case Intrinsic::x86_avx2_packuswb: case Intrinsic::x86_avx2_packusdw: + // e.g., <64 x i8> @llvm.x86.avx512.packsswb.512 + // (<32 x i16> %a, <32 x i16> %b) + // <32 x i16> @llvm.x86.avx512.packssdw.512 + // (<16 x i32> %a, <16 x i32> %b) + // Note: AVX512 masked variants are auto-upgraded by LLVM. + case Intrinsic::x86_avx512_packsswb_512: + case Intrinsic::x86_avx512_packssdw_512: + case Intrinsic::x86_avx512_packuswb_512: + case Intrinsic::x86_avx512_packusdw_512: handleVectorPackIntrinsic(I); break; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll index 51dad35a1edbc..7bd35182d5c90 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll @@ -5,8 +5,6 @@ ; ; Strictly handled: ; - llvm.x86.avx512.dbpsadbw.512 -; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 -; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 ; ; Heuristically handled: ; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8 @@ -2039,19 +2037,14 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) no ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] ; %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) ret <32 x i16> %res @@ -2064,25 +2057,20 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP17]], <32 x i16> [[PASSTHRU]] ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP16]] ; @@ -2096,25 +2084,20 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP15]] ; @@ -2138,18 +2121,13 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounw ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -2175,22 +2153,17 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2219,22 +2192,17 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2266,18 +2234,13 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) noun ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -2309,22 +2272,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2359,22 +2317,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2395,19 +2348,14 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nou ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] ; %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) ret <64 x i8> %res @@ -2420,25 +2368,20 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP17]], <64 x i8> [[PASSTHRU]] ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP16]] ; @@ -2452,25 +2395,20 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP15]] ; @@ -2494,18 +2432,13 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwi ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -2531,22 +2464,17 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -2575,22 +2503,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer @@ -2610,18 +2533,13 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) n ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP8]], <16 x i32> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP7]] ; %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1) @@ -2635,22 +2553,17 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP17]], <16 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP6]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] @@ -2667,22 +2580,17 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP16]], <16 x i32> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP5]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer @@ -2709,18 +2617,13 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) noun ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -2746,22 +2649,17 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2790,22 +2688,17 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2837,18 +2730,13 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nou ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -2880,22 +2768,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -2930,22 +2813,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -2966,18 +2844,13 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) no ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP8]], <32 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP7]] ; %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1) @@ -2991,22 +2864,17 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP17]], <32 x i16> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP6]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] @@ -3023,22 +2891,17 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP16]], <32 x i16> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP5]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer @@ -3065,18 +2928,13 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounw ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -3102,22 +2960,17 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -3146,22 +2999,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll index c6c7e002213bd..8bf6d5acc21ba 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll @@ -7,8 +7,6 @@ ; - llvm.x86.avx512.dbpsadbw.512 ; - llvm.x86.avx512.ktestc.d, llvm.x86.avx512.ktestc.q, llvm.x86.avx512.ktestz.d, llvm.x86.avx512.ktestz.q ; - llvm.x86.avx512.mask.pmov.wb.mem.512 -; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512 -; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512 ; - llvm.x86.avx512.psad.bw.512 ; ; Heuristically handled: @@ -295,19 +293,14 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <32 x i16> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) +; CHECK-NEXT: store <32 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <32 x i16> [[TMP8]] ; %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b) ret <32 x i16> %1 @@ -320,25 +313,20 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP17]], <32 x i16> [[PASSTHRU]] ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP16]] ; @@ -354,25 +342,20 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer ; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP15]] ; @@ -398,18 +381,13 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -435,22 +413,17 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -481,22 +454,17 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -530,18 +498,13 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -573,22 +536,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -625,22 +583,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -663,19 +616,14 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: -; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 -; CHECK-NEXT: ret <64 x i8> [[TMP7]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) +; CHECK-NEXT: store <64 x i8> [[TMP7]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: ret <64 x i8> [[TMP8]] ; %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b) ret <64 x i8> %1 @@ -688,25 +636,20 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: -; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] -; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP17]], [[PASSTHRU:%.*]] +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP9]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP17]], <64 x i8> [[PASSTHRU]] ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP16]] ; @@ -722,25 +665,20 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: -; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] -; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer ; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP15]] ; @@ -766,18 +704,13 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -803,22 +736,17 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -849,22 +777,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer @@ -886,18 +809,13 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) # ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP8]], <16 x i32> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP7]] ; %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b) @@ -911,22 +829,17 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP17]], <16 x i32> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP6]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]] @@ -945,22 +858,17 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP16]], <16 x i32> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP5]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer @@ -989,18 +897,13 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %b = load <16 x i32>, ptr %ptr_b @@ -1026,22 +929,17 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -1072,22 +970,17 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i3 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -1121,18 +1014,13 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) -; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <32 x i16> [[TMP12]] ; %q = load i32, ptr %ptr_b @@ -1164,22 +1052,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3 ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]] @@ -1216,22 +1099,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i ; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0 ; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer ; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512 -; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer @@ -1254,18 +1132,13 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0 ; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]] -; CHECK: 5: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 6: +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP8]], <32 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP7]] ; %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b) @@ -1279,22 +1152,17 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, < ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8 ; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] -; CHECK: 7: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 8: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP17]], <32 x i16> [[TMP8]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP6]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer +; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP6]] ; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]] @@ -1313,22 +1181,17 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8 ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512 -; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]] -; CHECK: 6: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 7: +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP16]], <32 x i16> [[TMP7]]) ; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]]) ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP5]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP5]] ; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]] ; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer @@ -1357,18 +1220,13 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 { ; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]] -; CHECK: 10: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 11: +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]]) ; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) -; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <64 x i8> [[TMP12]] ; %b = load <32 x i16>, ptr %ptr_b @@ -1394,22 +1252,17 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 ; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080 ; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]] -; CHECK: 12: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 13: +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16> +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]] -; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]] ; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]] ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]] ; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]] @@ -1440,22 +1293,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64 ; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080 ; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512 -; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0 -; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512 -; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0 -; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]] -; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] -; CHECK: 11: -; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]] -; CHECK-NEXT: unreachable -; CHECK: 12: +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]]) ; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1> ; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1> -; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer -; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]] ; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer ; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]] ; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer